X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/316670eb35587141e969394ae8537d66b9211e80..5c9f46613a83ebfc29a5b1f099448259e96a98f0:/bsd/sys/event.h diff --git a/bsd/sys/event.h b/bsd/sys/event.h index d22d5efb2..04385bc6c 100644 --- a/bsd/sys/event.h +++ b/bsd/sys/event.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003-2012 Apple Inc. All rights reserved. + * Copyright (c) 2003-2017 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -60,6 +60,9 @@ #include #include +/* + * Filter types + */ #define EVFILT_READ (-1) #define EVFILT_WRITE (-2) #define EVFILT_AIO (-3) /* attached to aio requests */ @@ -75,9 +78,14 @@ #ifdef PRIVATE #define EVFILT_SOCK (-13) /* Socket events */ +#define EVFILT_MEMORYSTATUS (-14) /* Memorystatus events */ +#endif /* PRIVATE */ +#define EVFILT_EXCEPT (-15) /* Exception events */ +#ifdef PRIVATE +#define EVFILT_WORKLOOP (-17) /* Workloop events */ #endif /* PRIVATE */ -#define EVFILT_SYSCOUNT 13 +#define EVFILT_SYSCOUNT 17 #define EVFILT_THREADMARKER EVFILT_SYSCOUNT /* Internal use only */ #pragma pack(4) @@ -108,10 +116,22 @@ struct user32_kevent { uint16_t flags; /* general flags */ uint32_t fflags; /* filter-specific flags */ int32_t data; /* filter-specific data */ - user32_addr_t udata; /* opaque user data identifier */ + user32_addr_t udata; /* opaque user data identifier */ }; -#endif +struct kevent_internal_s { + uint64_t ident; /* identifier for this event */ + int16_t filter; /* filter for event */ + uint16_t flags; /* general flags */ + int32_t qos; /* quality of service */ + uint32_t fflags; /* filter-specific flags */ +// uint32_t xflags; /* extra filter-specific flags */ + int64_t data; /* filter-specific data */ + uint64_t udata; /* opaque user data identifier */ + uint64_t ext[4]; /* filter-specific extensions */ +}; + +#endif /* KERNEL_PRIVATE */ #pragma pack() @@ -125,6 +145,26 @@ struct kevent64_s { uint64_t ext[2]; /* filter-specific extensions */ }; +#ifdef PRIVATE +struct 
kevent_qos_s { + uint64_t ident; /* identifier for this event */ + int16_t filter; /* filter for event */ + uint16_t flags; /* general flags */ + int32_t qos; /* quality of service */ + uint64_t udata; /* opaque user data identifier */ + uint32_t fflags; /* filter-specific flags */ + uint32_t xflags; /* extra filter-specific flags */ + int64_t data; /* filter-specific data */ + uint64_t ext[4]; /* filter-specific extensions */ +}; + +/* + * Type definition for names/ids of dynamically allocated kqueues. + */ +typedef uint64_t kqueue_id_t; + +#endif /* PRIVATE */ + #define EV_SET(kevp, a, b, c, d, e, f) do { \ struct kevent *__kevp__ = (kevp); \ __kevp__->ident = (a); \ @@ -147,45 +187,138 @@ struct kevent64_s { __kevp__->ext[1] = (h); \ } while(0) + +/* kevent system call flags */ +#define KEVENT_FLAG_NONE 0x000 /* no flag value */ +#define KEVENT_FLAG_IMMEDIATE 0x001 /* immediate timeout */ +#define KEVENT_FLAG_ERROR_EVENTS 0x002 /* output events only include change errors */ + +#ifdef PRIVATE + +/* + * Rather than provide an EV_SET_QOS macro for kevent_qos_t structure + * initialization, we encourage use of named field initialization support + * instead. 
+ */ + +#define KEVENT_FLAG_STACK_EVENTS 0x004 /* output events treated as stack (grows down) */ +#define KEVENT_FLAG_STACK_DATA 0x008 /* output data allocated as stack (grows down) */ +#define KEVENT_FLAG_UNBIND_CHECK_FLAGS 0x010 /* check the flags passed to kevent_qos_internal_unbind */ +#define KEVENT_FLAG_WORKQ 0x020 /* interact with the default workq kq */ +#define KEVENT_FLAG_WORKQ_MANAGER 0x200 /* current thread is the workq manager */ +#define KEVENT_FLAG_WORKLOOP 0x400 /* interact with the specified workloop kq */ +#define KEVENT_FLAG_SYNCHRONOUS_BIND 0x800 /* synchronous bind callback */ + +#define KEVENT_FLAG_WORKLOOP_SERVICER_ATTACH 0x8000 /* attach current thread to workloop */ +#define KEVENT_FLAG_WORKLOOP_SERVICER_DETACH 0x10000 /* unbind current thread from workloop */ +#define KEVENT_FLAG_DYNAMIC_KQ_MUST_EXIST 0x20000 /* kq lookup by id must exist */ +#define KEVENT_FLAG_DYNAMIC_KQ_MUST_NOT_EXIST 0x40000 /* kq lookup by id must not exist */ +#define KEVENT_FLAG_WORKLOOP_NO_WQ_THREAD 0x80000 /* do not create workqueue threads for this worloop */ + +#ifdef XNU_KERNEL_PRIVATE + +#define KEVENT_FLAG_LEGACY32 0x040 /* event data in legacy 32-bit format */ +#define KEVENT_FLAG_LEGACY64 0x080 /* event data in legacy 64-bit format */ +#define KEVENT_FLAG_KERNEL 0x1000 /* caller is in-kernel */ +#define KEVENT_FLAG_DYNAMIC_KQUEUE 0x2000 /* kqueue is dynamically allocated */ +#define KEVENT_FLAG_WORKLOOP_CANCELED 0x4000 /* workloop bind was cancelled */ + +#define KEVENT_FLAG_USER (KEVENT_FLAG_IMMEDIATE | KEVENT_FLAG_ERROR_EVENTS | \ + KEVENT_FLAG_STACK_EVENTS | KEVENT_FLAG_STACK_DATA | \ + KEVENT_FLAG_WORKQ | KEVENT_FLAG_WORKLOOP | \ + KEVENT_FLAG_WORKLOOP_SERVICER_ATTACH | KEVENT_FLAG_WORKLOOP_SERVICER_DETACH | \ + KEVENT_FLAG_DYNAMIC_KQ_MUST_EXIST | KEVENT_FLAG_DYNAMIC_KQ_MUST_NOT_EXIST | \ + KEVENT_FLAG_WORKLOOP_NO_WQ_THREAD) + +/* + * Since some filter ops are not part of the standard sysfilt_ops, we use + * kn_filtid starting from EVFILT_SYSCOUNT to 
identify these cases. This is to + * let kn_fops() get the correct fops for all cases. +*/ +#define EVFILTID_KQREAD (EVFILT_SYSCOUNT) +#define EVFILTID_PIPE_R (EVFILT_SYSCOUNT + 1) +#define EVFILTID_PIPE_W (EVFILT_SYSCOUNT + 2) +#define EVFILTID_PTSD (EVFILT_SYSCOUNT + 3) +#define EVFILTID_SOREAD (EVFILT_SYSCOUNT + 4) +#define EVFILTID_SOWRITE (EVFILT_SYSCOUNT + 5) +#define EVFILTID_SCK (EVFILT_SYSCOUNT + 6) +#define EVFILTID_SOEXCEPT (EVFILT_SYSCOUNT + 7) +#define EVFILTID_SPEC (EVFILT_SYSCOUNT + 8) +#define EVFILTID_BPFREAD (EVFILT_SYSCOUNT + 9) +#define EVFILTID_NECP_FD (EVFILT_SYSCOUNT + 10) +#define EVFILTID_FSEVENT (EVFILT_SYSCOUNT + 13) +#define EVFILTID_VN (EVFILT_SYSCOUNT + 14) +#define EVFILTID_TTY (EVFILT_SYSCOUNT + 16) +#define EVFILTID_PTMX (EVFILT_SYSCOUNT + 17) + +#define EVFILTID_MAX (EVFILT_SYSCOUNT + 18) + +#endif /* defined(XNU_KERNEL_PRIVATE) */ + +#define EV_SET_QOS 0 + +#endif /* PRIVATE */ + /* actions */ -#define EV_ADD 0x0001 /* add event to kq (implies enable) */ -#define EV_DELETE 0x0002 /* delete event from kq */ -#define EV_ENABLE 0x0004 /* enable event */ -#define EV_DISABLE 0x0008 /* disable event (not reported) */ -#define EV_RECEIPT 0x0040 /* force EV_ERROR on success, data == 0 */ +#define EV_ADD 0x0001 /* add event to kq (implies enable) */ +#define EV_DELETE 0x0002 /* delete event from kq */ +#define EV_ENABLE 0x0004 /* enable event */ +#define EV_DISABLE 0x0008 /* disable event (not reported) */ /* flags */ -#define EV_ONESHOT 0x0010 /* only report one occurrence */ -#define EV_CLEAR 0x0020 /* clear event state after reporting */ -#define EV_DISPATCH 0x0080 /* disable event after reporting */ +#define EV_ONESHOT 0x0010 /* only report one occurrence */ +#define EV_CLEAR 0x0020 /* clear event state after reporting */ +#define EV_RECEIPT 0x0040 /* force immediate event output */ + /* ... with or without EV_ERROR */ + /* ... 
use KEVENT_FLAG_ERROR_EVENTS */ + /* on syscalls supporting flags */ + +#define EV_DISPATCH 0x0080 /* disable event after reporting */ +#define EV_UDATA_SPECIFIC 0x0100 /* unique kevent per udata value */ + +#define EV_DISPATCH2 (EV_DISPATCH | EV_UDATA_SPECIFIC) + /* ... in combination with EV_DELETE */ + /* will defer delete until udata-specific */ + /* event enabled. EINPROGRESS will be */ + /* returned to indicate the deferral */ -#define EV_SYSFLAGS 0xF000 /* reserved by system */ -#define EV_FLAG0 0x1000 /* filter-specific flag */ -#define EV_FLAG1 0x2000 /* filter-specific flag */ +#define EV_VANISHED 0x0200 /* report that source has vanished */ + /* ... only valid with EV_DISPATCH2 */ + +#define EV_SYSFLAGS 0xF000 /* reserved by system */ +#define EV_FLAG0 0x1000 /* filter-specific flag */ +#define EV_FLAG1 0x2000 /* filter-specific flag */ /* returned values */ -#define EV_EOF 0x8000 /* EOF detected */ -#define EV_ERROR 0x4000 /* error, data contains errno */ +#define EV_EOF 0x8000 /* EOF detected */ +#define EV_ERROR 0x4000 /* error, data contains errno */ /* * Filter specific flags for EVFILT_READ * * The default behavior for EVFILT_READ is to make the "read" determination - * relative to the current file descriptor read pointer. The EV_POLL - * flag indicates the determination should be made via poll(2) semantics - * (which always returns true for regular files - regardless of the amount - * of unread data in the file). + * relative to the current file descriptor read pointer. + * + * The EV_POLL flag indicates the determination should be made via poll(2) + * semantics. These semantics dictate always returning true for regular files, + * regardless of the amount of unread data in the file. * - * On input, EV_OOBAND specifies that only OOB data should be looked for. - * The returned data count is the number of bytes beyond the current OOB marker. 
+ * On input, EV_OOBAND specifies that filter should actively return in the + * presence of OOB on the descriptor. It implies that filter will return + * if there is OOB data available to read OR when any other condition + * for the read are met (for example number of bytes regular data becomes >= + * low-watermark). + * If EV_OOBAND is not set on input, it implies that the filter should not actively + * return for out of band data on the descriptor. The filter will then only return + * when some other condition for read is met (ex: when number of regular data bytes + * >=low-watermark OR when socket can't receive more data (SS_CANTRCVMORE)). * - * On output, EV_OOBAND indicates that OOB data is present + * On output, EV_OOBAND indicates the presence of OOB data on the descriptor. * If it was not specified as an input parameter, then the data count is the - * number of bytes before the current OOB marker. If at the marker, the - * data count indicates the number of bytes available after it. In either - * case, it's the amount of data one could expect to receive next. + * number of bytes before the current OOB marker, else data count is the number + * of bytes beyond OOB marker. */ -#define EV_POLL EV_FLAG0 +#define EV_POLL EV_FLAG0 #define EV_OOBAND EV_FLAG1 /* @@ -211,6 +344,82 @@ struct kevent64_s { #define NOTE_FFCTRLMASK 0xc0000000 /* mask for operations */ #define NOTE_FFLAGSMASK 0x00ffffff +#ifdef PRIVATE +/* + * data/hint fflags for EVFILT_WORKLOOP, shared with userspace + * + * The ident for thread requests should be the dynamic ID of the workloop + * The ident for each sync waiter must be unique to that waiter [for this workloop] + * + * + * Commands: + * + * @const NOTE_WL_THREAD_REQUEST [in/out] + * The kevent represents asynchronous userspace work and its associated QoS. + * There can only be a single knote with this flag set per workloop. 
+ * + * @const NOTE_WL_SYNC_WAIT [in/out] + * This bit is set when the caller is waiting to become the owner of a workloop. + * If the NOTE_WL_SYNC_WAKE bit is already set then the caller is not blocked, + * else it blocks until it is set. + * + * The QoS field of the knote is used to push on other owners or servicers. + * + * @const NOTE_WL_SYNC_WAKE [in/out] + * Marks the waiter knote as being eligible to become an owner + * This bit can only be set once, trying it again will fail with EALREADY. + * + * + * Flags/Modifiers: + * + * @const NOTE_WL_UPDATE_QOS [in] (only NOTE_WL_THREAD_REQUEST) + * For successful updates (EV_ADD only), learn the new userspace async QoS from + * the kevent qos field. + * + * @const NOTE_WL_END_OWNERSHIP [in] + * If the update is successful (including deletions) or returns ESTALE, and + * the caller thread or the "suspended" thread is currently owning the workloop, + * then ownership is forgotten. + * + * @const NOTE_WL_DISCOVER_OWNER [in] + * If the update is successful (including deletions), learn the owner identity + * from the loaded value during debounce. This requires an address to have been + * filled in the EV_EXTIDX_WL_ADDR ext field, but doesn't require a mask to have + * been set in the EV_EXTIDX_WL_MASK. + * + * @const NOTE_WL_IGNORE_ESTALE [in] + * If the operation would fail with ESTALE, mask the error and pretend the + * update was successful. However the operation itself didn't happen, meaning + * that: + * - attaching a new knote will not happen + * - dropping an existing knote will not happen + * - NOTE_WL_UPDATE_QOS or NOTE_WL_DISCOVER_OWNER will have no effect + * + * This modifier doesn't affect NOTE_WL_END_OWNERSHIP. 
+ */ +#define NOTE_WL_THREAD_REQUEST 0x00000001 +#define NOTE_WL_SYNC_WAIT 0x00000004 +#define NOTE_WL_SYNC_WAKE 0x00000008 +#define NOTE_WL_COMMANDS_MASK 0x0000000f /* Mask of all the [in] commands above */ + +#define NOTE_WL_UPDATE_QOS 0x00000010 +#define NOTE_WL_END_OWNERSHIP 0x00000020 +#define NOTE_WL_UPDATE_OWNER 0 /* ... compatibility define ... */ +#define NOTE_WL_DISCOVER_OWNER 0x00000080 +#define NOTE_WL_IGNORE_ESTALE 0x00000100 +#define NOTE_WL_UPDATES_MASK 0x000001f0 /* Mask of all the [in] updates above */ + +/* + * EVFILT_WORKLOOP ext[] array indexes/meanings. + */ +#define EV_EXTIDX_WL_LANE 0 /* lane identifier [in: sync waiter] + [out: thread request] */ +#define EV_EXTIDX_WL_ADDR 1 /* debounce address [in: NULL==no debounce] */ +#define EV_EXTIDX_WL_MASK 2 /* debounce mask [in] */ +#define EV_EXTIDX_WL_VALUE 3 /* debounce value [in: not current->ESTALE] + [out: new/debounce value] */ +#endif /* PRIVATE */ + /* * data/hint fflags for EVFILT_{READ|WRITE}, shared with userspace * @@ -218,6 +427,10 @@ struct kevent64_s { * realtive to the current file descriptor read pointer. */ #define NOTE_LOWAT 0x00000001 /* low water mark */ + +/* data/hint flags for EVFILT_EXCEPT, shared with userspace */ +#define NOTE_OOB 0x00000002 /* OOB data */ + /* * data/hint fflags for EVFILT_VNODE, shared with userspace */ @@ -229,6 +442,7 @@ struct kevent64_s { #define NOTE_RENAME 0x00000020 /* vnode was renamed */ #define NOTE_REVOKE 0x00000040 /* vnode access was revoked */ #define NOTE_NONE 0x00000080 /* No specific vnode event: to test for EVFILT_READ activation*/ +#define NOTE_FUNLOCK 0x00000100 /* vnode was unlocked by flock(2) */ /* * data/hint fflags for EVFILT_PROC, shared with userspace @@ -237,30 +451,56 @@ struct kevent64_s { * that hangs off the proc structure. They also both play games with the hint * passed to KNOTE(). If NOTE_SIGNAL is passed as a hint, then the lower bits * of the hint contain the signal. 
IF NOTE_FORK is passed, then the lower bits - * contain the PID of the child. + * contain the PID of the child (but the pid does not get passed through in + * the actual kevent). */ -#define NOTE_EXIT 0x80000000 /* process exited */ -#define NOTE_FORK 0x40000000 /* process forked */ -#define NOTE_EXEC 0x20000000 /* process exec'd */ -#define NOTE_REAP 0x10000000 /* process reaped */ -#define NOTE_SIGNAL 0x08000000 /* shared with EVFILT_SIGNAL */ -#define NOTE_EXITSTATUS 0x04000000 /* exit status to be returned, valid for child process only */ -#define NOTE_RESOURCEEND 0x02000000 /* resource limit reached, resource type returned */ - -#if CONFIG_EMBEDDED -/* 0x01000000 is reserved for future use */ - -/* App states notification */ -#define NOTE_APPACTIVE 0x00800000 /* app went to active state */ -#define NOTE_APPBACKGROUND 0x00400000 /* app went to background */ -#define NOTE_APPNONUI 0x00200000 /* app went to active with no UI */ -#define NOTE_APPINACTIVE 0x00100000 /* app went to inactive state */ -#define NOTE_APPALLSTATES 0x00f00000 -#endif /* CONFIG_EMBEDDED */ - -#define NOTE_PDATAMASK 0x000fffff /* mask for pid/signal */ +enum { + eNoteReapDeprecated __deprecated_enum_msg("This kqueue(2) EVFILT_PROC flag is deprecated") = 0x10000000 +}; + +#define NOTE_EXIT 0x80000000 /* process exited */ +#define NOTE_FORK 0x40000000 /* process forked */ +#define NOTE_EXEC 0x20000000 /* process exec'd */ +#define NOTE_REAP ((unsigned int)eNoteReapDeprecated /* 0x10000000 */) /* process reaped */ +#define NOTE_SIGNAL 0x08000000 /* shared with EVFILT_SIGNAL */ +#define NOTE_EXITSTATUS 0x04000000 /* exit status to be returned, valid for child process only */ +#define NOTE_EXIT_DETAIL 0x02000000 /* provide details on reasons for exit */ + +#define NOTE_PDATAMASK 0x000fffff /* mask for signal & exit status */ #define NOTE_PCTRLMASK (~NOTE_PDATAMASK) +/* + * If NOTE_EXITSTATUS is present, provide additional info about exiting process. 
+ */ +enum { + eNoteExitReparentedDeprecated __deprecated_enum_msg("This kqueue(2) EVFILT_PROC flag is no longer sent") = 0x00080000 +}; +#define NOTE_EXIT_REPARENTED ((unsigned int)eNoteExitReparentedDeprecated) /* exited while reparented */ + +/* + * If NOTE_EXIT_DETAIL is present, these bits indicate specific reasons for exiting. + */ +#define NOTE_EXIT_DETAIL_MASK 0x00070000 +#define NOTE_EXIT_DECRYPTFAIL 0x00010000 +#define NOTE_EXIT_MEMORY 0x00020000 +#define NOTE_EXIT_CSERROR 0x00040000 + +#ifdef PRIVATE + +/* + * If NOTE_EXIT_MEMORY is present, these bits indicate specific jetsam condition. + */ +#define NOTE_EXIT_MEMORY_DETAIL_MASK 0xfe000000 +#define NOTE_EXIT_MEMORY_VMPAGESHORTAGE 0x80000000 /* jetsam condition: lowest jetsam priority proc killed due to vm page shortage */ +#define NOTE_EXIT_MEMORY_VMTHRASHING 0x40000000 /* jetsam condition: lowest jetsam priority proc killed due to vm thrashing */ +#define NOTE_EXIT_MEMORY_HIWAT 0x20000000 /* jetsam condition: process reached its high water mark */ +#define NOTE_EXIT_MEMORY_PID 0x10000000 /* jetsam condition: special pid kill requested */ +#define NOTE_EXIT_MEMORY_IDLE 0x08000000 /* jetsam condition: idle process cleaned up */ +#define NOTE_EXIT_MEMORY_VNODE 0X04000000 /* jetsam condition: virtual node kill */ +#define NOTE_EXIT_MEMORY_FCTHRASHING 0x02000000 /* jetsam condition: lowest jetsam priority proc killed due to filecache thrashing */ + +#endif + /* * data/hint fflags for EVFILT_VM, shared with userspace. */ @@ -269,6 +509,46 @@ struct kevent64_s { #define NOTE_VM_PRESSURE_SUDDEN_TERMINATE 0x20000000 /* will quit immediately on memory pressure */ #define NOTE_VM_ERROR 0x10000000 /* there was an error */ +#ifdef PRIVATE + +/* + * data/hint fflags for EVFILT_MEMORYSTATUS, shared with userspace. 
+ */ +#define NOTE_MEMORYSTATUS_PRESSURE_NORMAL 0x00000001 /* system memory pressure has returned to normal */ +#define NOTE_MEMORYSTATUS_PRESSURE_WARN 0x00000002 /* system memory pressure has changed to the warning state */ +#define NOTE_MEMORYSTATUS_PRESSURE_CRITICAL 0x00000004 /* system memory pressure has changed to the critical state */ +#define NOTE_MEMORYSTATUS_LOW_SWAP 0x00000008 /* system is in a low-swap state */ +#define NOTE_MEMORYSTATUS_PROC_LIMIT_WARN 0x00000010 /* process memory limit has hit a warning state */ +#define NOTE_MEMORYSTATUS_PROC_LIMIT_CRITICAL 0x00000020 /* process memory limit has hit a critical state - soft limit */ +#define NOTE_MEMORYSTATUS_MSL_STATUS 0xf0000000 /* bits used to request change to process MSL status */ + +#ifdef KERNEL_PRIVATE +/* + * data/hint fflags for EVFILT_MEMORYSTATUS, but not shared with userspace. + */ +#define NOTE_MEMORYSTATUS_PROC_LIMIT_WARN_ACTIVE 0x00000040 /* Used to restrict sending a warn event only once, per active limit, soft limits only */ +#define NOTE_MEMORYSTATUS_PROC_LIMIT_WARN_INACTIVE 0x00000080 /* Used to restrict sending a warn event only once, per inactive limit, soft limit only */ +#define NOTE_MEMORYSTATUS_PROC_LIMIT_CRITICAL_ACTIVE 0x00000100 /* Used to restrict sending a critical event only once per active limit, soft limit only */ +#define NOTE_MEMORYSTATUS_PROC_LIMIT_CRITICAL_INACTIVE 0x00000200 /* Used to restrict sending a critical event only once per inactive limit, soft limit only */ + +/* + * Use this mask to protect the kernel private flags. 
+ */ +#define EVFILT_MEMORYSTATUS_ALL_MASK \ + (NOTE_MEMORYSTATUS_PRESSURE_NORMAL | NOTE_MEMORYSTATUS_PRESSURE_WARN | NOTE_MEMORYSTATUS_PRESSURE_CRITICAL | NOTE_MEMORYSTATUS_LOW_SWAP | \ + NOTE_MEMORYSTATUS_PROC_LIMIT_WARN | NOTE_MEMORYSTATUS_PROC_LIMIT_CRITICAL | NOTE_MEMORYSTATUS_MSL_STATUS) + +#endif /* KERNEL_PRIVATE */ + +typedef enum vm_pressure_level { + kVMPressureNormal = 0, + kVMPressureWarning = 1, + kVMPressureUrgent = 2, + kVMPressureCritical = 3, +} vm_pressure_level_t; + +#endif /* PRIVATE */ + /* * data/hint fflags for EVFILT_TIMER, shared with userspace. * The default is a (repeating) interval timer with the data @@ -280,7 +560,21 @@ struct kevent64_s { #define NOTE_USECONDS 0x00000002 /* data is microseconds */ #define NOTE_NSECONDS 0x00000004 /* data is nanoseconds */ #define NOTE_ABSOLUTE 0x00000008 /* absolute timeout */ - /* ... implicit EV_ONESHOT */ + /* ... implicit EV_ONESHOT, timeout uses the gettimeofday epoch */ +#define NOTE_LEEWAY 0x00000010 /* ext[1] holds leeway for power aware timers */ +#define NOTE_CRITICAL 0x00000020 /* system does minimal timer coalescing */ +#define NOTE_BACKGROUND 0x00000040 /* system does maximum timer coalescing */ +#define NOTE_MACH_CONTINUOUS_TIME 0x00000080 + /* + * NOTE_MACH_CONTINUOUS_TIME: + * with NOTE_ABSOLUTE: causes the timer to continue to tick across sleep, + * still uses gettimeofday epoch + * with NOTE_MACHTIME and NOTE_ABSOLUTE: uses mach continuous time epoch + * without NOTE_ABSOLUTE (interval timer mode): continues to tick across sleep + */ +#define NOTE_MACHTIME 0x00000100 /* data is mach absolute time units */ + /* timeout uses the mach absolute time epoch */ + #ifdef PRIVATE /* * data/hint fflags for EVFILT_SOCK, shared with userspace. 
@@ -295,6 +589,18 @@ struct kevent64_s { #define NOTE_SUSPEND 0x00000040 /* output queue suspended */ #define NOTE_RESUME 0x00000080 /* output queue resumed */ #define NOTE_KEEPALIVE 0x00000100 /* TCP Keepalive received */ +#define NOTE_ADAPTIVE_WTIMO 0x00000200 /* TCP adaptive write timeout */ +#define NOTE_ADAPTIVE_RTIMO 0x00000400 /* TCP adaptive read timeout */ +#define NOTE_CONNECTED 0x00000800 /* socket is connected */ +#define NOTE_DISCONNECTED 0x00001000 /* socket is disconnected */ +#define NOTE_CONNINFO_UPDATED 0x00002000 /* connection info was updated */ +#define NOTE_NOTIFY_ACK 0x00004000 /* notify acknowledgement */ + +#define EVFILT_SOCK_LEVEL_TRIGGER_MASK \ + (NOTE_READCLOSED | NOTE_WRITECLOSED | NOTE_SUSPEND | NOTE_RESUME | NOTE_CONNECTED | NOTE_DISCONNECTED) + +#define EVFILT_SOCK_ALL_MASK \ + (NOTE_CONNRESET | NOTE_READCLOSED | NOTE_WRITECLOSED | NOTE_TIMEOUT | NOTE_NOSRCADDR | NOTE_IFDENIED | NOTE_SUSPEND | NOTE_RESUME | NOTE_KEEPALIVE | NOTE_ADAPTIVE_WTIMO | NOTE_ADAPTIVE_RTIMO | NOTE_CONNECTED | NOTE_DISCONNECTED | NOTE_CONNINFO_UPDATED | NOTE_NOTIFY_ACK) #endif /* PRIVATE */ @@ -313,6 +619,19 @@ struct kevent64_s { * receive the message and the requested (or default) message trailers. In addition, * the fflags field contains the return code normally returned by mach_msg(). * + * If MACH_RCV_MSG is specified, and the ext[1] field specifies a zero length, the + * system call argument specifying an ouput area (kevent_qos) will be consulted. If + * the system call specified an output data area, the user-space address + * of the received message is carved from that provided output data area (if enough + * space remains there). The address and length of each received message is + * returned in the ext[0] and ext[1] fields (respectively) of the corresponding kevent. + * + * IF_MACH_RCV_VOUCHER_CONTENT is specified, the contents of the message voucher is + * extracted (as specified in the xflags field) and stored in ext[2] up to ext[3] + * length. 
If the input length is zero, and the system call provided a data area, + * the space for the voucher content is carved from the provided space and its + * address and length is returned in ext[2] and ext[3] respectively. + * * If no message receipt options were provided in the fflags field on setup, no * message is received by this call. Instead, on output, the data field simply * contains the name of the actual port detected with a message waiting. @@ -328,6 +647,8 @@ struct kevent64_s { #define NOTE_CHILD 0x00000004 /* am a child process */ +#ifdef PRIVATE +#endif /* PRIVATE */ #ifndef KERNEL /* Temporay solution for BootX to use inode.h till kqueue moves to vfs layer */ @@ -338,8 +659,11 @@ SLIST_HEAD(klist, knote); #ifdef KERNEL -#ifdef KERNEL_PRIVATE +#ifdef XNU_KERNEL_PRIVATE #include +#include +#include /* FREAD, FWRITE */ +#include /* panic */ #ifdef MALLOC_DECLARE MALLOC_DECLARE(M_KQUEUE); @@ -347,99 +671,397 @@ MALLOC_DECLARE(M_KQUEUE); TAILQ_HEAD(kqtailq, knote); /* a list of "queued" events */ +/* Bit size for packed field within knote */ +#define KNOTE_KQ_BITSIZE 40 + + +/* index into various kq queues */ +typedef uint8_t kq_index_t; +typedef uint16_t kn_status_t; + +#define KN_ACTIVE 0x0001 /* event has been triggered */ +#define KN_QUEUED 0x0002 /* event is on queue */ +#define KN_DISABLED 0x0004 /* event is disabled */ +#define KN_DROPPING 0x0008 /* knote is being dropped */ +#define KN_USEWAIT 0x0010 /* wait for knote use */ +#define KN_ATTACHING 0x0020 /* event is pending attach */ +#define KN_STAYACTIVE 0x0040 /* force event to stay active */ +#define KN_DEFERDELETE 0x0080 /* defer delete until re-enabled */ +#define KN_ATTACHED 0x0100 /* currently attached to source */ +#define KN_DISPATCH 0x0200 /* disables as part of deliver */ +#define KN_UDATA_SPECIFIC 0x0400 /* udata is part of matching */ +#define KN_SUPPRESSED 0x0800 /* event is suppressed during delivery */ +#define KN_STOLENDROP 0x1000 /* someone stole the drop privilege */ +#define 
KN_REQVANISH 0x2000 /* requested EV_VANISH */ +#define KN_VANISHED 0x4000 /* has vanished */ + +#define KN_DISPATCH2 (KN_DISPATCH | KN_UDATA_SPECIFIC) + /* combination defines deferred-delete mode enabled */ + struct knote { - int kn_inuse; /* inuse count */ - struct kqtailq *kn_tq; /* pointer to tail queue */ - TAILQ_ENTRY(knote) kn_tqe; /* linkage for tail queue */ - struct kqueue *kn_kq; /* which kqueue we are on */ - SLIST_ENTRY(knote) kn_link; /* linkage for search list */ - SLIST_ENTRY(knote) kn_selnext; /* klist element chain */ + TAILQ_ENTRY(knote) kn_tqe; /* linkage for tail queue */ + SLIST_ENTRY(knote) kn_link; /* linkage for search list */ + SLIST_ENTRY(knote) kn_selnext; /* klist element chain */ union { - struct fileproc *p_fp; /* file data pointer */ - struct proc *p_proc; /* proc pointer */ - struct ipc_pset *p_pset; /* pset pointer */ + struct fileproc *p_fp; /* file data pointer */ + struct proc *p_proc; /* proc pointer */ + struct ipc_mqueue *p_mqueue; /* pset pointer */ } kn_ptr; - struct filterops *kn_fop; - int kn_status; /* status bits */ - int kn_sfflags; /* saved filter flags */ - struct kevent64_s kn_kevent; - void *kn_hook; - int kn_hookid; - int64_t kn_sdata; /* saved data field */ - -#define KN_ACTIVE 0x01 /* event has been triggered */ -#define KN_QUEUED 0x02 /* event is on queue */ -#define KN_DISABLED 0x04 /* event is disabled */ -#define KN_DROPPING 0x08 /* knote is being dropped */ -#define KN_USEWAIT 0x10 /* wait for knote use */ -#define KN_ATTACHING 0x20 /* event is pending attach */ -#define KN_STAYQUEUED 0x40 /* force event to stay on queue */ + uint64_t kn_req_index:3, /* requested qos index */ + kn_qos_index:3, /* in-use qos index */ + kn_qos_override:3, /* qos override index */ + kn_qos_sync_override:3, /* qos sync override index */ + kn_vnode_kqok:1, + kn_vnode_use_ofst:1, + kn_qos_override_is_sync:1, /* qos override index is a sync override */ + kn_reserved:1, /* reserved bits */ + kn_filtid:8, /* filter id to index 
filter ops */ + kn_kq_packed:KNOTE_KQ_BITSIZE; /* packed pointer for kq */ + + union { + void *kn_hook; + uint64_t kn_hook_data; + }; + int64_t kn_sdata; /* saved data field */ + struct kevent_internal_s kn_kevent; + int kn_sfflags; /* saved filter flags */ + int kn_hookid; + uint16_t kn_inuse; /* inuse count */ + kn_status_t kn_status; /* status bits */ #define kn_id kn_kevent.ident #define kn_filter kn_kevent.filter #define kn_flags kn_kevent.flags +#define kn_qos kn_kevent.qos +#define kn_udata kn_kevent.udata #define kn_fflags kn_kevent.fflags +#define kn_xflags kn_kevent.xflags #define kn_data kn_kevent.data -#define kn_udata kn_kevent.udata #define kn_ext kn_kevent.ext #define kn_fp kn_ptr.p_fp }; -/* Hint values for f_touch filter operation */ -#define EVENT_REGISTER 1 -#define EVENT_PROCESS 2 +static inline struct kqueue *knote_get_kq(struct knote *kn) +{ + if (!(kn->kn_kq_packed)) + return 0; + else + return (struct kqueue *)((uintptr_t)(kn->kn_kq_packed) + (uintptr_t)VM_MIN_KERNEL_AND_KEXT_ADDRESS); +} + +static inline void knote_set_kq(struct knote *kn, void *kq) +{ + if (!kq) + kn->kn_kq_packed = 0; + else { + uint64_t offset = ((uintptr_t)kq - (uintptr_t)VM_MIN_KERNEL_AND_KEXT_ADDRESS); + kn->kn_kq_packed = offset; + } +} + +static inline int knote_get_seltype(struct knote *kn) +{ + switch (kn->kn_filter) { + case EVFILT_READ: + return FREAD; + case EVFILT_WRITE: + return FWRITE; + default: + panic("%s(%p): invalid filter %d\n", + __func__, kn, kn->kn_filter); + return 0; + } +} + +static inline void knote_set_error(struct knote *kn, int error) +{ + kn->kn_flags |= EV_ERROR; + kn->kn_data = error; +} + +struct filt_process_s { + int fp_fd; + unsigned int fp_flags; + user_addr_t fp_data_out; + user_size_t fp_data_size; + user_size_t fp_data_resid; +}; +typedef struct filt_process_s *filt_process_data_t; + +/* + * Filter operators + * + * These routines, provided by each filter, are called to attach, detach, deliver events, + * change/update filter 
registration and process/deliver events. They are called + * with a use-count referenced knote, with the kq unlocked. Here are more details: + * + * f_isfd - + * identifies if the "ident" field in the kevent structure is a file-descriptor. + * + * If so, the knote is associated with the file descriptor prior to attach and + * auto-removed when the file descriptor is closed (this latter behavior may change + * for EV_DISPATCH2 kevent types to allow delivery of events identifying unintended + * closes). + * + * Otherwise the knote is hashed by the ident and has no auto-close behavior. + * + * f_adjusts_qos - + * identifies if the filter can adjust its QoS during its lifetime. + * + * Currently, EVFILT_MACHPORT is the only filter using this facility. + * + * f_needs_boost - + * [OPTIONAL] used by filters to communicate they need to hold a boost + * while holding a usecount on this knote. This is called with the kqlock + * held. + * + * This is only used by EVFILT_WORKLOOP currently. + * + * f_attach - + * called to attach the knote to the underlying object that will be delivering events + * through it when EV_ADD is supplied and no existing matching event is found + * + * provided a knote that is pre-attached to the fd or hashed (see above) but is + * specially marked to avoid concurrent access until the attach is complete. The + * kevent structure embedded in this knote has been filled in with a sanitized + * version of the user-supplied kevent data. However, the user-supplied filter-specific + * flags (fflags) and data fields have been moved into the knote's kn_sfflags and kn_sdata + * fields respectively. These are usually interpreted as a set of "interest" flags and + * data by each filter - to be matched against delivered events. + * + * The attach operator indicates errors by setting the EV_ERROR flag in the flags field + * embedded in the knote's kevent structure - with the specific error indicated in the + * corresponding data field.
+ * + * The return value indicates if the knote should already be considered "activated" at + * the time of attach (one or more of the interest events has already occurred). + * + * f_post_attach - + * [OPTIONAL] called after a successful attach, with the kqueue lock held, + * returns lock held, may drop and re-acquire + * + * If this function is non-null, then it indicates that the filter wants + * to perform an action after a successful ATTACH of a knote. + * + * Currently, EVFILT_WORKLOOP is the only filter using this facility. + * + * The return value indicates an error to report to userland. + * + * + * f_detach - + * called to disassociate the knote from the underlying object delivering events + * the filter should not attempt to deliver events through this knote after this + * operation returns control to the kq system. + * + * f_event - + * if the knote() function (or KNOTE() macro) is called against a list of knotes, + * this operator will be called on each knote in the list. + * + * The "hint" parameter is completely filter-specific, but usually indicates an + * event or set of events that have occurred against the source object associated + * with the list. + * + * The return value indicates if the knote should already be considered "activated" at + * the time of attach (one or more of the interest events has already occurred). + * + * f_drop_and_unlock - + * [OPTIONAL] called with the kqueue locked, and has to unlock + * + * If this function is non-null, then it indicates that the filter + * wants to handle EV_DELETE events. This is necessary if a particular + * filter needs to synchronize knote deletion with its own filter lock. + * Currently, EVFILT_WORKLOOP is the only filter using this facility. + * + * The return value indicates an error during the knote drop, i.e., the + * knote still exists and user space should re-drive the EV_DELETE.
+ * + * If the return value is ERESTART, kevent_register() is called from + * scratch again (useful to wait for usecounts to drop and then + * reevaluate the relevance of that drop) + * + * + * f_process - + * called when attempting to deliver triggered events to user-space. + * + * If the knote was previously activated, this operator will be called when a + * thread is trying to deliver events to user-space. The filter gets one last + * chance to determine if the event/events are still interesting for this knote + * (are the conditions still right to deliver an event). If so, the filter + * fills in the output kevent structure with the information to be delivered. + * + * The input context/data parameter is used during event delivery. Some + * filters allow additional data delivery as part of event delivery. This + * context field indicates if space was made available for these additional + * items and how that space is to be allocated/carved-out. + * + * The filter may set EV_CLEAR or EV_ONESHOT in the output flags field to indicate + * special post-delivery dispositions for the knote. + * + * EV_CLEAR - indicates that all matching events have been delivered. Even + * though there were events to deliver now, there will not be any + * more until some additional events are delivered to the knote + * via the f_event operator, or the interest set is changed via + * the f_touch operator. The knote can remain deactivated after + * processing this event delivery. + * + * EV_ONESHOT - indicates that this is the last event to be delivered via + * this knote. It will automatically be deleted upon delivery + * (or if in dispatch-mode, upon re-enablement after this delivery). + * + * The return value indicates if the knote has delivered an output event. 
+ * Unless one of the special output flags was set in the output kevent, a non- + * zero return value ALSO indicates that the knote should be re-activated + * for future event processing (in case it delivers level-based or multi-edge + * type events like message queues that already exist). + * + * NOTE: In the future, the boolean may change to an enum that allows more + * explicit indication of just delivering a current event vs delivering + * an event with more events still pending. + * + * f_touch - + * called to update the knote with new state from the user during EV_ADD/EV_ENABLE/EV_DISABLE + * on an already-attached knote. + * + * f_touch should copy relevant new data from the kevent into the knote. + * (if KN_UDATA_SPECIFIC is not set, you may need to update the udata too) + * + * operator must lock against concurrent f_event and f_process operations. + * + * A return value of 1 indicates that the knote should now be considered 'activated'. + * + * f_touch can set EV_ERROR with specific error in the data field to return an error to the client. + * You should return 1 to indicate that the kevent needs to be activated and processed. + * + * f_peek - + * For knotes marked KN_STAYACTIVE, indicate if the knote is truly active at + * the moment (not used for event delivery, but for status checks).
+ */ struct filterops { - int f_isfd; /* true if ident == filedescriptor */ - int (*f_attach)(struct knote *kn); - void (*f_detach)(struct knote *kn); - int (*f_event)(struct knote *kn, long hint); - /* Optional f_touch operation, called only if !f_isfd && non-NULL */ - void (*f_touch)(struct knote *kn, struct kevent64_s *kev, long type); - /* Optional f_peek operation, called only if KN_STAYQUEUED is set */ + bool f_isfd; /* true if ident == filedescriptor */ + bool f_adjusts_qos; /* true if the filter can override the knote */ + bool (*f_needs_boost)(struct kevent_internal_s *kev); + int (*f_attach)(struct knote *kn, struct kevent_internal_s *kev); + int (*f_post_attach)(struct knote *kn, struct kevent_internal_s *kev); + void (*f_detach)(struct knote *kn); + int (*f_event)(struct knote *kn, long hint); + int (*f_touch)(struct knote *kn, struct kevent_internal_s *kev); + int (*f_drop_and_unlock)(struct knote *kn, struct kevent_internal_s *kev); + int (*f_process)(struct knote *kn, struct filt_process_s *data, struct kevent_internal_s *kev); unsigned (*f_peek)(struct knote *kn); }; struct proc; -struct wait_queue; +struct waitq; SLIST_HEAD(klist, knote); -extern void knote_init(void) __attribute__((section("__TEXT, initcode"))); +extern void knote_init(void); extern void klist_init(struct klist *list); #define KNOTE(list, hint) knote(list, hint) #define KNOTE_ATTACH(list, kn) knote_attach(list, kn) #define KNOTE_DETACH(list, kn) knote_detach(list, kn) - extern void knote(struct klist *list, long hint); extern int knote_attach(struct klist *list, struct knote *kn); extern int knote_detach(struct klist *list, struct knote *kn); -extern int knote_link_wait_queue(struct knote *kn, struct wait_queue *wq, wait_queue_link_t wql); -extern int knote_unlink_wait_queue(struct knote *kn, struct wait_queue *wq, wait_queue_link_t *wqlp); -extern void knote_fdclose(struct proc *p, int fd); -extern void knote_markstayqueued(struct knote *kn); +extern void knote_vanish(struct 
klist *list); +extern int knote_link_waitq(struct knote *kn, struct waitq *wq, uint64_t *reserved_link); +extern int knote_unlink_waitq(struct knote *kn, struct waitq *wq); +extern void knote_fdclose(struct proc *p, int fd, int force); +extern void knote_markstayactive(struct knote *kn); +extern void knote_clearstayactive(struct knote *kn); +extern void knote_adjust_qos(struct knote *kn, int qos, int override, kq_index_t sync_override_index); +extern void knote_adjust_sync_qos(struct knote *kn, kq_index_t sync_qos, boolean_t lock_kq); +extern const struct filterops *knote_fops(struct knote *kn); +extern void knote_set_error(struct knote *kn, int error); -#endif /* !KERNEL_PRIVATE */ +int kevent_exit_on_workloop_ownership_leak(thread_t thread); +int kevent_proc_copy_uptrs(void *proc, uint64_t *buf, int bufsize); +int kevent_copyout_proc_dynkqids(void *proc, user_addr_t ubuf, + uint32_t ubufsize, int32_t *nkqueues_out); +int kevent_copyout_dynkqinfo(void *proc, kqueue_id_t kq_id, user_addr_t ubuf, + uint32_t ubufsize, int32_t *size_out); +int kevent_copyout_dynkqextinfo(void *proc, kqueue_id_t kq_id, user_addr_t ubuf, + uint32_t ubufsize, int32_t *nknotes_out); + +#elif defined(KERNEL_PRIVATE) /* !XNU_KERNEL_PRIVATE: kexts still need a klist structure definition */ + +#include +struct proc; +struct knote; +SLIST_HEAD(klist, knote); + +#endif /* !XNU_KERNEL_PRIVATE && KERNEL_PRIVATE */ + +#ifdef KERNEL_PRIVATE +#ifdef PRIVATE + +/* make these private functions available to the pthread kext */ +extern int kevent_qos_internal(struct proc *p, int fd, + user_addr_t changelist, int nchanges, + user_addr_t eventlist, int nevents, + user_addr_t data_out, user_size_t *data_available, + unsigned int flags, int32_t *retval); + +extern int kevent_qos_internal_bind(struct proc *p, + int qos, thread_t thread, unsigned int flags); +extern int kevent_qos_internal_unbind(struct proc *p, + int qos, thread_t thread, unsigned int flags); + +extern int kevent_id_internal(struct proc *p, 
kqueue_id_t *id, + user_addr_t changelist, int nchanges, + user_addr_t eventlist, int nevents, + user_addr_t data_out, user_size_t *data_available, + unsigned int flags, int32_t *retval); + +#endif /* PRIVATE */ +#endif /* KERNEL_PRIVATE */ #else /* KERNEL */ +#include struct timespec; __BEGIN_DECLS int kqueue(void); -int kevent(int kq, const struct kevent *changelist, int nchanges, - struct kevent *eventlist, int nevents, - const struct timespec *timeout); -int kevent64(int kq, const struct kevent64_s *changelist, - int nchanges, struct kevent64_s *eventlist, - int nevents, unsigned int flags, - const struct timespec *timeout); +int kevent(int kq, + const struct kevent *changelist, int nchanges, + struct kevent *eventlist, int nevents, + const struct timespec *timeout); +int kevent64(int kq, + const struct kevent64_s *changelist, int nchanges, + struct kevent64_s *eventlist, int nevents, + unsigned int flags, + const struct timespec *timeout); + +#ifdef PRIVATE +int kevent_qos(int kq, + const struct kevent_qos_s *changelist, int nchanges, + struct kevent_qos_s *eventlist, int nevents, + void *data_out, size_t *data_available, + unsigned int flags); + +int kevent_id(kqueue_id_t id, + const struct kevent_qos_s *changelist, int nchanges, + struct kevent_qos_s *eventlist, int nevents, + void *data_out, size_t *data_available, + unsigned int flags); +#endif /* PRIVATE */ + __END_DECLS #endif /* KERNEL */ +#ifdef PRIVATE + +/* Flags for pending events notified by kernel via return-to-kernel ast */ +#define R2K_WORKLOOP_PENDING_EVENTS 0x1 +#define R2K_WORKQ_PENDING_EVENTS 0x2 + +#endif /* PRIVATE */ + #endif /* !_SYS_EVENT_H_ */