.\"
-.\" Copyright (c) 2008 Apple Inc. All rights reserved.
+.\" Copyright (c) 2008-2016 Apple Inc. All rights reserved.
.\"
.\" @APPLE_LICENSE_HEADER_START@
.\"
.Sh NAME
.Nm kqueue ,
.Nm kevent ,
-and
.Nm kevent64
+and
+.Nm kevent_qos
.Nd kernel event notification mechanism
.Sh LIBRARY
.Lb libc
.Fn kevent "int kq" "const struct kevent *changelist" "int nchanges" "struct kevent *eventlist" "int nevents" "const struct timespec *timeout"
.Ft int
.Fn kevent64 "int kq" "const struct kevent64_s *changelist" "int nchanges" "struct kevent64_s *eventlist" "int nevents" "unsigned int flags" "const struct timespec *timeout"
+.Ft int
+.Fn kevent_qos "int kq" "const struct kevent_qos_s *changelist" "int nchanges" "struct kevent_qos_s *eventlist" "int nevents" "void *data_out" "size_t *data_available" "unsigned int flags"
.Fn EV_SET "&kev" ident filter flags fflags data udata
.Fn EV_SET64 "&kev" ident filter flags fflags data udata "ext[0]" "ext[1]"
+.Fn EV_SET_QOS "&kev" ident filter flags qos udata fflags xflags data "ext[0]" "ext[1]" "ext[2]" "ext[3]"
.Sh DESCRIPTION
The
.Fn kqueue
-system call
-provides a generic method of notifying the user when an kernel
+system call allocates a kqueue file descriptor. This file descriptor
+provides a generic method of notifying the user when a kernel
event (kevent) happens or a condition holds, based on the results
of small pieces of kernel code termed filters.
-A kevent is identified by an (ident, filter) pair and specifies
-the interesting conditions to be notified about for that pair.
-An (ident, filter) pair can only appear once in a given kqueue.
-Subsequent attempts to register the same pair for a given kqueue
+.Pp
+A kevent is identified by an (ident, filter, and optional udata value)
+tuple. It specifies the interesting conditions to be notified about
+for that tuple. An (ident, filter, and optional udata value) tuple can
+only appear once in a given kqueue.
+Subsequent attempts to register the same tuple for a given kqueue
will result in the replacement of the conditions being watched,
not an addition.
+Whether the udata value is considered as part of the tuple is controlled
+by the EV_UDATA_SPECIFIC flag on the kevent.
.Pp
The filter identified in a kevent is executed upon the initial
registration of that event in order to detect whether a preexisting
.Xr fork 2 .
.Pp
The
-.Fn kevent
-and
+.Fn kevent ,
.Fn kevent64
+and
+.Fn kevent_qos
system calls
are used to register events with the queue, and return any pending
events to the user.
.Fa changelist
argument
is a pointer to an array of
-.Va kevent
-or
+.Va kevent ,
.Va kevent64_s
+or
+.Va kevent_qos_s
structures, as defined in
.Aq Pa sys/event.h .
All changes contained in the
The
.Fa eventlist
argument
-is a pointer to an array of
-.Va kevent
-or
+is a pointer to an array of out
+.Va kevent ,
.Va kevent64_s
+or
+.Va kevent_qos_s
structures.
The
.Fa nevents
-argument
-determines the size of
+argument determines the size of
.Fa eventlist .
+If the KEVENT_FLAG_STACK_EVENTS flag is provided on the system call,
+the eventlist array is filled in in stack order (starting in the
+highest available index) instead of typical array order.
+The
+.Fa data_out
+argument provides space for extra out data provided by specific filters.
+The
+.Fa data_available
+argument's contents specify the space available in the data pool on input,
+and contain the amount still remaining on output.
+If the KEVENT_FLAG_STACK_DATA flag is specified on the system call,
+the data is allocated from the pool in stack order instead of typical heap order.
If
.Fa timeout
is a non-NULL pointer, it specifies a maximum interval to wait
and
.Fn kevent64
wait indefinitely. To effect a poll, the
+.Fa flags
+argument to
+.Fn kevent64
+or
+.Fn kevent_qos
+can include the KEVENT_FLAG_IMMEDIATE value to indicate an
+immediate timeout. Alternatively, the
.Fa timeout
argument should be non-NULL, pointing to a zero-valued
.Va timespec
.Fn EV_SET64
initializes a
.Va kevent64_s
+structure and
+.Fn EV_SET_QOS
+initializes a
+.Va kevent_qos_s
structure.
.Pp
The
-.Va kevent
-and
+.Va kevent ,
.Va kevent64_s
+and
+.Va kevent_qos_s
structures are defined as:
.Bd -literal
struct kevent {
void *udata; /* opaque user data identifier */
};
-
struct kevent64_s {
uint64_t ident; /* identifier for this event */
int16_t filter; /* filter for event */
uint64_t udata; /* opaque user data identifier */
uint64_t ext[2]; /* filter-specific extensions */
};
+
+struct kevent_qos_s {
+ uint64_t ident; /* identifier for this event */
+ int16_t filter; /* filter for event */
+ uint16_t flags; /* general flags */
+ uint32_t qos; /* quality of service when servicing event */
+ uint64_t udata; /* opaque user data identifier */
+ uint32_t fflags; /* filter-specific flags */
+ uint32_t xflags; /* extra filter-specific flags */
+ int64_t data; /* filter-specific data */
+ uint64_t ext[4]; /* filter-specific extensions */
+};
.Ed
.Pp
----
.Pp
The fields of
-.Fa struct kevent
-and
+.Fa struct kevent ,
.Fa struct kevent64_s
+and
+.Fa struct kevent_qos_s
are:
.Bl -tag -width XXXfilter
.It ident
-Value used to identify this event.
+Value used to identify the source of the event.
The exact interpretation is determined by the attached filter,
but often is a file descriptor.
.It filter
.It data
Filter-specific data value.
.It udata
-Opaque user-defined value passed through the kernel unchanged.
+Opaque user-defined value passed through the kernel unchanged. It can
+optionally be part of the uniquing decision of the kevent system.
.El
.Pp
In addition,
what type of filter is being used.
.El
.Pp
+In addition,
+.Fa struct kevent_qos_s
+contains:
+.Bl -tag -width XXXfilter
+.It xflags
+Extra filter-specific flags.
+.It ext[4]
+The QoS variant provides twice as many extension values for filter-specific uses.
+.El
+.Pp
----
.Pp
The
unless overridden by the EV_DISABLE flag.
.It EV_ENABLE
Permit
-.Fn kevent
-and
+.Fn kevent ,
.Fn kevent64
+and
+.Fn kevent_qos
to return the event if it is triggered.
.It EV_DISABLE
Disable the event so
-.Fn kevent
-and
+.Fn kevent ,
.Fn kevent64
+and
+.Fn kevent_qos
will not return it. The filter itself is not disabled.
.It EV_DELETE
Removes the event from the kqueue. Events which are attached to
.It EV_RECEIPT
This flag is useful for making bulk changes to a kqueue without draining any
pending events. When passed as input, it forces EV_ERROR to always be returned.
-When a filter is successfully added. The
+When a filter is successfully added, the
.Va data
field will be zero.
.It EV_ONESHOT
set this flag internally.
.It EV_EOF
Filters may set this flag to indicate filter-specific EOF condition.
+.It EV_OOBAND
+Read filter on socket may set this flag to indicate the presence of out of
+band data on the descriptor.
.It EV_ERROR
See
.Sx RETURN VALUES
.Pp
The predefined system filters are listed below.
Arguments may be passed to and from the filter via the
+.Va data ,
.Va fflags
-and
-.Va data
+and optionally
+.Va xflags
fields in the
-.Va kevent
-or
+.Va kevent ,
.Va kevent64_s
+or
+.Va kevent_qos_s
structure.
.Bl -tag -width EVFILT_MACHPORT
.It EVFILT_READ
.Va fflags ,
and specifying the new low water mark in
.Va data .
+The derived per filter low water mark value is, however, bounded
+by socket receive buffer's high and low water mark values.
On return,
.Va data
contains the number of bytes of protocol data available to read.
.Pp
+The presence of EV_OOBAND in
+.Va flags
+indicates the presence of out of band data on the socket, with
+.Va data
+equal to the potential number of OOB bytes available to read.
+.Pp
If the read direction of the socket has shutdown, then the filter
also sets EV_EOF in
.Va flags ,
contains the offset from current position to end of file,
and may be negative.
.It "Fifos, Pipes"
-Returns when the there is data to read;
+Returns when there is data to read;
.Va data
contains the number of bytes available.
.Pp
This may be cleared by passing in EV_CLEAR, at which point the
filter will resume waiting for data to become available before
returning.
+.It "Device nodes"
+Returns when there is data to read from the device;
+.Va data
+contains the number of bytes available. If the device does
+not support returning number of bytes, it will not allow the
+filter to be attached. However, if the NOTE_LOWAT flag is
+specified and the
+.Va data
+field contains 1 on input, those devices will attach - but
+cannot be relied upon to provide an accurate count of bytes
+to be read on output.
.El
+.It Dv EVFILT_EXCEPT
+Takes a descriptor as the identifier, and returns whenever one of the
+specified exceptional conditions has occurred on the descriptor. Conditions
+are specified in
+.Va fflags .
+Currently, this filter can be used to monitor the arrival of
+out-of-band data on a socket descriptor using the filter flag
+.Dv NOTE_OOB .
+.Pp
+If the read direction of the socket has shutdown, then the filter
+also sets EV_EOF in
+.Va flags ,
+and returns the socket error (if any) in
+.Va fflags .
.It EVFILT_WRITE
Takes a file descriptor as the identifier, and returns whenever
it is possible to write to the descriptor. For sockets, pipes
Access to the file was revoked via
.Xr revoke 2
or the underlying fileystem was unmounted.
+.It NOTE_FUNLOCK
+The file was unlocked by calling
+.Xr flock 2
+or
+.Xr close 2 .
.El
.Pp
On return,
.Va fflags
-contains the events which triggered the filter.
+contains the filter-specific flags which are associated with
+the triggered events seen by this filter.
.It EVFILT_PROC
Takes the process ID to monitor as the identifier and the events to watch for
in
.Bl -tag -width NOTE_SIGNAL
.It NOTE_EXIT
The process has exited.
+.It NOTE_EXITSTATUS
+The process has exited and its exit status is in filter specific data. Valid only on child processes and to be used along with NOTE_EXIT.
.It NOTE_FORK
The process created a child process via
.Xr fork 2
.It NOTE_REAP
The process was reaped by the parent via
.Xr wait 2
-or similar call.
+or similar call. Deprecated, use NOTE_EXIT.
.El
.Pp
On return,
.It EVFILT_MACHPORT
Takes the name of a mach port, or port set, in
.Va ident
-and waits until a message is received on the port or port set. When a message
-is recieved, the size of the message is returned in
-.Va data
-and if
+and waits until a message is enqueued on the port or port set. When a message
+is detected, but not directly received by the kevent call, the name of the
+specific port where the message is enqueued is returned in
+.Va data .
+If
.Va fflags
-is set to MACH_RCV_MSG, a pointer to the message is returned in ext[0].
+contains MACH_RCV_MSG, the ext[0] and ext[1] fields are assumed to contain
+a pointer to the buffer where the message is to be received and the size
+of the receive buffer, respectively. If MACH_RCV_MSG is specified, yet the
+buffer size in ext[1] is zero, the space for the buffer may be carved out
+of the
+.Fa data_out
+area provided to
+.Fn kevent_qos
+if there is enough space remaining there.
.It EVFILT_TIMER
-Establishes an interval timer with the data
-timer identified by
-.Va ident .
-When adding a timer,
+Establishes an interval timer identified by
+.Va ident
+where
.Va data
-specifies the timeout period and
+specifies the timeout period (in milliseconds).
+.Pp
.Va fflags
-can be set to one of the following:
-.Bl -tag -width NOTE_ABSOLUTE
+can include one of the following flags to specify a different unit:
+.Bl -tag -width NOTE_NSECONDS
.It NOTE_SECONDS
-data is in seconds
+.Va data
+is in seconds
.It NOTE_USECONDS
-data is in microseconds
+.Va data
+is in microseconds
.It NOTE_NSECONDS
-data is in nanoseconds
-.It NOTE_ABSOLUTE
-data is an absolute timeout
-.El
-.Pp
-If fflags is not set, the default is milliseconds. The timer will be periodic unless EV_ONESHOT is specified.
-On return,
.Va data
-contains the number of times the timeout has expired since the last call to
-.Fn kevent
-or
-.Fn kevent64 .
-This filter automatically sets the EV_CLEAR flag internally.
-.It EVFILT_SESSION
-Takes the audit session ID to monitor as the identifier and the events to watch for in
-.Va fflags ,
-and returns when one or more of the requested session events occurs.
-To monitor for events for any audit session the value AS_ANY_ASID
-should be used as the identifier. With AS_ANY_ASID, as new audit
-sessions are created they are included as if the were added
-individually. The events to monitor are:
-.Bl -tag -width NOTE_AS_UPDATE
-.It NOTE_AS_START
-A new audit session has started.
-.It NOTE_AS_END
-All the processes in the audit session have exited.
-.It NOTE_AS_CLOSE
-This audit session is no longer valid in the kernel. In other words, it
-is now safe to dispose of any cached information about this session or
-reuse its session ID for a new audit session.
-.It NOTE_AS_UPDATE
-The audit session information was updated. The audit session information is
-considered immutable once initially set. If this becomes enforced in
-the kernel then this event may no longer be needed and may become
-obsolete.
-.It NOTE_AS_ERR
-This flag is returned if the system was unable to attach an event to a
-new session when the audit session ID of AS_ANY_ASID
-is used. This is usually due to resource limitations.
+is in nanoseconds
+.It NOTE_MACHTIME
+.Va data
+is in Mach absolute time units
.El
.Pp
-On return,
.Va fflags
-contains the events which triggered the filter,
-.Va ident
-contains the audit session ID, and
+can also include
+.Dv NOTE_ABSOLUTE ,
+which establishes an
+.Dv EV_ONESHOT
+timer with an absolute deadline instead of an interval.
+The absolute deadline is expressed in terms of
+.Xr gettimeofday 2 .
+With
+.Dv NOTE_MACHTIME ,
+the deadline is expressed in terms of
+.Fn mach_absolute_time .
+.Pp
+The timer can be coalesced with other timers to save power. The following flags can be set in
+.Va fflags
+to modify this behavior:
+.Bl -tag -width NOTE_BACKGROUND
+.It NOTE_CRITICAL
+override default power-saving techniques to more strictly respect the leeway value
+.It NOTE_BACKGROUND
+apply more power-saving techniques to coalesce this timer with other timers
+.It NOTE_LEEWAY
+.Va ext[1]
+holds user-supplied slop in deadline for timer coalescing.
+.El
+.Pp
+The timer will be periodic unless
+.Dv EV_ONESHOT
+is specified.
+On return,
.Va data
-contains the audit user ID.
-This filter automatically sets the EV_CLEAR flag internally.
+contains the number of times the timeout has expired since the last arming or last delivery of the timer event.
+.Pp
+This filter automatically sets the
+.Dv EV_CLEAR
+flag.
.El
.Pp
----
returned and errno set.
.Pp
The
-.Fn kevent
-and
+.Fn kevent ,
.Fn kevent64
+and
+.Fn kevent_qos
system calls
return the number of events placed in the
.Fa eventlist ,
.Dv errno
will be set to indicate the error condition.
If the time limit expires, then
-.Fn kevent
-and
+.Fn kevent ,
.Fn kevent64
+and
+.Fn kevent_qos
return 0.
.Sh ERRORS
The