+.\"
+.\" Copyright (c) 2008-2016 Apple Inc. All rights reserved.
+.\"
+.\" @APPLE_LICENSE_HEADER_START@
+.\"
+.\" This file contains Original Code and/or Modifications of Original Code
+.\" as defined in and that are subject to the Apple Public Source License
+.\" Version 2.0 (the 'License'). You may not use this file except in
+.\" compliance with the License. Please obtain a copy of the License at
+.\" http://www.opensource.apple.com/apsl/ and read it before using this
+.\" file.
+.\"
+.\" The Original Code and all software distributed under the License are
+.\" distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+.\" EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+.\" INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+.\" FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+.\" Please see the License for the specific language governing rights and
+.\" limitations under the License.
+.\"
+.\" @APPLE_LICENSE_HEADER_END@
+.\"
+.\"
.\" Copyright (c) 2000 Jonathan Lemon
.\" All rights reserved.
.\"
.\"
.\" $FreeBSD: src/lib/libc/sys/kqueue.2,v 1.32 2002/12/19 09:40:25 ru Exp $
.\"
-.Dd April 14, 2000
+.Dd October 21, 2008
.Dt KQUEUE 2
.Os
.Sh NAME
.Nm kqueue ,
-.Nm kevent
+.Nm kevent ,
+.Nm kevent64 ,
+.Nm kevent_qos
.Nd kernel event notification mechanism
.Sh LIBRARY
.Lb libc
.Fn kqueue "void"
.Ft int
.Fn kevent "int kq" "const struct kevent *changelist" "int nchanges" "struct kevent *eventlist" "int nevents" "const struct timespec *timeout"
+.Ft int
+.Fn kevent64 "int kq" "const struct kevent64_s *changelist" "int nchanges" "struct kevent64_s *eventlist" "int nevents" "unsigned int flags" "const struct timespec *timeout"
+.Ft int
+.Fn kevent_qos "int kq" "const struct kevent_qos_s *changelist" "int nchanges" "struct kevent_qos_s *eventlist" "int nevents" "void *data_out" "size_t *data_available" "unsigned int flags"
.Fn EV_SET "&kev" ident filter flags fflags data udata
+.Fn EV_SET64 "&kev" ident filter flags fflags data udata "ext[0]" "ext[1]"
+.Fn EV_SET_QOS "&kev" ident filter flags qos udata fflags xflags data "ext[0]" "ext[1]" "ext[2]" "ext[3]"
.Sh DESCRIPTION
The
.Fn kqueue
-system call
-provides a generic method of notifying the user when an kernel
+system call allocates a kqueue file descriptor. This file descriptor
+provides a generic method of notifying the user when a kernel
event (kevent) happens or a condition holds, based on the results
of small pieces of kernel code termed filters.
-A kevent is identified by an (ident, filter) pair and specifies
-the interesting conditions to be notified about for that pair.
-An (ident, filter) pair can only appear once is a given kqueue.
-Subsequent attempts to register the same pair for a given kqueue
+.Pp
+A kevent is identified by an (ident, filter, and optional udata value)
+tuple. It specifies the interesting conditions to be notified about
+for that tuple. An (ident, filter, and optional udata value) tuple can
+only appear once in a given kqueue.
+Subsequent attempts to register the same tuple for a given kqueue
will result in the replacement of the conditions being watched,
not an addition.
+Whether the udata value is considered as part of the tuple is controlled
+by the EV_UDATA_SPECIFIC flag on the kevent.
.Pp
The filter identified in a kevent is executed upon the initial
registration of that event in order to detect whether a preexisting
.Xr fork 2 .
.Pp
The
-.Fn kevent
-system call
-is used to register events with the queue, and return any pending
+.Fn kevent ,
+.Fn kevent64
+and
+.Fn kevent_qos
+system calls
+are used to register events with the queue, and return any pending
events to the user.
The
.Fa changelist
argument
is a pointer to an array of
-.Va kevent
+.Va kevent ,
+.Va kevent64_s
+or
+.Va kevent_qos_s
structures, as defined in
.Aq Pa sys/event.h .
All changes contained in the
The
.Fa eventlist
argument
-is a pointer to an array of kevent structures.
+is a pointer to an array of output
+.Va kevent ,
+.Va kevent64_s
+or
+.Va kevent_qos_s
+structures.
The
.Fa nevents
-argument
-determines the size of
+argument determines the size of
.Fa eventlist .
+If the KEVENT_FLAG_STACK_EVENTS flag is provided on the system call,
+the eventlist array is filled in in stack order (starting in the
+highest available index) instead of typical array order.
+The
+.Fa data_out
+argument provides space for extra out data provided by specific filters.
+The
+.Fa data_available
+argument's contents specify the space available in the data pool on input,
+and contain the amount still remaining on output.
+If the KEVENT_FLAG_STACK_DATA flag is specified on the system call,
+the data is allocated from the pool in stack order instead of typical heap order.
If
.Fa timeout
is a non-NULL pointer, it specifies a maximum interval to wait
for an event, which will be interpreted as a struct timespec. If
.Fa timeout
-is a NULL pointer,
+is a NULL pointer, both
.Fn kevent
-waits indefinitely. To effect a poll, the
+and
+.Fn kevent64
+wait indefinitely. To effect a poll, the
+.Fa flags
+argument to
+.Fn kevent64
+or
+.Fn kevent_qos
+can include the KEVENT_FLAG_IMMEDIATE value to indicate an
+immediate timeout. Alternatively, the
.Fa timeout
argument should be non-NULL, pointing to a zero-valued
.Va timespec
The
.Fn EV_SET
macro is provided for ease of initializing a
-kevent structure.
+.Va kevent
+structure. Similarly,
+.Fn EV_SET64
+initializes a
+.Va kevent64_s
+structure and
+.Fn EV_SET_QOS
+initializes a
+.Va kevent_qos_s
+structure.
.Pp
The
-.Va kevent
-structure is defined as:
+.Va kevent ,
+.Va kevent64_s
+and
+.Va kevent_qos_s
+structures are defined as:
.Bd -literal
struct kevent {
- uintptr_t ident; /* identifier for this event */
- short filter; /* filter for event */
- u_short flags; /* action flags for kqueue */
- u_int fflags; /* filter flag value */
- intptr_t data; /* filter data value */
- void *udata; /* opaque user data identifier */
+ uintptr_t ident; /* identifier for this event */
+ int16_t filter; /* filter for event */
+ uint16_t flags; /* general flags */
+ uint32_t fflags; /* filter-specific flags */
+ intptr_t data; /* filter-specific data */
+ void *udata; /* opaque user data identifier */
+};
+
+struct kevent64_s {
+ uint64_t ident; /* identifier for this event */
+ int16_t filter; /* filter for event */
+ uint16_t flags; /* general flags */
+ uint32_t fflags; /* filter-specific flags */
+ int64_t data; /* filter-specific data */
+ uint64_t udata; /* opaque user data identifier */
+ uint64_t ext[2]; /* filter-specific extensions */
+};
+
+struct kevent_qos_s {
+ uint64_t ident; /* identifier for this event */
+ int16_t filter; /* filter for event */
+ uint16_t flags; /* general flags */
+ uint32_t qos; /* quality of service when servicing event */
+ uint64_t udata; /* opaque user data identifier */
+ uint32_t fflags; /* filter-specific flags */
+ uint32_t xflags; /* extra filter-specific flags */
+ int64_t data; /* filter-specific data */
+ uint64_t ext[4]; /* filter-specific extensions */
};
.Ed
.Pp
+----
+.Pp
The fields of
-.Fa struct kevent
+.Fa struct kevent ,
+.Fa struct kevent64_s
+and
+.Fa struct kevent_qos_s
are:
.Bl -tag -width XXXfilter
.It ident
-Value used to identify this event.
+Value used to identify the source of the event.
The exact interpretation is determined by the attached filter,
but often is a file descriptor.
.It filter
.It data
Filter-specific data value.
.It udata
-Opaque user-defined value passed through the kernel unchanged.
+Opaque user-defined value passed through the kernel unchanged. It can
+optionally be part of the uniquing decision of the kevent system.
+.El
+.Pp
+In addition,
+.Fa struct kevent64_s
+contains:
+.Bl -tag -width XXXfilter
+.It ext[2]
+This field stores extensions for the event's filter. What type of extension depends on
+what type of filter is being used.
.El
.Pp
+In addition,
+.Fa struct kevent_qos_s
+contains:
+.Bl -tag -width XXXfilter
+.It xflags
+Extra filter-specific flags.
+.It ext[4]
+The QoS variant provides twice as many extension values for filter-specific uses.
+.El
+.Pp
+----
+.Pp
The
.Va flags
field can contain the following values:
unless overridden by the EV_DISABLE flag.
.It EV_ENABLE
Permit
-.Fn kevent
+.Fn kevent ,
+.Fn kevent64
+and
+.Fn kevent_qos
to return the event if it is triggered.
.It EV_DISABLE
Disable the event so
-.Fn kevent
+.Fn kevent ,
+.Fn kevent64
+and
+.Fn kevent_qos
will not return it. The filter itself is not disabled.
.It EV_DELETE
Removes the event from the kqueue. Events which are attached to
file descriptors are automatically deleted on the last close of
the descriptor.
+.It EV_RECEIPT
+This flag is useful for making bulk changes to a kqueue without draining any
+pending events. When passed as input, it forces EV_ERROR to always be returned.
+When a filter is successfully added, the
+.Va data
+field will be zero.
.It EV_ONESHOT
Causes the event to return only the first occurrence of the filter
being triggered. After the user retrieves the event from the kqueue,
set this flag internally.
.It EV_EOF
Filters may set this flag to indicate filter-specific EOF condition.
+.It EV_OOBAND
+The read filter on a socket may set this flag to indicate the presence of
+out of band data on the descriptor.
.It EV_ERROR
See
.Sx RETURN VALUES
below.
.El
.Pp
+----
+.Pp
The predefined system filters are listed below.
Arguments may be passed to and from the filter via the
+.Va data ,
.Va fflags
-and
-.Va data
-fields in the kevent structure.
-.Bl -tag -width EVFILT_SIGNAL
+and optionally
+.Va xflags
+fields in the
+.Va kevent ,
+.Va kevent64_s
+or
+.Va kevent_qos_s
+structure.
+.Bl -tag -width EVFILT_MACHPORT
.It EVFILT_READ
Takes a file descriptor as the identifier, and returns whenever
there is data available to read.
.Va fflags ,
and specifying the new low water mark in
.Va data .
+The derived per filter low water mark value is, however, bounded
+by the socket receive buffer's high and low water mark values.
On return,
.Va data
contains the number of bytes of protocol data available to read.
.Pp
+The presence of EV_OOBAND in
+.Va flags
+indicates the presence of out of band data on the socket, with
+.Va data
+equal to the potential number of OOB bytes available to read.
+.Pp
If the read direction of the socket has shutdown, then the filter
also sets EV_EOF in
.Va flags ,
contains the offset from current position to end of file,
and may be negative.
.It "Fifos, Pipes"
-Returns when the there is data to read;
+Returns when there is data to read;
.Va data
contains the number of bytes available.
.Pp
This may be cleared by passing in EV_CLEAR, at which point the
filter will resume waiting for data to become available before
returning.
+.It "Device nodes"
+Returns when there is data to read from the device;
+.Va data
+contains the number of bytes available. If the device does
+not support returning number of bytes, it will not allow the
+filter to be attached. However, if the NOTE_LOWAT flag is
+specified and the
+.Va data
+field contains 1 on input, those devices will attach - but
+cannot be relied upon to provide an accurate count of bytes
+to be read on output.
.El
+.It Dv EVFILT_EXCEPT
+Takes a descriptor as the identifier, and returns whenever one of the
+specified exceptional conditions has occurred on the descriptor. Conditions
+are specified in
+.Va fflags .
+Currently, this filter can be used to monitor the arrival of
+out-of-band data on a socket descriptor using the filter flag
+.Dv NOTE_OOB .
+.Pp
+If the read direction of the socket has shutdown, then the filter
+also sets EV_EOF in
+.Va flags ,
+and returns the socket error (if any) in
+.Va fflags .
.It EVFILT_WRITE
Takes a file descriptor as the identifier, and returns whenever
it is possible to write to the descriptor. For sockets, pipes
Access to the file was revoked via
.Xr revoke 2
or the underlying fileystem was unmounted.
+.It NOTE_FUNLOCK
+The file was unlocked by calling
+.Xr flock 2
+or
+.Xr close 2
.El
.Pp
On return,
.Va fflags
-contains the events which triggered the filter.
+contains the filter-specific flags which are associated with
+the triggered events seen by this filter.
.It EVFILT_PROC
Takes the process ID to monitor as the identifier and the events to watch for
in
and returns when the process performs one or more of the requested events.
If a process can normally see another process, it can attach an event to it.
The events to monitor are:
-.Bl -tag -width XXNOTE_TRACKERR
+.Bl -tag -width NOTE_SIGNAL
.It NOTE_EXIT
The process has exited.
+.It NOTE_EXITSTATUS
+The process has exited and its exit status is in filter specific data. Valid only on child processes and to be used along with NOTE_EXIT.
.It NOTE_FORK
-The process has called
-.Fn fork .
+The process created a child process via
+.Xr fork 2
+or similar call.
.It NOTE_EXEC
-The process has executed a new process via
+The process executed a new process via
.Xr execve 2
or similar call.
-.It NOTE_TRACK
-Follow a process across
-.Fn fork
-calls. The parent process will return with NOTE_TRACK set in the
-.Va fflags
-field, while the child process will return with NOTE_CHILD set in
-.Va fflags
-and the parent PID in
-.Va data .
-.It NOTE_TRACKERR
-This flag is returned if the system was unable to attach an event to
-the child process, usually due to resource limitations.
+.It NOTE_SIGNAL
+The process was sent a signal. Status can be checked via
+.Xr waitpid 2
+or similar call.
+.It NOTE_REAP
+The process was reaped by the parent via
+.Xr wait 2
+or similar call. Deprecated, use NOTE_EXIT.
.El
.Pp
On return,
contains the events which triggered the filter.
.It EVFILT_SIGNAL
Takes the signal number to monitor as the identifier and returns
-when the given signal is delivered to the process.
+when the given signal is generated for the process.
This coexists with the
.Fn signal
and
.Fn sigaction
-facilities, and has a lower precedence. The filter will record
+facilities, and has a lower precedence. Only signals sent to the process,
+not to a particular thread, will trigger the filter. The filter will record
all attempts to deliver a signal to a process, even if the signal has
-been marked as SIG_IGN. Event notification happens after normal
+been marked as SIG_IGN. Event notification happens before normal
signal delivery processing.
.Va data
-returns the number of times the signal has occurred since the last call to
+returns the number of times the signal has been generated since the last call to
.Fn kevent .
This filter automatically sets the EV_CLEAR flag internally.
+.It EVFILT_MACHPORT
+Takes the name of a mach port, or port set, in
+.Va ident
+and waits until a message is enqueued on the port or port set. When a message
+is detected, but not directly received by the kevent call, the name of the
+specific port where the message is enqueued is returned in
+.Va data .
+If
+.Va fflags
+contains MACH_RCV_MSG, the ext[0] and ext[1] fields are assumed to contain
+a pointer to the buffer where the message is to be received and the size
+of the receive buffer, respectively. If MACH_RCV_MSG is specified, yet the
+buffer size in ext[1] is zero, the space for the buffer may be carved out
+of the
+.Va data_out
+area provided to
+.Fn kevent_qos
+if there is enough space remaining there.
.It EVFILT_TIMER
-This filter is currently unsupported.
-.\"Establishes an arbitrary timer identified by
-.\".Va ident .
-.\"When adding a timer,
-.\".Va data
-.\"specifies the timeout period in milliseconds.
-.\"The timer will be periodic unless EV_ONESHOT is specified.
-.\"On return,
-.\".Va data
-.\"contains the number of times the timeout has expired since the last call to
-.\".Fn kevent .
-.\"This filter automatically sets the EV_CLEAR flag internally.
+Establishes an interval timer identified by
+.Va ident
+where
+.Va data
+specifies the timeout period (in milliseconds).
+.Pp
+.Va fflags
+can include one of the following flags to specify a different unit:
+.Bl -tag -width NOTE_NSECONDS
+.It NOTE_SECONDS
+.Va data
+is in seconds
+.It NOTE_USECONDS
+.Va data
+is in microseconds
+.It NOTE_NSECONDS
+.Va data
+is in nanoseconds
+.It NOTE_MACHTIME
+.Va data
+is in Mach absolute time units
+.El
+.Pp
+.Va fflags
+can also include
+.Dv NOTE_ABSOLUTE ,
+which establishes an
+.Dv EV_ONESHOT
+timer with an absolute deadline instead of an interval.
+The absolute deadline is expressed in terms of
+.Xr gettimeofday 2 .
+With
+.Dv NOTE_MACHTIME ,
+the deadline is expressed in terms of
+.Fn mach_absolute_time .
+.Pp
+The timer can be coalesced with other timers to save power. The following flags can be set in
+.Va fflags
+to modify this behavior:
+.Bl -tag -width NOTE_BACKGROUND
+.It NOTE_CRITICAL
+override default power-saving techniques to more strictly respect the leeway value
+.It NOTE_BACKGROUND
+apply more power-saving techniques to coalesce this timer with other timers
+.It NOTE_LEEWAY
+.Va ext[1]
+holds user-supplied slop in deadline for timer coalescing.
+.El
+.Pp
+The timer will be periodic unless
+.Dv EV_ONESHOT
+is specified.
+On return,
+.Va data
+contains the number of times the timeout has expired since the last arming or last delivery of the timer event.
+.Pp
+This filter automatically sets the
+.Dv EV_CLEAR
+flag.
.El
+.Pp
+----
+.Pp
+In the
+.Va ext[2]
+field of the
+.Va kevent64_s
+structure,
+.Va ext[0]
+is only used with the EVFILT_MACHPORT filter.
+With other filters,
+.Va ext[0]
+is passed through
+.Fn kevent64
+much like
+.Va udata .
+.Va ext[1]
+can always be used like
+.Va udata .
+For the use of ext[0], see the EVFILT_MACHPORT filter above.
.Sh RETURN VALUES
The
.Fn kqueue
returned and errno set.
.Pp
The
-.Fn kevent
-system call
-returns the number of events placed in the
+.Fn kevent ,
+.Fn kevent64
+and
+.Fn kevent_qos
+system calls
+return the number of events placed in the
.Fa eventlist ,
up to the value given by
.Fa nevents .
.Dv errno
will be set to indicate the error condition.
If the time limit expires, then
-.Fn kevent
-returns 0.
+.Fn kevent ,
+.Fn kevent64
+and
+.Fn kevent_qos
+return 0.
.Sh ERRORS
The
.Fn kqueue
.Pp
The
.Fn kevent
-system call fails if:
+and
+.Fn kevent64
+system calls fail if:
.Bl -tag -width Er
.It Bq Er EACCES
The process does not have permission to register a filter.
.It Bq Er EFAULT
There was an error reading or writing the
.Va kevent
+or
+.Va kevent64_s
structure.
.It Bq Er EBADF
The specified descriptor is invalid.