]> git.saurik.com Git - apple/xnu.git/blame - bsd/kern/sys_generic.c
xnu-3789.21.4.tar.gz
[apple/xnu.git] / bsd / kern / sys_generic.c
CommitLineData
1c79356b 1/*
3e170ce0 2 * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
5d5c5d0d 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
1c79356b 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
8f6c56a5 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
8f6c56a5 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b
A
27 */
28/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
29/*
30 * Copyright (c) 1982, 1986, 1989, 1993
31 * The Regents of the University of California. All rights reserved.
32 * (c) UNIX System Laboratories, Inc.
33 * All or some portions of this file are derived from material licensed
34 * to the University of California by American Telephone and Telegraph
35 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
36 * the permission of UNIX System Laboratories, Inc.
37 *
38 * Redistribution and use in source and binary forms, with or without
39 * modification, are permitted provided that the following conditions
40 * are met:
41 * 1. Redistributions of source code must retain the above copyright
42 * notice, this list of conditions and the following disclaimer.
43 * 2. Redistributions in binary form must reproduce the above copyright
44 * notice, this list of conditions and the following disclaimer in the
45 * documentation and/or other materials provided with the distribution.
46 * 3. All advertising materials mentioning features or use of this software
47 * must display the following acknowledgement:
48 * This product includes software developed by the University of
49 * California, Berkeley and its contributors.
50 * 4. Neither the name of the University nor the names of its contributors
51 * may be used to endorse or promote products derived from this software
52 * without specific prior written permission.
53 *
54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 * SUCH DAMAGE.
65 *
66 * @(#)sys_generic.c 8.9 (Berkeley) 2/14/95
67 */
2d21ac55
A
68/*
69 * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce
70 * support for mandatory and extensible security protections. This notice
71 * is included in support of clause 2.2 (b) of the Apple Public License,
72 * Version 2.0.
73 */
1c79356b
A
74
75#include <sys/param.h>
76#include <sys/systm.h>
77#include <sys/filedesc.h>
78#include <sys/ioctl.h>
91447636
A
79#include <sys/file_internal.h>
80#include <sys/proc_internal.h>
1c79356b 81#include <sys/socketvar.h>
91447636 82#include <sys/uio_internal.h>
1c79356b 83#include <sys/kernel.h>
fe8ab488 84#include <sys/guarded.h>
1c79356b
A
85#include <sys/stat.h>
86#include <sys/malloc.h>
91447636 87#include <sys/sysproto.h>
1c79356b 88
91447636 89#include <sys/mount_internal.h>
1c79356b
A
90#include <sys/protosw.h>
91#include <sys/ev.h>
92#include <sys/user.h>
93#include <sys/kdebug.h>
91447636
A
94#include <sys/poll.h>
95#include <sys/event.h>
96#include <sys/eventvar.h>
316670eb 97#include <sys/proc.h>
39236c6e 98#include <sys/kauth.h>
91447636
A
99
100#include <mach/mach_types.h>
101#include <kern/kern_types.h>
1c79356b 102#include <kern/assert.h>
91447636
A
103#include <kern/kalloc.h>
104#include <kern/thread.h>
105#include <kern/clock.h>
316670eb
A
106#include <kern/ledger.h>
107#include <kern/task.h>
39236c6e 108#include <kern/telemetry.h>
3e170ce0
A
109#include <kern/waitq.h>
110#include <kern/sched_prim.h>
1c79356b
A
111
112#include <sys/mbuf.h>
39236c6e 113#include <sys/domain.h>
1c79356b
A
114#include <sys/socket.h>
115#include <sys/socketvar.h>
116#include <sys/errno.h>
55e303ae 117#include <sys/syscall.h>
91447636 118#include <sys/pipe.h>
1c79356b 119
b0d623f7 120#include <security/audit/audit.h>
e5568f75 121
1c79356b
A
122#include <net/if.h>
123#include <net/route.h>
124
125#include <netinet/in.h>
126#include <netinet/in_systm.h>
127#include <netinet/ip.h>
128#include <netinet/in_pcb.h>
129#include <netinet/ip_var.h>
130#include <netinet/ip6.h>
131#include <netinet/tcp.h>
132#include <netinet/tcp_fsm.h>
133#include <netinet/tcp_seq.h>
134#include <netinet/tcp_timer.h>
135#include <netinet/tcp_var.h>
136#include <netinet/tcpip.h>
137#include <netinet/tcp_debug.h>
0b4e3aa0 138/* for wait queue based select */
3e170ce0 139#include <kern/waitq.h>
91447636 140#include <kern/kalloc.h>
91447636
A
141#include <sys/vnode_internal.h>
142
2d21ac55
A
143/* XXX should be in a header file somewhere */
144void evsofree(struct socket *);
145void evpipefree(struct pipe *);
146void postpipeevent(struct pipe *, int);
147void postevent(struct socket *, struct sockbuf *, int);
148extern kern_return_t IOBSDGetPlatformUUID(__darwin_uuid_t uuid, mach_timespec_t timeoutp);
149
91447636 150int rd_uio(struct proc *p, int fdes, uio_t uio, user_ssize_t *retval);
fe8ab488 151int wr_uio(struct proc *p, struct fileproc *fp, uio_t uio, user_ssize_t *retval);
91447636 152
2d21ac55 153__private_extern__ int dofileread(vfs_context_t ctx, struct fileproc *fp,
91447636
A
154 user_addr_t bufp, user_size_t nbyte,
155 off_t offset, int flags, user_ssize_t *retval);
2d21ac55 156__private_extern__ int dofilewrite(vfs_context_t ctx, struct fileproc *fp,
91447636
A
157 user_addr_t bufp, user_size_t nbyte,
158 off_t offset, int flags, user_ssize_t *retval);
159__private_extern__ int preparefileread(struct proc *p, struct fileproc **fp_ret, int fd, int check_for_vnode);
160__private_extern__ void donefileread(struct proc *p, struct fileproc *fp_ret, int fd);
9bccf70c 161
6d2010ae
A
162
163/* Conflict wait queue for when selects collide (opaque type) */
3e170ce0 164struct waitq select_conflict_queue;
6d2010ae
A
165
166/*
167 * Init routine called from bsd_init.c
168 */
3e170ce0 169void select_waitq_init(void);
6d2010ae 170void
3e170ce0 171select_waitq_init(void)
6d2010ae 172{
39037602 173 waitq_init(&select_conflict_queue, SYNC_POLICY_FIFO);
6d2010ae
A
174}
175
/*
 * Shorthand accessors: each name expands to the corresponding member
 * reached through a fileproc's f_fglob pointer, so that "fp->f_flag"
 * reads "fp->f_fglob->fg_flag", and so on.
 */
#define	f_flag		f_fglob->fg_flag
#define	f_type		f_fglob->fg_ops->fo_type
#define	f_msgcount	f_fglob->fg_msgcount
#define	f_cred		f_fglob->fg_cred
#define	f_ops		f_fglob->fg_ops
#define	f_offset	f_fglob->fg_offset
#define	f_data		f_fglob->fg_data
2d21ac55 183
1c79356b
A
184/*
185 * Read system call.
2d21ac55
A
186 *
187 * Returns: 0 Success
188 * preparefileread:EBADF
189 * preparefileread:ESPIPE
190 * preparefileread:ENXIO
191 * preparefileread:EBADF
192 * dofileread:???
1c79356b 193 */
9bccf70c 194int
2d21ac55
A
195read(struct proc *p, struct read_args *uap, user_ssize_t *retval)
196{
197 __pthread_testcancel(1);
198 return(read_nocancel(p, (struct read_nocancel_args *)uap, retval));
199}
200
201int
202read_nocancel(struct proc *p, struct read_nocancel_args *uap, user_ssize_t *retval)
9bccf70c 203{
91447636 204 struct fileproc *fp;
9bccf70c 205 int error;
91447636 206 int fd = uap->fd;
b0d623f7 207 struct vfs_context context;
91447636
A
208
209 if ( (error = preparefileread(p, &fp, fd, 0)) )
210 return (error);
9bccf70c 211
b0d623f7
A
212 context = *(vfs_context_current());
213 context.vc_ucred = fp->f_fglob->fg_cred;
214
215 error = dofileread(&context, fp, uap->cbuf, uap->nbyte,
91447636
A
216 (off_t)-1, 0, retval);
217
218 donefileread(p, fp, fd);
219
220 return (error);
9bccf70c
A
221}
222
223/*
224 * Pread system call
2d21ac55
A
225 *
226 * Returns: 0 Success
227 * preparefileread:EBADF
228 * preparefileread:ESPIPE
229 * preparefileread:ENXIO
230 * preparefileread:EBADF
231 * dofileread:???
9bccf70c 232 */
9bccf70c 233int
2d21ac55 234pread(struct proc *p, struct pread_args *uap, user_ssize_t *retval)
9bccf70c 235{
2d21ac55
A
236 __pthread_testcancel(1);
237 return(pread_nocancel(p, (struct pread_nocancel_args *)uap, retval));
238}
239
240int
241pread_nocancel(struct proc *p, struct pread_nocancel_args *uap, user_ssize_t *retval)
242{
243 struct fileproc *fp = NULL; /* fp set by preparefileread() */
91447636 244 int fd = uap->fd;
9bccf70c 245 int error;
b0d623f7 246 struct vfs_context context;
9bccf70c 247
91447636 248 if ( (error = preparefileread(p, &fp, fd, 1)) )
4a3eedf9 249 goto out;
91447636 250
b0d623f7
A
251 context = *(vfs_context_current());
252 context.vc_ucred = fp->f_fglob->fg_cred;
253
254 error = dofileread(&context, fp, uap->buf, uap->nbyte,
91447636 255 uap->offset, FOF_OFFSET, retval);
55e303ae 256
91447636
A
257 donefileread(p, fp, fd);
258
b7266188 259 KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_pread) | DBG_FUNC_NONE),
55e303ae 260 uap->fd, uap->nbyte, (unsigned int)((uap->offset >> 32)), (unsigned int)(uap->offset), 0);
4a3eedf9
A
261
262out:
91447636 263 return (error);
9bccf70c
A
264}
265
/*
 * donefileread
 *
 * Code common for read and pread: drops the fileproc reference obtained
 * by preparefileread().  fp_drop() is called with its last argument set
 * to 1 while the process fd lock is held by this function.
 */
void
donefileread(struct proc *p, struct fileproc *fp, int fd)
{
	proc_fdlock_spin(p);
	(void)fp_drop(p, fd, fp, 1);
	proc_fdunlock(p);
}
277
2d21ac55
A
278/*
279 * Returns: 0 Success
280 * EBADF
281 * ESPIPE
282 * ENXIO
283 * fp_lookup:EBADF
284 * fo_read:???
285 */
91447636
A
286int
287preparefileread(struct proc *p, struct fileproc **fp_ret, int fd, int check_for_pread)
288{
289 vnode_t vp;
290 int error;
291 struct fileproc *fp;
292
b0d623f7
A
293 AUDIT_ARG(fd, fd);
294
2d21ac55 295 proc_fdlock_spin(p);
91447636
A
296
297 error = fp_lookup(p, fd, &fp, 1);
298
299 if (error) {
300 proc_fdunlock(p);
301 return (error);
302 }
303 if ((fp->f_flag & FREAD) == 0) {
304 error = EBADF;
305 goto out;
306 }
307 if (check_for_pread && (fp->f_type != DTYPE_VNODE)) {
308 error = ESPIPE;
309 goto out;
310 }
311 if (fp->f_type == DTYPE_VNODE) {
312 vp = (struct vnode *)fp->f_fglob->fg_data;
313
2d21ac55
A
314 if (check_for_pread && (vnode_isfifo(vp))) {
315 error = ESPIPE;
316 goto out;
317 }
318 if (check_for_pread && (vp->v_flag & VISTTY)) {
319 error = ENXIO;
320 goto out;
321 }
91447636
A
322 }
323
324 *fp_ret = fp;
325
326 proc_fdunlock(p);
327 return (0);
328
329out:
330 fp_drop(p, fd, fp, 1);
331 proc_fdunlock(p);
332 return (error);
333}
334
335
2d21ac55
A
336/*
337 * Returns: 0 Success
338 * EINVAL
339 * fo_read:???
340 */
55e303ae 341__private_extern__ int
2d21ac55
A
342dofileread(vfs_context_t ctx, struct fileproc *fp,
343 user_addr_t bufp, user_size_t nbyte, off_t offset, int flags,
344 user_ssize_t *retval)
1c79356b 345{
91447636
A
346 uio_t auio;
347 user_ssize_t bytecnt;
348 long error = 0;
349 char uio_buf[ UIO_SIZEOF(1) ];
1c79356b 350
9bccf70c
A
351 if (nbyte > INT_MAX)
352 return (EINVAL);
91447636 353
2d21ac55 354 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
91447636
A
355 auio = uio_createwithbuffer(1, offset, UIO_USERSPACE64, UIO_READ,
356 &uio_buf[0], sizeof(uio_buf));
357 } else {
358 auio = uio_createwithbuffer(1, offset, UIO_USERSPACE32, UIO_READ,
359 &uio_buf[0], sizeof(uio_buf));
360 }
361 uio_addiov(auio, bufp, nbyte);
362
91447636 363 bytecnt = nbyte;
9bccf70c 364
2d21ac55 365 if ((error = fo_read(fp, auio, flags, ctx))) {
91447636 366 if (uio_resid(auio) != bytecnt && (error == ERESTART ||
9bccf70c
A
367 error == EINTR || error == EWOULDBLOCK))
368 error = 0;
369 }
91447636 370 bytecnt -= uio_resid(auio);
91447636
A
371
372 *retval = bytecnt;
373
9bccf70c 374 return (error);
1c79356b
A
375}
376
9bccf70c
A
377/*
378 * Scatter read system call.
2d21ac55
A
379 *
380 * Returns: 0 Success
381 * EINVAL
382 * ENOMEM
383 * copyin:EFAULT
384 * rd_uio:???
9bccf70c 385 */
9bccf70c 386int
2d21ac55
A
387readv(struct proc *p, struct readv_args *uap, user_ssize_t *retval)
388{
389 __pthread_testcancel(1);
390 return(readv_nocancel(p, (struct readv_nocancel_args *)uap, retval));
391}
392
393int
394readv_nocancel(struct proc *p, struct readv_nocancel_args *uap, user_ssize_t *retval)
1c79356b 395{
91447636 396 uio_t auio = NULL;
1c79356b 397 int error;
91447636
A
398 struct user_iovec *iovp;
399
400 /* Verify range bedfore calling uio_create() */
401 if (uap->iovcnt <= 0 || uap->iovcnt > UIO_MAXIOV)
402 return (EINVAL);
403
404 /* allocate a uio large enough to hold the number of iovecs passed */
405 auio = uio_create(uap->iovcnt, 0,
406 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
407 UIO_READ);
408
409 /* get location of iovecs within the uio. then copyin the iovecs from
410 * user space.
411 */
412 iovp = uio_iovsaddr(auio);
413 if (iovp == NULL) {
414 error = ENOMEM;
415 goto ExitThisRoutine;
416 }
b0d623f7
A
417 error = copyin_user_iovec_array(uap->iovp,
418 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
419 uap->iovcnt, iovp);
91447636
A
420 if (error) {
421 goto ExitThisRoutine;
422 }
423
424 /* finalize uio_t for use and do the IO
425 */
39236c6e
A
426 error = uio_calculateresid(auio);
427 if (error) {
428 goto ExitThisRoutine;
429 }
91447636
A
430 error = rd_uio(p, uap->fd, auio, retval);
431
432ExitThisRoutine:
433 if (auio != NULL) {
434 uio_free(auio);
435 }
1c79356b
A
436 return (error);
437}
438
439/*
440 * Write system call
2d21ac55
A
441 *
442 * Returns: 0 Success
443 * EBADF
444 * fp_lookup:EBADF
445 * dofilewrite:???
1c79356b 446 */
9bccf70c 447int
2d21ac55
A
448write(struct proc *p, struct write_args *uap, user_ssize_t *retval)
449{
450 __pthread_testcancel(1);
451 return(write_nocancel(p, (struct write_nocancel_args *)uap, retval));
452
453}
454
455int
456write_nocancel(struct proc *p, struct write_nocancel_args *uap, user_ssize_t *retval)
1c79356b 457{
91447636 458 struct fileproc *fp;
9bccf70c 459 int error;
91447636 460 int fd = uap->fd;
fe8ab488 461 bool wrote_some = false;
9bccf70c 462
b0d623f7
A
463 AUDIT_ARG(fd, fd);
464
91447636
A
465 error = fp_lookup(p,fd,&fp,0);
466 if (error)
467 return(error);
468 if ((fp->f_flag & FWRITE) == 0) {
469 error = EBADF;
fe8ab488
A
470 } else if (FP_ISGUARDED(fp, GUARD_WRITE)) {
471 proc_fdlock(p);
472 error = fp_guard_exception(p, fd, fp, kGUARD_EXC_WRITE);
473 proc_fdunlock(p);
91447636 474 } else {
2d21ac55
A
475 struct vfs_context context = *(vfs_context_current());
476 context.vc_ucred = fp->f_fglob->fg_cred;
477
478 error = dofilewrite(&context, fp, uap->cbuf, uap->nbyte,
9bccf70c 479 (off_t)-1, 0, retval);
fe8ab488
A
480
481 wrote_some = *retval > 0;
91447636 482 }
fe8ab488 483 if (wrote_some)
91447636
A
484 fp_drop_written(p, fd, fp);
485 else
486 fp_drop(p, fd, fp, 0);
9bccf70c
A
487 return(error);
488}
489
490/*
91447636 491 * pwrite system call
2d21ac55
A
492 *
493 * Returns: 0 Success
494 * EBADF
495 * ESPIPE
496 * ENXIO
497 * EINVAL
498 * fp_lookup:EBADF
499 * dofilewrite:???
9bccf70c 500 */
9bccf70c 501int
2d21ac55
A
502pwrite(struct proc *p, struct pwrite_args *uap, user_ssize_t *retval)
503{
504 __pthread_testcancel(1);
505 return(pwrite_nocancel(p, (struct pwrite_nocancel_args *)uap, retval));
506}
507
508int
509pwrite_nocancel(struct proc *p, struct pwrite_nocancel_args *uap, user_ssize_t *retval)
9bccf70c 510{
91447636 511 struct fileproc *fp;
9bccf70c 512 int error;
91447636 513 int fd = uap->fd;
2d21ac55 514 vnode_t vp = (vnode_t)0;
fe8ab488 515 bool wrote_some = false;
91447636 516
b0d623f7
A
517 AUDIT_ARG(fd, fd);
518
91447636
A
519 error = fp_lookup(p,fd,&fp,0);
520 if (error)
521 return(error);
9bccf70c 522
91447636
A
523 if ((fp->f_flag & FWRITE) == 0) {
524 error = EBADF;
fe8ab488
A
525 } else if (FP_ISGUARDED(fp, GUARD_WRITE)) {
526 proc_fdlock(p);
527 error = fp_guard_exception(p, fd, fp, kGUARD_EXC_WRITE);
528 proc_fdunlock(p);
91447636 529 } else {
2d21ac55
A
530 struct vfs_context context = *vfs_context_current();
531 context.vc_ucred = fp->f_fglob->fg_cred;
532
91447636
A
533 if (fp->f_type != DTYPE_VNODE) {
534 error = ESPIPE;
2d21ac55
A
535 goto errout;
536 }
537 vp = (vnode_t)fp->f_fglob->fg_data;
538 if (vnode_isfifo(vp)) {
539 error = ESPIPE;
540 goto errout;
541 }
542 if ((vp->v_flag & VISTTY)) {
543 error = ENXIO;
544 goto errout;
91447636 545 }
2d21ac55
A
546 if (uap->offset == (off_t)-1) {
547 error = EINVAL;
548 goto errout;
549 }
550
551 error = dofilewrite(&context, fp, uap->buf, uap->nbyte,
552 uap->offset, FOF_OFFSET, retval);
fe8ab488 553 wrote_some = *retval > 0;
9bccf70c 554 }
2d21ac55 555errout:
fe8ab488 556 if (wrote_some)
91447636
A
557 fp_drop_written(p, fd, fp);
558 else
559 fp_drop(p, fd, fp, 0);
55e303ae 560
b7266188 561 KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_pwrite) | DBG_FUNC_NONE),
55e303ae
A
562 uap->fd, uap->nbyte, (unsigned int)((uap->offset >> 32)), (unsigned int)(uap->offset), 0);
563
9bccf70c
A
564 return(error);
565}
566
2d21ac55
A
567/*
568 * Returns: 0 Success
569 * EINVAL
570 * <fo_write>:EPIPE
571 * <fo_write>:??? [indirect through struct fileops]
572 */
55e303ae 573__private_extern__ int
2d21ac55
A
574dofilewrite(vfs_context_t ctx, struct fileproc *fp,
575 user_addr_t bufp, user_size_t nbyte, off_t offset, int flags,
576 user_ssize_t *retval)
9bccf70c 577{
91447636
A
578 uio_t auio;
579 long error = 0;
580 user_ssize_t bytecnt;
581 char uio_buf[ UIO_SIZEOF(1) ];
91447636 582
fe8ab488
A
583 if (nbyte > INT_MAX) {
584 *retval = 0;
9bccf70c 585 return (EINVAL);
fe8ab488 586 }
91447636 587
2d21ac55 588 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
91447636
A
589 auio = uio_createwithbuffer(1, offset, UIO_USERSPACE64, UIO_WRITE,
590 &uio_buf[0], sizeof(uio_buf));
591 } else {
592 auio = uio_createwithbuffer(1, offset, UIO_USERSPACE32, UIO_WRITE,
593 &uio_buf[0], sizeof(uio_buf));
594 }
595 uio_addiov(auio, bufp, nbyte);
596
91447636 597 bytecnt = nbyte;
2d21ac55 598 if ((error = fo_write(fp, auio, flags, ctx))) {
91447636 599 if (uio_resid(auio) != bytecnt && (error == ERESTART ||
9bccf70c
A
600 error == EINTR || error == EWOULDBLOCK))
601 error = 0;
55e303ae 602 /* The socket layer handles SIGPIPE */
6d2010ae
A
603 if (error == EPIPE && fp->f_type != DTYPE_SOCKET &&
604 (fp->f_fglob->fg_lflags & FG_NOSIGPIPE) == 0) {
2d21ac55
A
605 /* XXX Raise the signal on the thread? */
606 psignal(vfs_context_proc(ctx), SIGPIPE);
607 }
9bccf70c 608 }
91447636 609 bytecnt -= uio_resid(auio);
91447636
A
610 *retval = bytecnt;
611
9bccf70c 612 return (error);
1c79356b 613}
9bccf70c
A
614
615/*
616 * Gather write system call
617 */
9bccf70c 618int
2d21ac55
A
619writev(struct proc *p, struct writev_args *uap, user_ssize_t *retval)
620{
621 __pthread_testcancel(1);
622 return(writev_nocancel(p, (struct writev_nocancel_args *)uap, retval));
623}
624
625int
626writev_nocancel(struct proc *p, struct writev_nocancel_args *uap, user_ssize_t *retval)
1c79356b 627{
91447636 628 uio_t auio = NULL;
1c79356b 629 int error;
fe8ab488 630 struct fileproc *fp;
91447636 631 struct user_iovec *iovp;
fe8ab488 632 bool wrote_some = false;
91447636 633
b0d623f7
A
634 AUDIT_ARG(fd, uap->fd);
635
91447636
A
636 /* Verify range bedfore calling uio_create() */
637 if (uap->iovcnt <= 0 || uap->iovcnt > UIO_MAXIOV)
638 return (EINVAL);
639
640 /* allocate a uio large enough to hold the number of iovecs passed */
641 auio = uio_create(uap->iovcnt, 0,
642 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
643 UIO_WRITE);
644
645 /* get location of iovecs within the uio. then copyin the iovecs from
646 * user space.
647 */
648 iovp = uio_iovsaddr(auio);
649 if (iovp == NULL) {
650 error = ENOMEM;
651 goto ExitThisRoutine;
652 }
b0d623f7
A
653 error = copyin_user_iovec_array(uap->iovp,
654 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
655 uap->iovcnt, iovp);
91447636
A
656 if (error) {
657 goto ExitThisRoutine;
658 }
659
660 /* finalize uio_t for use and do the IO
661 */
39236c6e
A
662 error = uio_calculateresid(auio);
663 if (error) {
664 goto ExitThisRoutine;
665 }
fe8ab488
A
666
667 error = fp_lookup(p, uap->fd, &fp, 0);
668 if (error)
669 goto ExitThisRoutine;
670
671 if ((fp->f_flag & FWRITE) == 0) {
672 error = EBADF;
673 } else if (FP_ISGUARDED(fp, GUARD_WRITE)) {
674 proc_fdlock(p);
675 error = fp_guard_exception(p, uap->fd, fp, kGUARD_EXC_WRITE);
676 proc_fdunlock(p);
677 } else {
678 error = wr_uio(p, fp, auio, retval);
679 wrote_some = *retval > 0;
680 }
681
682 if (wrote_some)
683 fp_drop_written(p, uap->fd, fp);
684 else
685 fp_drop(p, uap->fd, fp, 0);
91447636
A
686
687ExitThisRoutine:
688 if (auio != NULL) {
689 uio_free(auio);
690 }
1c79356b
A
691 return (error);
692}
693
91447636 694
9bccf70c 695int
fe8ab488 696wr_uio(struct proc *p, struct fileproc *fp, uio_t uio, user_ssize_t *retval)
1c79356b 697{
91447636
A
698 int error;
699 user_ssize_t count;
2d21ac55 700 struct vfs_context context = *vfs_context_current();
1c79356b 701
91447636 702 count = uio_resid(uio);
2d21ac55
A
703
704 context.vc_ucred = fp->f_cred;
705 error = fo_write(fp, uio, 0, &context);
91447636
A
706 if (error) {
707 if (uio_resid(uio) != count && (error == ERESTART ||
708 error == EINTR || error == EWOULDBLOCK))
709 error = 0;
710 /* The socket layer handles SIGPIPE */
6d2010ae
A
711 if (error == EPIPE && fp->f_type != DTYPE_SOCKET &&
712 (fp->f_fglob->fg_lflags & FG_NOSIGPIPE) == 0)
91447636
A
713 psignal(p, SIGPIPE);
714 }
715 *retval = count - uio_resid(uio);
716
91447636
A
717 return(error);
718}
719
720
721int
2d21ac55 722rd_uio(struct proc *p, int fdes, uio_t uio, user_ssize_t *retval)
91447636
A
723{
724 struct fileproc *fp;
725 int error;
726 user_ssize_t count;
2d21ac55 727 struct vfs_context context = *vfs_context_current();
91447636
A
728
729 if ( (error = preparefileread(p, &fp, fdes, 0)) )
730 return (error);
731
732 count = uio_resid(uio);
2d21ac55
A
733
734 context.vc_ucred = fp->f_cred;
735
736 error = fo_read(fp, uio, 0, &context);
9bccf70c 737
91447636
A
738 if (error) {
739 if (uio_resid(uio) != count && (error == ERESTART ||
740 error == EINTR || error == EWOULDBLOCK))
741 error = 0;
1c79356b 742 }
91447636 743 *retval = count - uio_resid(uio);
9bccf70c 744
91447636 745 donefileread(p, fp, fdes);
9bccf70c 746
91447636 747 return (error);
1c79356b
A
748}
749
750/*
751 * Ioctl system call
91447636 752 *
2d21ac55
A
753 * Returns: 0 Success
754 * EBADF
755 * ENOTTY
756 * ENOMEM
757 * ESRCH
758 * copyin:EFAULT
759 * copyoutEFAULT
760 * fp_lookup:EBADF Bad file descriptor
761 * fo_ioctl:???
1c79356b 762 */
9bccf70c 763int
b0d623f7 764ioctl(struct proc *p, struct ioctl_args *uap, __unused int32_t *retval)
1c79356b 765{
39236c6e 766 struct fileproc *fp = NULL;
91447636 767 int error = 0;
39236c6e
A
768 u_int size = 0;
769 caddr_t datap = NULL, memp = NULL;
770 boolean_t is64bit = FALSE;
771 int tmp = 0;
1c79356b
A
772#define STK_PARAMS 128
773 char stkbuf[STK_PARAMS];
91447636 774 int fd = uap->fd;
39236c6e 775 u_long com = uap->com;
2d21ac55 776 struct vfs_context context = *vfs_context_current();
1c79356b 777
e5568f75 778 AUDIT_ARG(fd, uap->fd);
e5568f75 779 AUDIT_ARG(addr, uap->data);
91447636
A
780
781 is64bit = proc_is64bit(p);
b0d623f7
A
782#if CONFIG_AUDIT
783 if (is64bit)
39236c6e 784 AUDIT_ARG(value64, com);
b0d623f7 785 else
39236c6e 786 AUDIT_ARG(cmd, CAST_DOWN_EXPLICIT(int, com));
b0d623f7 787#endif /* CONFIG_AUDIT */
91447636 788
1c79356b
A
789 /*
790 * Interpret high order word to find amount of data to be
791 * copied to/from the user's address space.
792 */
793 size = IOCPARM_LEN(com);
39236c6e
A
794 if (size > IOCPARM_MAX)
795 return ENOTTY;
1c79356b 796 if (size > sizeof (stkbuf)) {
39236c6e
A
797 if ((memp = (caddr_t)kalloc(size)) == 0)
798 return ENOMEM;
91447636 799 datap = memp;
1c79356b 800 } else
91447636 801 datap = &stkbuf[0];
39236c6e 802 if (com & IOC_IN) {
1c79356b 803 if (size) {
91447636 804 error = copyin(uap->data, datap, size);
39236c6e
A
805 if (error)
806 goto out_nofp;
91447636
A
807 } else {
808 /* XXX - IOC_IN and no size? we should proably return an error here!! */
809 if (is64bit) {
810 *(user_addr_t *)datap = uap->data;
811 }
812 else {
813 *(uint32_t *)datap = (uint32_t)uap->data;
814 }
815 }
39236c6e 816 } else if ((com & IOC_OUT) && size)
1c79356b
A
817 /*
818 * Zero the buffer so the user always
819 * gets back something deterministic.
820 */
91447636 821 bzero(datap, size);
39236c6e 822 else if (com & IOC_VOID) {
91447636
A
823 /* XXX - this is odd since IOC_VOID means no parameters */
824 if (is64bit) {
825 *(user_addr_t *)datap = uap->data;
826 }
827 else {
828 *(uint32_t *)datap = (uint32_t)uap->data;
829 }
830 }
1c79356b 831
39236c6e
A
832 proc_fdlock(p);
833 error = fp_lookup(p,fd,&fp,1);
834 if (error) {
835 proc_fdunlock(p);
836 goto out_nofp;
837 }
838
839 AUDIT_ARG(file, p, fp);
840
841 if ((fp->f_flag & (FREAD | FWRITE)) == 0) {
842 error = EBADF;
843 goto out;
844 }
845
846 context.vc_ucred = fp->f_fglob->fg_cred;
847
848#if CONFIG_MACF
849 error = mac_file_check_ioctl(context.vc_ucred, fp->f_fglob, com);
850 if (error)
851 goto out;
852#endif
853
1c79356b 854 switch (com) {
39236c6e
A
855 case FIONCLEX:
856 *fdflags(p, fd) &= ~UF_EXCLOSE;
857 break;
858
859 case FIOCLEX:
860 *fdflags(p, fd) |= UF_EXCLOSE;
861 break;
1c79356b
A
862
863 case FIONBIO:
91447636 864 if ( (tmp = *(int *)datap) )
1c79356b
A
865 fp->f_flag |= FNONBLOCK;
866 else
867 fp->f_flag &= ~FNONBLOCK;
2d21ac55 868 error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, &context);
1c79356b
A
869 break;
870
871 case FIOASYNC:
91447636 872 if ( (tmp = *(int *)datap) )
1c79356b
A
873 fp->f_flag |= FASYNC;
874 else
875 fp->f_flag &= ~FASYNC;
2d21ac55 876 error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, &context);
1c79356b
A
877 break;
878
879 case FIOSETOWN:
91447636 880 tmp = *(int *)datap;
1c79356b
A
881 if (fp->f_type == DTYPE_SOCKET) {
882 ((struct socket *)fp->f_data)->so_pgid = tmp;
1c79356b
A
883 break;
884 }
91447636 885 if (fp->f_type == DTYPE_PIPE) {
2d21ac55 886 error = fo_ioctl(fp, (int)TIOCSPGRP, (caddr_t)&tmp, &context);
91447636
A
887 break;
888 }
1c79356b
A
889 if (tmp <= 0) {
890 tmp = -tmp;
891 } else {
2d21ac55 892 struct proc *p1 = proc_find(tmp);
1c79356b
A
893 if (p1 == 0) {
894 error = ESRCH;
895 break;
896 }
2d21ac55
A
897 tmp = p1->p_pgrpid;
898 proc_rele(p1);
1c79356b 899 }
2d21ac55 900 error = fo_ioctl(fp, (int)TIOCSPGRP, (caddr_t)&tmp, &context);
1c79356b
A
901 break;
902
903 case FIOGETOWN:
904 if (fp->f_type == DTYPE_SOCKET) {
91447636 905 *(int *)datap = ((struct socket *)fp->f_data)->so_pgid;
1c79356b
A
906 break;
907 }
2d21ac55 908 error = fo_ioctl(fp, TIOCGPGRP, datap, &context);
91447636 909 *(int *)datap = -*(int *)datap;
1c79356b
A
910 break;
911
912 default:
2d21ac55 913 error = fo_ioctl(fp, com, datap, &context);
1c79356b
A
914 /*
915 * Copy any data to user, size was
916 * already set and checked above.
917 */
39236c6e 918 if (error == 0 && (com & IOC_OUT) && size)
91447636 919 error = copyout(datap, uap->data, (u_int)size);
1c79356b
A
920 break;
921 }
91447636
A
922out:
923 fp_drop(p, fd, fp, 1);
924 proc_fdunlock(p);
39236c6e
A
925
926out_nofp:
927 if (memp)
928 kfree(memp, size);
91447636 929 return(error);
1c79356b
A
930}
931
1c79356b 932int selwait, nselcoll;
0b4e3aa0
A
933#define SEL_FIRSTPASS 1
934#define SEL_SECONDPASS 2
9bccf70c
A
935extern int selcontinue(int error);
936extern int selprocess(int error, int sel_pass);
fe8ab488 937static int selscan(struct proc *p, struct _select * sel, struct _select_data * seldata,
3e170ce0 938 int nfd, int32_t *retval, int sel_pass, struct waitq_set *wqset);
6d2010ae
A
939static int selcount(struct proc *p, u_int32_t *ibits, int nfd, int *count);
940static int seldrop_locked(struct proc *p, u_int32_t *ibits, int nfd, int lim, int *need_wakeup, int fromselcount);
91447636 941static int seldrop(struct proc *p, u_int32_t *ibits, int nfd);
4bd07ac2 942static int select_internal(struct proc *p, struct select_nocancel_args *uap, uint64_t timeout, int32_t *retval);
1c79356b
A
943
944/*
945 * Select system call.
2d21ac55
A
946 *
947 * Returns: 0 Success
948 * EINVAL Invalid argument
949 * EAGAIN Nonconformant error if allocation fails
1c79356b 950 */
9bccf70c 951int
b0d623f7 952select(struct proc *p, struct select_args *uap, int32_t *retval)
2d21ac55
A
953{
954 __pthread_testcancel(1);
4bd07ac2 955 return select_nocancel(p, (struct select_nocancel_args *)uap, retval);
2d21ac55
A
956}
957
958int
b0d623f7 959select_nocancel(struct proc *p, struct select_nocancel_args *uap, int32_t *retval)
4bd07ac2
A
960{
961 uint64_t timeout = 0;
962
963 if (uap->tv) {
964 int err;
965 struct timeval atv;
966 if (IS_64BIT_PROCESS(p)) {
967 struct user64_timeval atv64;
968 err = copyin(uap->tv, (caddr_t)&atv64, sizeof(atv64));
969 /* Loses resolution - assume timeout < 68 years */
970 atv.tv_sec = atv64.tv_sec;
971 atv.tv_usec = atv64.tv_usec;
972 } else {
973 struct user32_timeval atv32;
974 err = copyin(uap->tv, (caddr_t)&atv32, sizeof(atv32));
975 atv.tv_sec = atv32.tv_sec;
976 atv.tv_usec = atv32.tv_usec;
977 }
978 if (err)
979 return err;
980
981 if (itimerfix(&atv)) {
982 err = EINVAL;
983 return err;
984 }
985
986 clock_absolutetime_interval_to_deadline(tvtoabstime(&atv), &timeout);
987 }
988
989 return select_internal(p, uap, timeout, retval);
990}
991
992int
993pselect(struct proc *p, struct pselect_args *uap, int32_t *retval)
994{
995 __pthread_testcancel(1);
996 return pselect_nocancel(p, (struct pselect_nocancel_args *)uap, retval);
997}
998
999int
1000pselect_nocancel(struct proc *p, struct pselect_nocancel_args *uap, int32_t *retval)
1001{
1002 int err;
1003 struct uthread *ut;
1004 uint64_t timeout = 0;
1005
1006 if (uap->ts) {
1007 struct timespec ts;
1008
1009 if (IS_64BIT_PROCESS(p)) {
1010 struct user64_timespec ts64;
1011 err = copyin(uap->ts, (caddr_t)&ts64, sizeof(ts64));
1012 ts.tv_sec = ts64.tv_sec;
1013 ts.tv_nsec = ts64.tv_nsec;
1014 } else {
1015 struct user32_timespec ts32;
1016 err = copyin(uap->ts, (caddr_t)&ts32, sizeof(ts32));
1017 ts.tv_sec = ts32.tv_sec;
1018 ts.tv_nsec = ts32.tv_nsec;
1019 }
1020 if (err) {
1021 return err;
1022 }
1023
1024 if (!timespec_is_valid(&ts)) {
1025 return EINVAL;
1026 }
1027 clock_absolutetime_interval_to_deadline(tstoabstime(&ts), &timeout);
1028 }
1029
1030 ut = get_bsdthread_info(current_thread());
1031
1032 if (uap->mask != USER_ADDR_NULL) {
1033 /* save current mask, then copyin and set new mask */
1034 sigset_t newset;
1035 err = copyin(uap->mask, &newset, sizeof(sigset_t));
1036 if (err) {
1037 return err;
1038 }
1039 ut->uu_oldmask = ut->uu_sigmask;
1040 ut->uu_flag |= UT_SAS_OLDMASK;
1041 ut->uu_sigmask = (newset & ~sigcantmask);
1042 }
1043
1044 err = select_internal(p, (struct select_nocancel_args *)uap, timeout, retval);
1045
1046 if (err != EINTR && ut->uu_flag & UT_SAS_OLDMASK) {
1047 /*
1048 * Restore old mask (direct return case). NOTE: EINTR can also be returned
1049 * if the thread is cancelled. In that case, we don't reset the signal
1050 * mask to its original value (which usually happens in the signal
1051 * delivery path). This behavior is permitted by POSIX.
1052 */
1053 ut->uu_sigmask = ut->uu_oldmask;
1054 ut->uu_oldmask = 0;
1055 ut->uu_flag &= ~UT_SAS_OLDMASK;
1056 }
1057
1058 return err;
1059}
1060
1061/*
1062 * Generic implementation of {,p}select. Care: we type-pun uap across the two
1063 * syscalls, which differ slightly. The first 4 arguments (nfds and the fd sets)
1064 * are identical. The 5th (timeout) argument points to different types, so we
1065 * unpack in the syscall-specific code, but the generic code still does a null
1066 * check on this argument to determine if a timeout was specified.
1067 */
1068static int
1069select_internal(struct proc *p, struct select_nocancel_args *uap, uint64_t timeout, int32_t *retval)
1c79356b 1070{
9bccf70c 1071 int error = 0;
3e170ce0 1072 u_int ni, nw;
91447636 1073 thread_t th_act;
1c79356b
A
1074 struct uthread *uth;
1075 struct _select *sel;
fe8ab488 1076 struct _select_data *seldata;
1c79356b 1077 int needzerofill = 1;
0b4e3aa0 1078 int count = 0;
3e170ce0 1079 size_t sz = 0;
1c79356b 1080
91447636 1081 th_act = current_thread();
1c79356b 1082 uth = get_bsdthread_info(th_act);
91447636 1083 sel = &uth->uu_select;
fe8ab488 1084 seldata = &uth->uu_kevent.ss_select_data;
1c79356b
A
1085 *retval = 0;
1086
fe8ab488
A
1087 seldata->args = uap;
1088 seldata->retval = retval;
3e170ce0
A
1089 seldata->wqp = NULL;
1090 seldata->count = 0;
fe8ab488 1091
0b4e3aa0 1092 if (uap->nd < 0) {
1c79356b 1093 return (EINVAL);
0b4e3aa0 1094 }
1c79356b 1095
2d21ac55
A
1096 /* select on thread of process that already called proc_exit() */
1097 if (p->p_fd == NULL) {
1098 return (EBADF);
1099 }
1100
1c79356b
A
1101 if (uap->nd > p->p_fd->fd_nfiles)
1102 uap->nd = p->p_fd->fd_nfiles; /* forgiving; slightly wrong */
1103
1104 nw = howmany(uap->nd, NFDBITS);
1105 ni = nw * sizeof(fd_mask);
1106
1107 /*
2d21ac55
A
1108 * if the previously allocated space for the bits is smaller than
1109 * what is requested or no space has yet been allocated for this
1110 * thread, allocate enough space now.
1111 *
1112 * Note: If this process fails, select() will return EAGAIN; this
1113 * is the same thing pool() returns in a no-memory situation, but
1114 * it is not a POSIX compliant error code for select().
1c79356b
A
1115 */
1116 if (sel->nbytes < (3 * ni)) {
2d21ac55
A
1117 int nbytes = 3 * ni;
1118
1119 /* Free previous allocation, if any */
1120 if (sel->ibits != NULL)
1121 FREE(sel->ibits, M_TEMP);
1122 if (sel->obits != NULL) {
1123 FREE(sel->obits, M_TEMP);
1124 /* NULL out; subsequent ibits allocation may fail */
1125 sel->obits = NULL;
1126 }
1127
1128 MALLOC(sel->ibits, u_int32_t *, nbytes, M_TEMP, M_WAITOK | M_ZERO);
1129 if (sel->ibits == NULL)
1130 return (EAGAIN);
1131 MALLOC(sel->obits, u_int32_t *, nbytes, M_TEMP, M_WAITOK | M_ZERO);
1132 if (sel->obits == NULL) {
1133 FREE(sel->ibits, M_TEMP);
1134 sel->ibits = NULL;
1135 return (EAGAIN);
1136 }
1137 sel->nbytes = nbytes;
1c79356b 1138 needzerofill = 0;
2d21ac55 1139 }
1c79356b
A
1140
1141 if (needzerofill) {
1142 bzero((caddr_t)sel->ibits, sel->nbytes);
1143 bzero((caddr_t)sel->obits, sel->nbytes);
1144 }
1145
1146 /*
1147 * get the bits from the user address space
1148 */
1149#define getbits(name, x) \
1150 do { \
91447636 1151 if (uap->name && (error = copyin(uap->name, \
1c79356b
A
1152 (caddr_t)&sel->ibits[(x) * nw], ni))) \
1153 goto continuation; \
1154 } while (0)
1155
1156 getbits(in, 0);
1157 getbits(ou, 1);
1158 getbits(ex, 2);
1159#undef getbits
1160
4bd07ac2 1161 seldata->abstime = timeout;
9bccf70c 1162
6d2010ae 1163 if ( (error = selcount(p, sel->ibits, uap->nd, &count)) ) {
0b4e3aa0
A
1164 goto continuation;
1165 }
b0d623f7 1166
3e170ce0
A
1167 /*
1168 * We need an array of waitq pointers. This is due to the new way
1169 * in which waitqs are linked to sets. When a thread selects on a
1170 * file descriptor, a waitq (embedded in a selinfo structure) is
1171 * added to the thread's local waitq set. There is no longer any
1172 * way to directly iterate over all members of a given waitq set.
1173 * The process of linking a waitq into a set may allocate a link
1174 * table object. Because we can't iterate over all the waitqs to
1175 * which our thread waitq set belongs, we need a way of removing
1176 * this link object!
1177 *
1178 * Thus we need a buffer which will hold one waitq pointer
1179 * per FD being selected. During the tear-down phase we can use
1180 * these pointers to dis-associate the underlying selinfo's waitq
1181 * from our thread's waitq set.
1182 *
1183 * Because we also need to allocate a waitq set for this thread,
1184 * we use a bare buffer pointer to hold all the memory. Note that
1185 * this memory is cached in the thread pointer and not reaped until
1186 * the thread exists. This is generally OK because threads that
1187 * call select tend to keep calling select repeatedly.
1188 */
1189 sz = ALIGN(sizeof(struct waitq_set)) + (count * sizeof(uint64_t));
1190 if (sz > uth->uu_wqstate_sz) {
1191 /* (re)allocate a buffer to hold waitq pointers */
1192 if (uth->uu_wqset) {
1193 if (waitq_set_is_valid(uth->uu_wqset))
1194 waitq_set_deinit(uth->uu_wqset);
1195 FREE(uth->uu_wqset, M_SELECT);
1196 } else if (uth->uu_wqstate_sz && !uth->uu_wqset)
1197 panic("select: thread structure corrupt! "
1198 "uu_wqstate_sz:%ld, wqstate_buf == NULL",
1199 uth->uu_wqstate_sz);
1200 uth->uu_wqstate_sz = sz;
1201 MALLOC(uth->uu_wqset, struct waitq_set *, sz, M_SELECT, M_WAITOK);
1202 if (!uth->uu_wqset)
1203 panic("can't allocate %ld bytes for wqstate buffer",
1204 uth->uu_wqstate_sz);
1205 waitq_set_init(uth->uu_wqset,
39037602 1206 SYNC_POLICY_FIFO|SYNC_POLICY_PREPOST, NULL, NULL);
3e170ce0
A
1207 }
1208
1209 if (!waitq_set_is_valid(uth->uu_wqset))
1210 waitq_set_init(uth->uu_wqset,
39037602 1211 SYNC_POLICY_FIFO|SYNC_POLICY_PREPOST, NULL, NULL);
3e170ce0
A
1212
1213 /* the last chunk of our buffer is an array of waitq pointers */
1214 seldata->wqp = (uint64_t *)((char *)(uth->uu_wqset) + ALIGN(sizeof(struct waitq_set)));
1215 bzero(seldata->wqp, sz - ALIGN(sizeof(struct waitq_set)));
1216
fe8ab488 1217 seldata->count = count;
0b4e3aa0 1218
1c79356b 1219continuation:
6d2010ae
A
1220
1221 if (error) {
1222 /*
1223 * We have already cleaned up any state we established,
1224 * either locally or as a result of selcount(). We don't
1225 * need to wait_subqueue_unlink_all(), since we haven't set
1226 * anything at this point.
1227 */
1228 return (error);
1229 }
1230
1231 return selprocess(0, SEL_FIRSTPASS);
0b4e3aa0
A
1232}
1233
1234int
1235selcontinue(int error)
1236{
9bccf70c 1237 return selprocess(error, SEL_SECONDPASS);
1c79356b
A
1238}
1239
6d2010ae
A
1240
1241/*
1242 * selprocess
1243 *
1244 * Parameters: error The error code from our caller
1245 * sel_pass The pass we are on
1246 */
1c79356b 1247int
91447636 1248selprocess(int error, int sel_pass)
1c79356b 1249{
9bccf70c 1250 int ncoll;
1c79356b 1251 u_int ni, nw;
91447636 1252 thread_t th_act;
1c79356b
A
1253 struct uthread *uth;
1254 struct proc *p;
fe8ab488 1255 struct select_nocancel_args *uap;
1c79356b
A
1256 int *retval;
1257 struct _select *sel;
fe8ab488 1258 struct _select_data *seldata;
0b4e3aa0 1259 int unwind = 1;
9bccf70c 1260 int prepost = 0;
0b4e3aa0
A
1261 int somewakeup = 0;
1262 int doretry = 0;
9bccf70c 1263 wait_result_t wait_result;
1c79356b
A
1264
1265 p = current_proc();
91447636 1266 th_act = current_thread();
1c79356b 1267 uth = get_bsdthread_info(th_act);
91447636 1268 sel = &uth->uu_select;
fe8ab488
A
1269 seldata = &uth->uu_kevent.ss_select_data;
1270 uap = seldata->args;
1271 retval = seldata->retval;
1c79356b 1272
0b4e3aa0 1273 if ((error != 0) && (sel_pass == SEL_FIRSTPASS))
3e170ce0 1274 unwind = 0;
fe8ab488 1275 if (seldata->count == 0)
3e170ce0 1276 unwind = 0;
1c79356b 1277retry:
3e170ce0 1278 if (error != 0)
6d2010ae 1279 goto done;
0b4e3aa0 1280
1c79356b 1281 ncoll = nselcoll;
b0d623f7 1282 OSBitOrAtomic(P_SELECT, &p->p_flag);
3e170ce0 1283
0b4e3aa0 1284 /* skip scans if the select is just for timeouts */
fe8ab488 1285 if (seldata->count) {
3e170ce0 1286 error = selscan(p, sel, seldata, uap->nd, retval, sel_pass, uth->uu_wqset);
0b4e3aa0
A
1287 if (error || *retval) {
1288 goto done;
1289 }
3e170ce0
A
1290 if (prepost || somewakeup) {
1291 /*
1292 * if the select of log, then we can wakeup and
1293 * discover some one else already read the data;
1294 * go to select again if time permits
1295 */
1296 prepost = 0;
1297 somewakeup = 0;
1298 doretry = 1;
0b4e3aa0
A
1299 }
1300 }
1301
9bccf70c
A
1302 if (uap->tv) {
1303 uint64_t now;
1304
1305 clock_get_uptime(&now);
fe8ab488 1306 if (now >= seldata->abstime)
9bccf70c 1307 goto done;
1c79356b 1308 }
0b4e3aa0
A
1309
1310 if (doretry) {
1311 /* cleanup obits and try again */
1312 doretry = 0;
1313 sel_pass = SEL_FIRSTPASS;
1314 goto retry;
1315 }
1316
1c79356b
A
1317 /*
1318 * To effect a poll, the timeout argument should be
1319 * non-nil, pointing to a zero-valued timeval structure.
1320 */
fe8ab488 1321 if (uap->tv && seldata->abstime == 0) {
1c79356b
A
1322 goto done;
1323 }
0b4e3aa0
A
1324
1325 /* No spurious wakeups due to colls,no need to check for them */
1326 if ((sel_pass == SEL_SECONDPASS) || ((p->p_flag & P_SELECT) == 0)) {
1327 sel_pass = SEL_FIRSTPASS;
1c79356b
A
1328 goto retry;
1329 }
0b4e3aa0 1330
b0d623f7 1331 OSBitAndAtomic(~((uint32_t)P_SELECT), &p->p_flag);
1c79356b 1332
0b4e3aa0 1333 /* if the select is just for timeout skip check */
3e170ce0 1334 if (seldata->count && (sel_pass == SEL_SECONDPASS))
0b4e3aa0
A
1335 panic("selprocess: 2nd pass assertwaiting");
1336
3e170ce0
A
1337 /* waitq_set has waitqueue as first element */
1338 wait_result = waitq_assert_wait64_leeway((struct waitq *)uth->uu_wqset,
1339 NO_EVENT64, THREAD_ABORTSAFE,
1340 TIMEOUT_URGENCY_USER_NORMAL,
1341 seldata->abstime,
1342 TIMEOUT_NO_LEEWAY);
9bccf70c
A
1343 if (wait_result != THREAD_AWAKENED) {
1344 /* there are no preposted events */
91447636
A
1345 error = tsleep1(NULL, PSOCK | PCATCH,
1346 "select", 0, selcontinue);
0b4e3aa0
A
1347 } else {
1348 prepost = 1;
1349 error = 0;
1350 }
1351
0b4e3aa0 1352 if (error == 0) {
6d2010ae 1353 sel_pass = SEL_SECONDPASS;
0b4e3aa0 1354 if (!prepost)
6d2010ae 1355 somewakeup = 1;
1c79356b 1356 goto retry;
0b4e3aa0 1357 }
1c79356b 1358done:
91447636 1359 if (unwind) {
91447636 1360 seldrop(p, sel->ibits, uap->nd);
3e170ce0
A
1361 waitq_set_deinit(uth->uu_wqset);
1362 /*
1363 * zero out the waitq pointer array to avoid use-after free
1364 * errors in the selcount error path (seldrop_locked) if/when
1365 * the thread re-calls select().
1366 */
1367 bzero((void *)uth->uu_wqset, uth->uu_wqstate_sz);
91447636 1368 }
b0d623f7 1369 OSBitAndAtomic(~((uint32_t)P_SELECT), &p->p_flag);
1c79356b
A
1370 /* select is not restarted after signals... */
1371 if (error == ERESTART)
1372 error = EINTR;
1373 if (error == EWOULDBLOCK)
1374 error = 0;
1c79356b
A
1375 nw = howmany(uap->nd, NFDBITS);
1376 ni = nw * sizeof(fd_mask);
1377
1378#define putbits(name, x) \
1379 do { \
91447636
A
1380 if (uap->name && (error2 = \
1381 copyout((caddr_t)&sel->obits[(x) * nw], uap->name, ni))) \
1c79356b
A
1382 error = error2; \
1383 } while (0)
1384
1385 if (error == 0) {
1386 int error2;
1387
1388 putbits(in, 0);
1389 putbits(ou, 1);
1390 putbits(ex, 2);
1391#undef putbits
1392 }
4bd07ac2
A
1393
1394 if (error != EINTR && sel_pass == SEL_SECONDPASS && uth->uu_flag & UT_SAS_OLDMASK) {
1395 /* restore signal mask - continuation case */
1396 uth->uu_sigmask = uth->uu_oldmask;
1397 uth->uu_oldmask = 0;
1398 uth->uu_flag &= ~UT_SAS_OLDMASK;
1399 }
1400
1c79356b 1401 return(error);
1c79356b
A
1402}
1403
6d2010ae 1404
3e170ce0
A
1405/**
1406 * remove the fileproc's underlying waitq from the supplied waitq set;
1407 * clear FP_INSELECT when appropriate
1408 *
1409 * Parameters:
1410 * fp File proc that is potentially currently in select
1411 * wqset Waitq set to which the fileproc may belong
1412 * (usually this is the thread's private waitq set)
1413 * Conditions:
1414 * proc_fdlock is held
1415 */
1416static void selunlinkfp(struct fileproc *fp, uint64_t wqp_id, struct waitq_set *wqset)
1417{
1418 int valid_set = waitq_set_is_valid(wqset);
1419 int valid_q = !!wqp_id;
1420
1421 /*
1422 * This could be called (from selcount error path) before we setup
1423 * the thread's wqset. Check the wqset passed in, and only unlink if
1424 * the set is valid.
1425 */
1426
1427 /* unlink the underlying waitq from the input set (thread waitq set) */
1428 if (valid_q && valid_set)
1429 waitq_unlink_by_prepost_id(wqp_id, wqset);
1430
1431 /* allow passing a NULL/invalid fp for seldrop unwind */
1432 if (!fp || !(fp->f_flags & (FP_INSELECT|FP_SELCONFLICT)))
1433 return;
1434
1435 /*
1436 * We can always remove the conflict queue from our thread's set: this
1437 * will not affect other threads that potentially need to be awoken on
1438 * the conflict queue during a fileproc_drain - those sets will still
1439 * be linked with the global conflict queue, and the last waiter
1440 * on the fp clears the CONFLICT marker.
1441 */
1442 if (valid_set && (fp->f_flags & FP_SELCONFLICT))
1443 waitq_unlink(&select_conflict_queue, wqset);
1444
1445 /* jca: TODO:
1446 * This isn't quite right - we don't actually know if this
1447 * fileproc is in another select or not! Here we just assume
1448 * that if we were the first thread to select on the FD, then
1449 * we'll be the one to clear this flag...
1450 */
1451 if (valid_set && fp->f_wset == (void *)wqset) {
1452 fp->f_flags &= ~FP_INSELECT;
1453 fp->f_wset = NULL;
1454 }
1455}
1456
1457/**
1458 * connect a fileproc to the given wqset, potentially bridging to a waitq
1459 * pointed to indirectly by wq_data
1460 *
1461 * Parameters:
1462 * fp File proc potentially currently in select
1463 * wq_data Pointer to a pointer to a waitq (could be NULL)
1464 * wqset Waitq set to which the fileproc should now belong
1465 * (usually this is the thread's private waitq set)
1466 *
1467 * Conditions:
1468 * proc_fdlock is held
1469 */
1470static uint64_t sellinkfp(struct fileproc *fp, void **wq_data, struct waitq_set *wqset)
1471{
1472 struct waitq *f_wq = NULL;
1473
1474 if ((fp->f_flags & FP_INSELECT) != FP_INSELECT) {
1475 if (wq_data)
1476 panic("non-null data:%p on fp:%p not in select?!"
1477 "(wqset:%p)", wq_data, fp, wqset);
1478 return 0;
1479 }
1480
1481 if ((fp->f_flags & FP_SELCONFLICT) == FP_SELCONFLICT) {
1482 /*
1483 * The conflict queue requires disabling interrupts, so we
1484 * need to explicitly reserve a link object to avoid a
1485 * panic/assert in the waitq code. Hopefully this extra step
1486 * can be avoided if we can split the waitq structure into
1487 * blocking and linkage sub-structures.
1488 */
1489 uint64_t reserved_link = waitq_link_reserve(&select_conflict_queue);
1490 waitq_link(&select_conflict_queue, wqset, WAITQ_SHOULD_LOCK, &reserved_link);
1491 waitq_link_release(reserved_link);
1492 }
1493
1494 /*
1495 * The wq_data parameter has potentially been set by selrecord called
1496 * from a subsystems fo_select() function. If the subsystem does not
1497 * call selrecord, then wq_data will be NULL
1498 *
1499 * Use memcpy to get the value into a proper pointer because
1500 * wq_data most likely points to a stack variable that could be
1501 * unaligned on 32-bit systems.
1502 */
1503 if (wq_data) {
1504 memcpy(&f_wq, wq_data, sizeof(f_wq));
1505 if (!waitq_is_valid(f_wq))
1506 f_wq = NULL;
1507 }
1508
1509 /* record the first thread's wqset in the fileproc structure */
1510 if (!fp->f_wset)
1511 fp->f_wset = (void *)wqset;
1512
1513 /* handles NULL f_wq */
1514 return waitq_get_prepost_id(f_wq);
1515}
1516
1517
6d2010ae
A
1518/*
1519 * selscan
1520 *
1521 * Parameters: p Process performing the select
1522 * sel The per-thread select context structure
1523 * nfd The number of file descriptors to scan
1524 * retval The per thread system call return area
1525 * sel_pass Which pass this is; allowed values are
1526 * SEL_FIRSTPASS and SEL_SECONDPASS
3e170ce0 1527 * wqset The per thread wait queue set
6d2010ae
A
1528 *
1529 * Returns: 0 Success
1530 * EIO Invalid p->p_fd field XXX Obsolete?
1531 * EBADF One of the files in the bit vector is
1532 * invalid.
1533 */
1c79356b 1534static int
3e170ce0
A
1535selscan(struct proc *p, struct _select *sel, struct _select_data * seldata,
1536 int nfd, int32_t *retval, int sel_pass, struct waitq_set *wqset)
1c79356b 1537{
2d21ac55
A
1538 struct filedesc *fdp = p->p_fd;
1539 int msk, i, j, fd;
1540 u_int32_t bits;
91447636 1541 struct fileproc *fp;
6d2010ae
A
1542 int n = 0; /* count of bits */
1543 int nc = 0; /* bit vector offset (nc'th bit) */
1c79356b
A
1544 static int flag[3] = { FREAD, FWRITE, 0 };
1545 u_int32_t *iptr, *optr;
1546 u_int nw;
0b4e3aa0 1547 u_int32_t *ibits, *obits;
3e170ce0 1548 uint64_t reserved_link, *rl_ptr = NULL;
6d2010ae 1549 int count;
2d21ac55 1550 struct vfs_context context = *vfs_context_current();
1c79356b
A
1551
1552 /*
1553 * Problems when reboot; due to MacOSX signal probs
1554 * in Beaker1C ; verify that the p->p_fd is valid
1555 */
1556 if (fdp == NULL) {
1557 *retval=0;
1558 return(EIO);
1559 }
0b4e3aa0
A
1560 ibits = sel->ibits;
1561 obits = sel->obits;
0b4e3aa0 1562
1c79356b
A
1563 nw = howmany(nfd, NFDBITS);
1564
fe8ab488 1565 count = seldata->count;
2d21ac55
A
1566
1567 nc = 0;
3e170ce0
A
1568 if (!count) {
1569 *retval = 0;
1570 return 0;
1571 }
1572
1573 proc_fdlock(p);
1574 for (msk = 0; msk < 3; msk++) {
1575 iptr = (u_int32_t *)&ibits[msk * nw];
1576 optr = (u_int32_t *)&obits[msk * nw];
1577
1578 for (i = 0; i < nfd; i += NFDBITS) {
1579 bits = iptr[i/NFDBITS];
2d21ac55 1580
3e170ce0
A
1581 while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
1582 bits &= ~(1 << j);
2d21ac55 1583
3e170ce0
A
1584 if (fd < fdp->fd_nfiles)
1585 fp = fdp->fd_ofiles[fd];
1586 else
1587 fp = NULL;
fe8ab488 1588
3e170ce0
A
1589 if (fp == NULL || (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
1590 /*
1591 * If we abort because of a bad
1592 * fd, let the caller unwind...
1593 */
1594 proc_fdunlock(p);
1595 return(EBADF);
1596 }
1597 if (sel_pass == SEL_SECONDPASS) {
1598 reserved_link = 0;
1599 rl_ptr = NULL;
1600 selunlinkfp(fp, seldata->wqp[nc], wqset);
1601 } else {
1602 reserved_link = waitq_link_reserve((struct waitq *)wqset);
1603 rl_ptr = &reserved_link;
1604 if (fp->f_flags & FP_INSELECT)
1605 /* someone is already in select on this fp */
1606 fp->f_flags |= FP_SELCONFLICT;
fe8ab488 1607 else
3e170ce0
A
1608 fp->f_flags |= FP_INSELECT;
1609 }
2d21ac55 1610
3e170ce0 1611 context.vc_ucred = fp->f_cred;
2d21ac55 1612
3e170ce0
A
1613 /*
1614 * stash this value b/c fo_select may replace
1615 * reserved_link with a pointer to a waitq object
1616 */
1617 uint64_t rsvd = reserved_link;
1618
1619 /* The select; set the bit, if true */
1620 if (fp->f_ops && fp->f_type
1621 && fo_select(fp, flag[msk], rl_ptr, &context)) {
1622 optr[fd/NFDBITS] |= (1 << (fd % NFDBITS));
1623 n++;
1624 }
1625 if (sel_pass == SEL_FIRSTPASS) {
1626 waitq_link_release(rsvd);
1627 /*
1628 * If the fp's supporting selinfo structure was linked
1629 * to this thread's waitq set, then 'reserved_link'
1630 * will have been updated by selrecord to be a pointer
1631 * to the selinfo's waitq.
1632 */
1633 if (reserved_link == rsvd)
1634 rl_ptr = NULL; /* fo_select never called selrecord() */
1635 /*
1636 * Hook up the thread's waitq set either to
1637 * the fileproc structure, or to the global
1638 * conflict queue: but only on the first
1639 * select pass.
1640 */
1641 seldata->wqp[nc] = sellinkfp(fp, (void **)rl_ptr, wqset);
2d21ac55 1642 }
3e170ce0 1643 nc++;
2d21ac55
A
1644 }
1645 }
0b4e3aa0 1646 }
3e170ce0
A
1647 proc_fdunlock(p);
1648
1c79356b
A
1649 *retval = n;
1650 return (0);
1651}
1652
3e170ce0 1653int poll_callback(struct kqueue *, struct kevent_internal_s *, void *);
91447636
A
1654
1655struct poll_continue_args {
1656 user_addr_t pca_fds;
1657 u_int pca_nfds;
1658 u_int pca_rfds;
1659};
1660
/*
 * poll
 *
 * Cancellable entry point for poll(2): calls __pthread_testcancel(1)
 * and then forwards the unchanged arguments to poll_nocancel().
 *
 * Returns:	whatever poll_nocancel() returns
 */
int
poll(struct proc *p, struct poll_args *uap, int32_t *retval)
{
	struct poll_nocancel_args *nc_uap;

	nc_uap = (struct poll_nocancel_args *)uap;

	__pthread_testcancel(1);
	return (poll_nocancel(p, nc_uap, retval));
}
1667
1668
1669int
b0d623f7 1670poll_nocancel(struct proc *p, struct poll_nocancel_args *uap, int32_t *retval)
1c79356b 1671{
91447636
A
1672 struct poll_continue_args *cont;
1673 struct pollfd *fds;
1674 struct kqueue *kq;
1675 struct timeval atv;
1676 int ncoll, error = 0;
1677 u_int nfds = uap->nfds;
1678 u_int rfds = 0;
1679 u_int i;
1680 size_t ni;
1c79356b 1681
91447636
A
1682 /*
1683 * This is kinda bogus. We have fd limits, but that is not
1684 * really related to the size of the pollfd array. Make sure
1685 * we let the process use at least FD_SETSIZE entries and at
1686 * least enough for the current limits. We want to be reasonably
1687 * safe, but not overly restrictive.
1688 */
1689 if (nfds > OPEN_MAX ||
2d21ac55 1690 (nfds > p->p_rlimit[RLIMIT_NOFILE].rlim_cur && (proc_suser(p) || nfds > FD_SETSIZE)))
91447636 1691 return (EINVAL);
1c79356b 1692
39037602 1693 kq = kqueue_alloc(p, 0);
91447636
A
1694 if (kq == NULL)
1695 return (EAGAIN);
1696
1697 ni = nfds * sizeof(struct pollfd) + sizeof(struct poll_continue_args);
1698 MALLOC(cont, struct poll_continue_args *, ni, M_TEMP, M_WAITOK);
1699 if (NULL == cont) {
1700 error = EAGAIN;
1701 goto out;
1702 }
1703
1704 fds = (struct pollfd *)&cont[1];
1705 error = copyin(uap->fds, fds, nfds * sizeof(struct pollfd));
1706 if (error)
1707 goto out;
1708
1709 if (uap->timeout != -1) {
1710 struct timeval rtv;
1711
1712 atv.tv_sec = uap->timeout / 1000;
1713 atv.tv_usec = (uap->timeout % 1000) * 1000;
1714 if (itimerfix(&atv)) {
1715 error = EINVAL;
1716 goto out;
1717 }
1718 getmicrouptime(&rtv);
1719 timevaladd(&atv, &rtv);
1720 } else {
1721 atv.tv_sec = 0;
1722 atv.tv_usec = 0;
1723 }
1724
1725 /* JMM - all this P_SELECT stuff is bogus */
1726 ncoll = nselcoll;
b0d623f7 1727 OSBitOrAtomic(P_SELECT, &p->p_flag);
91447636
A
1728 for (i = 0; i < nfds; i++) {
1729 short events = fds[i].events;
91447636
A
1730
1731 /* per spec, ignore fd values below zero */
1732 if (fds[i].fd < 0) {
1733 fds[i].revents = 0;
1734 continue;
1735 }
1736
1737 /* convert the poll event into a kqueue kevent */
3e170ce0
A
1738 struct kevent_internal_s kev = {
1739 .ident = fds[i].fd,
1740 .flags = EV_ADD | EV_ONESHOT | EV_POLL,
1741 .udata = CAST_USER_ADDR_T(&fds[i]) };
91447636
A
1742
1743 /* Handle input events */
2d21ac55 1744 if (events & ( POLLIN | POLLRDNORM | POLLPRI | POLLRDBAND | POLLHUP )) {
91447636 1745 kev.filter = EVFILT_READ;
04b8595b 1746 if (events & ( POLLPRI | POLLRDBAND ))
91447636 1747 kev.flags |= EV_OOBAND;
39037602 1748 kevent_register(kq, &kev, p);
91447636
A
1749 }
1750
1751 /* Handle output events */
39037602
A
1752 if ((kev.flags & EV_ERROR) == 0 &&
1753 (events & ( POLLOUT | POLLWRNORM | POLLWRBAND ))) {
91447636 1754 kev.filter = EVFILT_WRITE;
39037602 1755 kevent_register(kq, &kev, p);
91447636
A
1756 }
1757
1758 /* Handle BSD extension vnode events */
39037602
A
1759 if ((kev.flags & EV_ERROR) == 0 &&
1760 (events & ( POLLEXTEND | POLLATTRIB | POLLNLINK | POLLWRITE ))) {
91447636
A
1761 kev.filter = EVFILT_VNODE;
1762 kev.fflags = 0;
1763 if (events & POLLEXTEND)
1764 kev.fflags |= NOTE_EXTEND;
1765 if (events & POLLATTRIB)
1766 kev.fflags |= NOTE_ATTRIB;
1767 if (events & POLLNLINK)
1768 kev.fflags |= NOTE_LINK;
1769 if (events & POLLWRITE)
1770 kev.fflags |= NOTE_WRITE;
39037602 1771 kevent_register(kq, &kev, p);
91447636
A
1772 }
1773
39037602 1774 if (kev.flags & EV_ERROR) {
91447636
A
1775 fds[i].revents = POLLNVAL;
1776 rfds++;
1777 } else
1778 fds[i].revents = 0;
1779 }
1780
1781 /* Did we have any trouble registering? */
39037602 1782 if (rfds == nfds)
91447636
A
1783 goto done;
1784
1785 /* scan for, and possibly wait for, the kevents to trigger */
1786 cont->pca_fds = uap->fds;
1787 cont->pca_nfds = nfds;
1788 cont->pca_rfds = rfds;
39037602 1789 error = kqueue_scan(kq, poll_callback, NULL, cont, NULL, &atv, p);
91447636
A
1790 rfds = cont->pca_rfds;
1791
1792 done:
b0d623f7 1793 OSBitAndAtomic(~((uint32_t)P_SELECT), &p->p_flag);
91447636
A
1794 /* poll is not restarted after signals... */
1795 if (error == ERESTART)
1796 error = EINTR;
1797 if (error == EWOULDBLOCK)
1798 error = 0;
1799 if (error == 0) {
1800 error = copyout(fds, uap->fds, nfds * sizeof(struct pollfd));
1801 *retval = rfds;
1802 }
1803
1804 out:
1805 if (NULL != cont)
1806 FREE(cont, M_TEMP);
1807
2d21ac55 1808 kqueue_dealloc(kq);
91447636
A
1809 return (error);
1810}
1811
2d21ac55 1812int
3e170ce0 1813poll_callback(__unused struct kqueue *kq, struct kevent_internal_s *kevp, void *data)
91447636
A
1814{
1815 struct poll_continue_args *cont = (struct poll_continue_args *)data;
1816 struct pollfd *fds = CAST_DOWN(struct pollfd *, kevp->udata);
316670eb 1817 short prev_revents = fds->revents;
04b8595b 1818 short mask = 0;
ff6e181a 1819
91447636
A
1820 /* convert the results back into revents */
1821 if (kevp->flags & EV_EOF)
1822 fds->revents |= POLLHUP;
1823 if (kevp->flags & EV_ERROR)
1824 fds->revents |= POLLERR;
91447636
A
1825
1826 switch (kevp->filter) {
1827 case EVFILT_READ:
ff6e181a
A
1828 if (fds->revents & POLLHUP)
1829 mask = (POLLIN | POLLRDNORM | POLLPRI | POLLRDBAND );
1830 else {
3e170ce0 1831 mask = (POLLIN | POLLRDNORM);
ff6e181a 1832 if (kevp->flags & EV_OOBAND)
3e170ce0 1833 mask |= (POLLPRI | POLLRDBAND);
ff6e181a
A
1834 }
1835 fds->revents |= (fds->events & mask);
91447636
A
1836 break;
1837
1838 case EVFILT_WRITE:
1839 if (!(fds->revents & POLLHUP))
1840 fds->revents |= (fds->events & ( POLLOUT | POLLWRNORM | POLLWRBAND ));
1841 break;
1842
2d21ac55 1843 case EVFILT_VNODE:
91447636
A
1844 if (kevp->fflags & NOTE_EXTEND)
1845 fds->revents |= (fds->events & POLLEXTEND);
1846 if (kevp->fflags & NOTE_ATTRIB)
1847 fds->revents |= (fds->events & POLLATTRIB);
1848 if (kevp->fflags & NOTE_LINK)
1849 fds->revents |= (fds->events & POLLNLINK);
1850 if (kevp->fflags & NOTE_WRITE)
1851 fds->revents |= (fds->events & POLLWRITE);
1852 break;
1853 }
2d21ac55 1854
316670eb 1855 if (fds->revents != 0 && prev_revents == 0)
2d21ac55
A
1856 cont->pca_rfds++;
1857
91447636
A
1858 return 0;
1859}
1860
1861int
1862seltrue(__unused dev_t dev, __unused int flag, __unused struct proc *p)
1863{
1864
1865 return (1);
1866}
1867
6d2010ae
A
1868/*
1869 * selcount
1870 *
1871 * Count the number of bits set in the input bit vector, and establish an
1872 * outstanding fp->f_iocount for each of the descriptors which will be in
1873 * use in the select operation.
1874 *
1875 * Parameters: p The process doing the select
1876 * ibits The input bit vector
1877 * nfd The number of fd's in the vector
1878 * countp Pointer to where to store the bit count
1879 *
1880 * Returns: 0 Success
1881 * EIO Bad per process open file table
1882 * EBADF One of the bits in the input bit vector
1883 * references an invalid fd
1884 *
1885 * Implicit: *countp (modified) Count of fd's
1886 *
1887 * Notes: This function is the first pass under the proc_fdlock() that
1888 * permits us to recognize invalid descriptors in the bit vector;
1889 * the may, however, not remain valid through the drop and
1890 * later reacquisition of the proc_fdlock().
1891 */
91447636 1892static int
6d2010ae 1893selcount(struct proc *p, u_int32_t *ibits, int nfd, int *countp)
91447636 1894{
2d21ac55
A
1895 struct filedesc *fdp = p->p_fd;
1896 int msk, i, j, fd;
1897 u_int32_t bits;
91447636 1898 struct fileproc *fp;
0b4e3aa0 1899 int n = 0;
91447636 1900 u_int32_t *iptr;
0b4e3aa0 1901 u_int nw;
91447636
A
1902 int error=0;
1903 int dropcount;
6d2010ae 1904 int need_wakeup = 0;
0b4e3aa0
A
1905
1906 /*
1907 * Problems when reboot; due to MacOSX signal probs
1908 * in Beaker1C ; verify that the p->p_fd is valid
1909 */
1910 if (fdp == NULL) {
2d21ac55 1911 *countp = 0;
0b4e3aa0
A
1912 return(EIO);
1913 }
0b4e3aa0
A
1914 nw = howmany(nfd, NFDBITS);
1915
91447636 1916 proc_fdlock(p);
0b4e3aa0
A
1917 for (msk = 0; msk < 3; msk++) {
1918 iptr = (u_int32_t *)&ibits[msk * nw];
1919 for (i = 0; i < nfd; i += NFDBITS) {
1920 bits = iptr[i/NFDBITS];
1921 while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
1922 bits &= ~(1 << j);
fe8ab488
A
1923
1924 if (fd < fdp->fd_nfiles)
1925 fp = fdp->fd_ofiles[fd];
1926 else
1927 fp = NULL;
1928
0b4e3aa0
A
1929 if (fp == NULL ||
1930 (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
2d21ac55 1931 *countp = 0;
91447636
A
1932 error = EBADF;
1933 goto bad;
0b4e3aa0 1934 }
91447636 1935 fp->f_iocount++;
0b4e3aa0
A
1936 n++;
1937 }
1938 }
1939 }
91447636
A
1940 proc_fdunlock(p);
1941
2d21ac55 1942 *countp = n;
91447636 1943 return (0);
6d2010ae 1944
91447636
A
1945bad:
1946 dropcount = 0;
1947
3e170ce0 1948 if (n == 0)
91447636 1949 goto out;
6d2010ae
A
1950 /* Ignore error return; it's already EBADF */
1951 (void)seldrop_locked(p, ibits, nfd, n, &need_wakeup, 1);
91447636 1952
91447636
A
1953out:
1954 proc_fdunlock(p);
6d2010ae
A
1955 if (need_wakeup) {
1956 wakeup(&p->p_fpdrainwait);
1957 }
91447636
A
1958 return(error);
1959}
1960
6d2010ae
A
1961
1962/*
1963 * seldrop_locked
1964 *
1965 * Drop outstanding wait queue references set up during selscan(); drop the
1966 * outstanding per fileproc f_iocount() picked up during the selcount().
1967 *
1968 * Parameters: p Process performing the select
3e170ce0 1969 * ibits Input bit bector of fd's
6d2010ae
A
1970 * nfd Number of fd's
1971 * lim Limit to number of vector entries to
1972 * consider, or -1 for "all"
1973 * inselect True if
1974 * need_wakeup Pointer to flag to set to do a wakeup
1975 * if f_iocont on any descriptor goes to 0
1976 *
1977 * Returns: 0 Success
1978 * EBADF One or more fds in the bit vector
1979 * were invalid, but the rest
1980 * were successfully dropped
1981 *
1982 * Notes: An fd make become bad while the proc_fdlock() is not held,
1983 * if a multithreaded application closes the fd out from under
1984 * the in progress select. In this case, we still have to
1985 * clean up after the set up on the remaining fds.
1986 */
91447636 1987static int
6d2010ae 1988seldrop_locked(struct proc *p, u_int32_t *ibits, int nfd, int lim, int *need_wakeup, int fromselcount)
91447636 1989{
2d21ac55 1990 struct filedesc *fdp = p->p_fd;
3e170ce0 1991 int msk, i, j, nc, fd;
2d21ac55 1992 u_int32_t bits;
91447636 1993 struct fileproc *fp;
91447636
A
1994 u_int32_t *iptr;
1995 u_int nw;
6d2010ae
A
1996 int error = 0;
1997 int dropcount = 0;
1998 uthread_t uth = get_bsdthread_info(current_thread());
3e170ce0 1999 struct _select_data *seldata;
6d2010ae
A
2000
2001 *need_wakeup = 0;
91447636
A
2002
2003 /*
2004 * Problems when reboot; due to MacOSX signal probs
2005 * in Beaker1C ; verify that the p->p_fd is valid
2006 */
2007 if (fdp == NULL) {
2008 return(EIO);
2009 }
2010
2011 nw = howmany(nfd, NFDBITS);
3e170ce0 2012 seldata = &uth->uu_kevent.ss_select_data;
91447636 2013
3e170ce0 2014 nc = 0;
91447636
A
2015 for (msk = 0; msk < 3; msk++) {
2016 iptr = (u_int32_t *)&ibits[msk * nw];
2017 for (i = 0; i < nfd; i += NFDBITS) {
2018 bits = iptr[i/NFDBITS];
2019 while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
2020 bits &= ~(1 << j);
2021 fp = fdp->fd_ofiles[fd];
6d2010ae
A
2022 /*
2023 * If we've already dropped as many as were
2024 * counted/scanned, then we are done.
2025 */
2026 if ((fromselcount != 0) && (++dropcount > lim))
2027 goto done;
2028
3e170ce0
A
2029 /*
2030 * unlink even potentially NULL fileprocs.
2031 * If the FD was closed from under us, we
2032 * still need to cleanup the waitq links!
2033 */
2034 selunlinkfp(fp,
2035 seldata->wqp ? seldata->wqp[nc] : 0,
2036 uth->uu_wqset);
2037
2038 nc++;
2039
6d2010ae
A
2040 if (fp == NULL) {
2041 /* skip (now) bad fds */
2042 error = EBADF;
2043 continue;
2044 }
91447636 2045
6d2010ae
A
2046 fp->f_iocount--;
2047 if (fp->f_iocount < 0)
2048 panic("f_iocount overdecrement!");
2049
2050 if (fp->f_iocount == 0) {
2051 /*
2052 * The last iocount is responsible for clearing
2053 * selconfict flag - even if we didn't set it -
2054 * and is also responsible for waking up anyone
2055 * waiting on iocounts to drain.
2056 */
2057 if (fp->f_flags & FP_SELCONFLICT)
2058 fp->f_flags &= ~FP_SELCONFLICT;
2059 if (p->p_fpdrainwait) {
2060 p->p_fpdrainwait = 0;
2061 *need_wakeup = 1;
2062 }
91447636
A
2063 }
2064 }
2065 }
2066 }
6d2010ae
A
2067done:
2068 return (error);
2069}
2070
2071
2072static int
2073seldrop(struct proc *p, u_int32_t *ibits, int nfd)
2074{
2075 int error;
2076 int need_wakeup = 0;
2077
2078 proc_fdlock(p);
2079 error = seldrop_locked(p, ibits, nfd, nfd, &need_wakeup, 0);
91447636 2080 proc_fdunlock(p);
6d2010ae
A
2081 if (need_wakeup) {
2082 wakeup(&p->p_fpdrainwait);
2083 }
2084 return (error);
0b4e3aa0
A
2085}
2086
1c79356b
A
2087/*
2088 * Record a select request.
2089 */
2090void
3e170ce0 2091selrecord(__unused struct proc *selector, struct selinfo *sip, void *s_data)
1c79356b 2092{
91447636 2093 thread_t cur_act = current_thread();
0b4e3aa0 2094 struct uthread * ut = get_bsdthread_info(cur_act);
3e170ce0
A
2095 /* on input, s_data points to the 64-bit ID of a reserved link object */
2096 uint64_t *reserved_link = (uint64_t *)s_data;
1c79356b 2097
0b4e3aa0
A
2098 /* need to look at collisions */
2099
0b4e3aa0 2100 /*do not record if this is second pass of select */
3e170ce0 2101 if (!s_data)
0b4e3aa0 2102 return;
1c79356b 2103
0b4e3aa0 2104 if ((sip->si_flags & SI_INITED) == 0) {
39037602 2105 waitq_init(&sip->si_waitq, SYNC_POLICY_FIFO);
0b4e3aa0
A
2106 sip->si_flags |= SI_INITED;
2107 sip->si_flags &= ~SI_CLEAR;
2108 }
2109
3e170ce0 2110 if (sip->si_flags & SI_RECORDED)
0b4e3aa0 2111 sip->si_flags |= SI_COLL;
3e170ce0 2112 else
0b4e3aa0
A
2113 sip->si_flags &= ~SI_COLL;
2114
2115 sip->si_flags |= SI_RECORDED;
3e170ce0
A
2116 /* note: this checks for pre-existing linkage */
2117 waitq_link(&sip->si_waitq, ut->uu_wqset,
2118 WAITQ_SHOULD_LOCK, reserved_link);
2119
2120 /*
2121 * Always consume the reserved link.
2122 * We can always call waitq_link_release() safely because if
2123 * waitq_link is successful, it consumes the link and resets the
2124 * value to 0, in which case our call to release becomes a no-op.
2125 * If waitq_link fails, then the following release call will actually
2126 * release the reserved link object.
2127 */
2128 waitq_link_release(*reserved_link);
2129 *reserved_link = 0;
2130
2131 /*
2132 * Use the s_data pointer as an output parameter as well
2133 * This avoids changing the prototype for this function which is
2134 * used by many kexts. We need to surface the waitq object
2135 * associated with the selinfo we just added to the thread's select
2136 * set. New waitq sets do not have back-pointers to set members, so
2137 * the only way to clear out set linkage objects is to go from the
2138 * waitq to the set. We use a memcpy because s_data could be
2139 * pointing to an unaligned value on the stack
2140 * (especially on 32-bit systems)
2141 */
2142 void *wqptr = (void *)&sip->si_waitq;
2143 memcpy((void *)s_data, (void *)&wqptr, sizeof(void *));
0b4e3aa0 2144
1c79356b
A
2145 return;
2146}
2147
2148void
2d21ac55 2149selwakeup(struct selinfo *sip)
1c79356b 2150{
1c79356b 2151
0b4e3aa0 2152 if ((sip->si_flags & SI_INITED) == 0) {
1c79356b 2153 return;
0b4e3aa0 2154 }
1c79356b
A
2155
2156 if (sip->si_flags & SI_COLL) {
2157 nselcoll++;
2158 sip->si_flags &= ~SI_COLL;
0b4e3aa0
A
2159#if 0
2160 /* will not support */
2161 //wakeup((caddr_t)&selwait);
2162#endif
1c79356b 2163 }
1c79356b 2164
0b4e3aa0 2165 if (sip->si_flags & SI_RECORDED) {
3e170ce0
A
2166 waitq_wakeup64_all(&sip->si_waitq, NO_EVENT64,
2167 THREAD_AWAKENED, WAITQ_ALL_PRIORITIES);
0b4e3aa0 2168 sip->si_flags &= ~SI_RECORDED;
1c79356b 2169 }
1c79356b 2170
1c79356b
A
2171}
2172
2173void
2d21ac55 2174selthreadclear(struct selinfo *sip)
1c79356b 2175{
3e170ce0 2176 struct waitq *wq;
1c79356b 2177
0b4e3aa0
A
2178 if ((sip->si_flags & SI_INITED) == 0) {
2179 return;
2180 }
2181 if (sip->si_flags & SI_RECORDED) {
2182 selwakeup(sip);
2183 sip->si_flags &= ~(SI_RECORDED | SI_COLL);
1c79356b 2184 }
0b4e3aa0 2185 sip->si_flags |= SI_CLEAR;
3e170ce0
A
2186 sip->si_flags &= ~SI_INITED;
2187
2188 wq = &sip->si_waitq;
2189
2190 /*
2191 * Higher level logic may have a handle on this waitq's prepost ID,
2192 * but that's OK because the waitq_deinit will remove/invalidate the
2193 * prepost object (as well as mark the waitq invalid). This de-couples
2194 * us from any callers that may have a handle to this waitq via the
2195 * prepost ID.
2196 */
2197 waitq_deinit(wq);
1c79356b
A
2198}
2199
2200
91447636
A
2201
2202
91447636
A
/* kdebug sub-codes for the watchevent/waitevent/modwatch event machinery */
#define DBG_POST	0x10
#define DBG_WATCH	0x11
#define DBG_WAIT	0x12
#define DBG_MOD		0x13
#define DBG_EWAKEUP	0x14
#define DBG_ENQUEUE	0x15
#define DBG_DEQUEUE	0x16

#define DBG_MISC_POST    MISCDBG_CODE(DBG_EVENT,DBG_POST)
#define DBG_MISC_WATCH   MISCDBG_CODE(DBG_EVENT,DBG_WATCH)
#define DBG_MISC_WAIT    MISCDBG_CODE(DBG_EVENT,DBG_WAIT)
#define DBG_MISC_MOD     MISCDBG_CODE(DBG_EVENT,DBG_MOD)
#define DBG_MISC_EWAKEUP MISCDBG_CODE(DBG_EVENT,DBG_EWAKEUP)
#define DBG_MISC_ENQUEUE MISCDBG_CODE(DBG_EVENT,DBG_ENQUEUE)
#define DBG_MISC_DEQUEUE MISCDBG_CODE(DBG_EVENT,DBG_DEQUEUE)


/*
 * Remove event 'evq' from proc 'p's pending-event list if it is queued.
 * The EV_QUEUED test and the removal are both done under the proc lock.
 *
 * The expansion deliberately has no trailing semicolon so that
 * "EVPROCDEQUE(p, evq);" behaves as exactly one statement, including
 * as the body of an unbraced if/else.  Both arguments are evaluated
 * more than once, so they must be free of side effects.
 */
#define EVPROCDEQUE(p, evq)	do {				\
	proc_lock(p);						\
	if ((evq)->ee_flags & EV_QUEUED) {			\
		TAILQ_REMOVE(&(p)->p_evlist, (evq), ee_plist);	\
		(evq)->ee_flags &= ~EV_QUEUED;			\
	}							\
	proc_unlock(p);						\
} while (0)
2228
1c79356b
A
2229
2230/*
2231 * called upon socket close. deque and free all events for
91447636 2232 * the socket... socket must be locked by caller.
1c79356b 2233 */
9bccf70c 2234void
1c79356b
A
2235evsofree(struct socket *sp)
2236{
91447636
A
2237 struct eventqelt *evq, *next;
2238 proc_t p;
2239
2240 if (sp == NULL)
2241 return;
1c79356b 2242
91447636
A
2243 for (evq = sp->so_evlist.tqh_first; evq != NULL; evq = next) {
2244 next = evq->ee_slist.tqe_next;
2245 p = evq->ee_proc;
1c79356b 2246
91447636
A
2247 if (evq->ee_flags & EV_QUEUED) {
2248 EVPROCDEQUE(p, evq);
2249 }
2250 TAILQ_REMOVE(&sp->so_evlist, evq, ee_slist); // remove from socket q
2251 FREE(evq, M_TEMP);
2252 }
1c79356b
A
2253}
2254
2255
91447636
A
2256/*
2257 * called upon pipe close. deque and free all events for
2258 * the pipe... pipe must be locked by caller
2259 */
2260void
2261evpipefree(struct pipe *cpipe)
2262{
2263 struct eventqelt *evq, *next;
2264 proc_t p;
1c79356b 2265
91447636
A
2266 for (evq = cpipe->pipe_evlist.tqh_first; evq != NULL; evq = next) {
2267 next = evq->ee_slist.tqe_next;
2268 p = evq->ee_proc;
1c79356b 2269
91447636
A
2270 EVPROCDEQUE(p, evq);
2271
2272 TAILQ_REMOVE(&cpipe->pipe_evlist, evq, ee_slist); // remove from pipe q
2273 FREE(evq, M_TEMP);
2274 }
2275}
1c79356b
A
2276
2277
2278/*
91447636
A
2279 * enqueue this event if it's not already queued. wakeup
2280 * the proc if we do queue this event to it...
2281 * entered with proc lock held... we drop it before
2282 * doing the wakeup and return in that state
1c79356b 2283 */
91447636
A
2284static void
2285evprocenque(struct eventqelt *evq)
1c79356b 2286{
91447636
A
2287 proc_t p;
2288
2289 assert(evq);
2290 p = evq->ee_proc;
2291
2d21ac55 2292 KERNEL_DEBUG(DBG_MISC_ENQUEUE|DBG_FUNC_START, (uint32_t)evq, evq->ee_flags, evq->ee_eventmask,0,0);
91447636
A
2293
2294 proc_lock(p);
2295
2296 if (evq->ee_flags & EV_QUEUED) {
2297 proc_unlock(p);
2298
2299 KERNEL_DEBUG(DBG_MISC_ENQUEUE|DBG_FUNC_END, 0,0,0,0,0);
2300 return;
2301 }
2302 evq->ee_flags |= EV_QUEUED;
2303
2304 TAILQ_INSERT_TAIL(&p->p_evlist, evq, ee_plist);
2305
2306 proc_unlock(p);
2307
2308 wakeup(&p->p_evlist);
2309
2310 KERNEL_DEBUG(DBG_MISC_ENQUEUE|DBG_FUNC_END, 0,0,0,0,0);
1c79356b
A
2311}
2312
91447636 2313
1c79356b 2314/*
91447636 2315 * pipe lock must be taken by the caller
1c79356b 2316 */
9bccf70c 2317void
91447636 2318postpipeevent(struct pipe *pipep, int event)
1c79356b 2319{
91447636
A
2320 int mask;
2321 struct eventqelt *evq;
2322
2323 if (pipep == NULL)
2324 return;
2325 KERNEL_DEBUG(DBG_MISC_POST|DBG_FUNC_START, event,0,0,1,0);
2326
2327 for (evq = pipep->pipe_evlist.tqh_first;
2328 evq != NULL; evq = evq->ee_slist.tqe_next) {
2329
2330 if (evq->ee_eventmask == 0)
2331 continue;
2332 mask = 0;
2333
2334 switch (event & (EV_RWBYTES | EV_RCLOSED | EV_WCLOSED)) {
2335
2336 case EV_RWBYTES:
2337 if ((evq->ee_eventmask & EV_RE) && pipep->pipe_buffer.cnt) {
2338 mask |= EV_RE;
2339 evq->ee_req.er_rcnt = pipep->pipe_buffer.cnt;
2340 }
2341 if ((evq->ee_eventmask & EV_WR) &&
316670eb 2342 (MAX(pipep->pipe_buffer.size,PIPE_SIZE) - pipep->pipe_buffer.cnt) >= PIPE_BUF) {
91447636
A
2343
2344 if (pipep->pipe_state & PIPE_EOF) {
2345 mask |= EV_WR|EV_RESET;
2346 break;
2347 }
2348 mask |= EV_WR;
316670eb 2349 evq->ee_req.er_wcnt = MAX(pipep->pipe_buffer.size, PIPE_SIZE) - pipep->pipe_buffer.cnt;
91447636
A
2350 }
2351 break;
2352
2353 case EV_WCLOSED:
2354 case EV_RCLOSED:
2355 if ((evq->ee_eventmask & EV_RE)) {
2356 mask |= EV_RE|EV_RCLOSED;
2357 }
2358 if ((evq->ee_eventmask & EV_WR)) {
2359 mask |= EV_WR|EV_WCLOSED;
2360 }
2361 break;
2362
2363 default:
2364 return;
2365 }
2366 if (mask) {
2367 /*
2368 * disarm... postevents are nops until this event is 'read' via
2369 * waitevent and then re-armed via modwatch
2370 */
2371 evq->ee_eventmask = 0;
2372
2373 /*
2374 * since events are disarmed until after the waitevent
2375 * the ee_req.er_xxxx fields can't change once we've
2376 * inserted this event into the proc queue...
2377 * therefore, the waitevent will see a 'consistent'
2378 * snapshot of the event, even though it won't hold
2379 * the pipe lock, and we're updating the event outside
2380 * of the proc lock, which it will hold
2381 */
2382 evq->ee_req.er_eventbits |= mask;
2383
2d21ac55 2384 KERNEL_DEBUG(DBG_MISC_POST, (uint32_t)evq, evq->ee_req.er_eventbits, mask, 1,0);
91447636
A
2385
2386 evprocenque(evq);
2387 }
2388 }
2389 KERNEL_DEBUG(DBG_MISC_POST|DBG_FUNC_END, 0,0,0,1,0);
1c79356b
A
2390}
2391
#if SOCKETS
/*
 * Report whether 'sp' is an IPv4/IPv6 TCP socket whose pending error is
 * ECONNREFUSED or ECONNRESET and whose protocol state is gone (no pcb,
 * inpcb marked dead, no tcpcb, or tcpcb in TCPS_CLOSED).
 * (for AFP/OT purposes; may go away in future)
 */
static int
postevent_tcp_conn_gone(struct socket *sp)
{
	struct tcpcb *tp;

	if (SOCK_DOM(sp) != PF_INET && SOCK_DOM(sp) != PF_INET6)
		return (0);
	if (SOCK_PROTO(sp) != IPPROTO_TCP)
		return (0);
	if (sp->so_error != ECONNREFUSED && sp->so_error != ECONNRESET)
		return (0);

	return (sp->so_pcb == NULL ||
	    sotoinpcb(sp)->inp_state == INPCB_STATE_DEAD ||
	    (tp = sototcpcb(sp)) == NULL ||
	    tp->t_state == TCPS_CLOSED);
}

/*
 * given either a sockbuf or a socket run down the
 * event list and queue ready events found...
 * when a sockbuf is supplied its owning socket is used.
 * the socket must be locked by the caller
 */
void
postevent(struct socket *sp, struct sockbuf *sb, int event)
{
	int	mask;
	struct eventqelt *evq;

	if (sb)
		sp = sb->sb_so;
	if (sp == NULL)
		return;

	KERNEL_DEBUG(DBG_MISC_POST|DBG_FUNC_START, (int)sp, event, 0, 0, 0);

	for (evq = sp->so_evlist.tqh_first;
	     evq != NULL; evq = evq->ee_slist.tqe_next) {

		/* a zero mask means the event is disarmed */
		if (evq->ee_eventmask == 0)
			continue;
		mask = 0;

		/* ready for reading:
		   - byte cnt >= receive low water mark
		   - read-half of conn closed
		   - conn pending for listening sock
		   - socket error pending

		   ready for writing
		   - byte cnt avail >= send low water mark
		   - write half of conn closed
		   - socket error pending
		   - non-blocking conn completed successfully

		   exception pending
		   - out of band data
		   - sock at out of band mark
		*/

		switch (event & EV_DMASK) {

		case EV_OOB:
			if ((evq->ee_eventmask & EV_EX) &&
			    (sp->so_oobmark || (sp->so_state & SS_RCVATMARK)))
				mask |= EV_EX|EV_OOB;
			break;

		case EV_RWBYTES|EV_OOB:
			if ((evq->ee_eventmask & EV_EX) &&
			    (sp->so_oobmark || (sp->so_state & SS_RCVATMARK)))
				mask |= EV_EX|EV_OOB;
			/* FALLTHROUGH */

		case EV_RWBYTES:
			if ((evq->ee_eventmask & EV_RE) && soreadable(sp)) {
				if (postevent_tcp_conn_gone(sp)) {
					mask |= EV_RE|EV_RESET;
					break;
				}
				mask |= EV_RE;
				evq->ee_req.er_rcnt = sp->so_rcv.sb_cc;

				if (sp->so_state & SS_CANTRCVMORE) {
					mask |= EV_FIN;
					break;
				}
			}
			if ((evq->ee_eventmask & EV_WR) && sowriteable(sp)) {
				if (postevent_tcp_conn_gone(sp)) {
					mask |= EV_WR|EV_RESET;
					break;
				}
				mask |= EV_WR;
				evq->ee_req.er_wcnt = sbspace(&sp->so_snd);
			}
			break;

		case EV_RCONN:
			if (evq->ee_eventmask & EV_RE) {
				mask |= EV_RE|EV_RCONN;
				evq->ee_req.er_rcnt = sp->so_qlen + 1;	// incl this one
			}
			break;

		case EV_WCONN:
			if (evq->ee_eventmask & EV_WR)
				mask |= EV_WR|EV_WCONN;
			break;

		case EV_RCLOSED:
			if (evq->ee_eventmask & EV_RE)
				mask |= EV_RE|EV_RCLOSED;
			break;

		case EV_WCLOSED:
			if (evq->ee_eventmask & EV_WR)
				mask |= EV_WR|EV_WCLOSED;
			break;

		case EV_FIN:
			if (evq->ee_eventmask & EV_RE)
				mask |= EV_RE|EV_FIN;
			break;

		case EV_RESET:
		case EV_TIMEOUT:
			if (evq->ee_eventmask & EV_RE)
				mask |= EV_RE | event;
			if (evq->ee_eventmask & EV_WR)
				mask |= EV_WR | event;
			break;

		default:
			KERNEL_DEBUG(DBG_MISC_POST|DBG_FUNC_END, (int)sp, -1, 0, 0, 0);
			return;
		} /* switch */

		KERNEL_DEBUG(DBG_MISC_POST, (int)evq, evq->ee_eventmask, evq->ee_req.er_eventbits, mask, 0);

		if (mask == 0)
			continue;

		/*
		 * disarm... postevents are nops until this event is 'read' via
		 * waitevent and then re-armed via modwatch
		 */
		evq->ee_eventmask = 0;

		/*
		 * since events are disarmed until after the waitevent
		 * the ee_req.er_xxxx fields can't change once we've
		 * inserted this event into the proc queue...
		 * since waitevent can't see this event until we
		 * enqueue it, waitevent will see a 'consistent'
		 * snapshot of the event, even though it won't hold
		 * the socket lock, and we're updating the event outside
		 * of the proc lock, which it will hold
		 */
		evq->ee_req.er_eventbits |= mask;

		evprocenque(evq);
	}
	KERNEL_DEBUG(DBG_MISC_POST|DBG_FUNC_END, (int)sp, 0, 0, 0, 0);
}
#endif /* SOCKETS */
1c79356b 2572
1c79356b
A
2573
2574/*
2575 * watchevent system call. user passes us an event to watch
2576 * for. we malloc an event object, initialize it, and queue
2577 * it to the open socket. when the event occurs, postevent()
2578 * will enque it back to our proc where we can retrieve it
2579 * via waitevent().
2580 *
2581 * should this prevent duplicate events on same socket?
2d21ac55
A
2582 *
2583 * Returns:
2584 * ENOMEM No memory for operation
2585 * copyin:EFAULT
1c79356b
A
2586 */
2587int
91447636 2588watchevent(proc_t p, struct watchevent_args *uap, __unused int *retval)
1c79356b 2589{
91447636
A
2590 struct eventqelt *evq = (struct eventqelt *)0;
2591 struct eventqelt *np = NULL;
2d21ac55 2592 struct eventreq64 *erp;
91447636
A
2593 struct fileproc *fp = NULL;
2594 int error;
2595
2596 KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_START, 0,0,0,0,0);
2597
2598 // get a qelt and fill with users req
2599 MALLOC(evq, struct eventqelt *, sizeof(struct eventqelt), M_TEMP, M_WAITOK);
2600
2601 if (evq == NULL)
2d21ac55 2602 return (ENOMEM);
91447636
A
2603 erp = &evq->ee_req;
2604
2605 // get users request pkt
91447636 2606
2d21ac55
A
2607 if (IS_64BIT_PROCESS(p)) {
2608 error = copyin(uap->u_req, (caddr_t)erp, sizeof(struct eventreq64));
2609 } else {
2610 struct eventreq32 er32;
2611
2612 error = copyin(uap->u_req, (caddr_t)&er32, sizeof(struct eventreq32));
2613 if (error == 0) {
2614 /*
2615 * the user only passes in the
2616 * er_type, er_handle and er_data...
2617 * the other fields are initialized
2618 * below, so don't bother to copy
2619 */
2620 erp->er_type = er32.er_type;
2621 erp->er_handle = er32.er_handle;
2622 erp->er_data = (user_addr_t)er32.er_data;
2623 }
2624 }
2625 if (error) {
2626 FREE(evq, M_TEMP);
91447636 2627 KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_END, error,0,0,0,0);
2d21ac55
A
2628
2629 return(error);
91447636 2630 }
2d21ac55 2631 KERNEL_DEBUG(DBG_MISC_WATCH, erp->er_handle,uap->u_eventmask,(uint32_t)evq,0,0);
91447636
A
2632
2633 // validate, freeing qelt if errors
2634 error = 0;
2635 proc_fdlock(p);
2636
2637 if (erp->er_type != EV_FD) {
2638 error = EINVAL;
2639 } else if ((error = fp_lookup(p, erp->er_handle, &fp, 1)) != 0) {
2640 error = EBADF;
2d21ac55 2641#if SOCKETS
91447636
A
2642 } else if (fp->f_type == DTYPE_SOCKET) {
2643 socket_lock((struct socket *)fp->f_data, 1);
2644 np = ((struct socket *)fp->f_data)->so_evlist.tqh_first;
2d21ac55 2645#endif /* SOCKETS */
91447636
A
2646 } else if (fp->f_type == DTYPE_PIPE) {
2647 PIPE_LOCK((struct pipe *)fp->f_data);
2648 np = ((struct pipe *)fp->f_data)->pipe_evlist.tqh_first;
2649 } else {
2650 fp_drop(p, erp->er_handle, fp, 1);
2651 error = EINVAL;
2652 }
2653 proc_fdunlock(p);
2654
2655 if (error) {
2656 FREE(evq, M_TEMP);
2657
2658 KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_END, error,0,0,0,0);
2659 return(error);
2660 }
2661
2662 /*
2663 * only allow one watch per file per proc
2664 */
2665 for ( ; np != NULL; np = np->ee_slist.tqe_next) {
2666 if (np->ee_proc == p) {
2d21ac55 2667#if SOCKETS
91447636
A
2668 if (fp->f_type == DTYPE_SOCKET)
2669 socket_unlock((struct socket *)fp->f_data, 1);
2670 else
2d21ac55 2671#endif /* SOCKETS */
91447636
A
2672 PIPE_UNLOCK((struct pipe *)fp->f_data);
2673 fp_drop(p, erp->er_handle, fp, 0);
2674 FREE(evq, M_TEMP);
2675
2676 KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_END, EINVAL,0,0,0,0);
2677 return(EINVAL);
2678 }
2679 }
2680 erp->er_ecnt = erp->er_rcnt = erp->er_wcnt = erp->er_eventbits = 0;
2681 evq->ee_proc = p;
2682 evq->ee_eventmask = uap->u_eventmask & EV_MASK;
2683 evq->ee_flags = 0;
2684
2d21ac55 2685#if SOCKETS
91447636
A
2686 if (fp->f_type == DTYPE_SOCKET) {
2687 TAILQ_INSERT_TAIL(&((struct socket *)fp->f_data)->so_evlist, evq, ee_slist);
2688 postevent((struct socket *)fp->f_data, 0, EV_RWBYTES); // catch existing events
2689
2690 socket_unlock((struct socket *)fp->f_data, 1);
2d21ac55
A
2691 } else
2692#endif /* SOCKETS */
2693 {
91447636
A
2694 TAILQ_INSERT_TAIL(&((struct pipe *)fp->f_data)->pipe_evlist, evq, ee_slist);
2695 postpipeevent((struct pipe *)fp->f_data, EV_RWBYTES);
2696
2697 PIPE_UNLOCK((struct pipe *)fp->f_data);
2698 }
2699 fp_drop_event(p, erp->er_handle, fp);
2700
2701 KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_END, 0,0,0,0,0);
2702 return(0);
1c79356b
A
2703}
2704
91447636 2705
1c79356b
A
2706
2707/*
2708 * waitevent system call.
2709 * grabs the next waiting event for this proc and returns
2710 * it. if no events, user can request to sleep with timeout
2d21ac55
A
2711 * or without or poll mode
2712 * ((tv != NULL && interval == 0) || tv == -1)
1c79356b
A
2713 */
2714int
91447636 2715waitevent(proc_t p, struct waitevent_args *uap, int *retval)
1c79356b 2716{
91447636
A
2717 int error = 0;
2718 struct eventqelt *evq;
2d21ac55 2719 struct eventreq64 *erp;
9bccf70c 2720 uint64_t abstime, interval;
2d21ac55
A
2721 boolean_t fast_poll = FALSE;
2722 union {
2723 struct eventreq64 er64;
2724 struct eventreq32 er32;
2725 } uer;
2726
2727 interval = 0;
1c79356b
A
2728
2729 if (uap->tv) {
9bccf70c 2730 struct timeval atv;
2d21ac55
A
2731 /*
2732 * check for fast poll method
2733 */
2734 if (IS_64BIT_PROCESS(p)) {
2735 if (uap->tv == (user_addr_t)-1)
2736 fast_poll = TRUE;
2737 } else if (uap->tv == (user_addr_t)((uint32_t)-1))
2738 fast_poll = TRUE;
2739
2740 if (fast_poll == TRUE) {
2741 if (p->p_evlist.tqh_first == NULL) {
2742 KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_NONE, -1,0,0,0,0);
2743 /*
2744 * poll failed
2745 */
2746 *retval = 1;
2747 return (0);
2748 }
2749 proc_lock(p);
2750 goto retry;
2751 }
b0d623f7
A
2752 if (IS_64BIT_PROCESS(p)) {
2753 struct user64_timeval atv64;
2754 error = copyin(uap->tv, (caddr_t)&atv64, sizeof(atv64));
2755 /* Loses resolution - assume timeout < 68 years */
2756 atv.tv_sec = atv64.tv_sec;
2757 atv.tv_usec = atv64.tv_usec;
2758 } else {
2759 struct user32_timeval atv32;
2760 error = copyin(uap->tv, (caddr_t)&atv32, sizeof(atv32));
2761 atv.tv_sec = atv32.tv_sec;
2762 atv.tv_usec = atv32.tv_usec;
2763 }
9bccf70c 2764
1c79356b 2765 if (error)
9bccf70c 2766 return(error);
1c79356b
A
2767 if (itimerfix(&atv)) {
2768 error = EINVAL;
2769 return(error);
2770 }
9bccf70c 2771 interval = tvtoabstime(&atv);
2d21ac55 2772 }
9bccf70c 2773 KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_START, 0,0,0,0,0);
1c79356b 2774
91447636 2775 proc_lock(p);
1c79356b 2776retry:
91447636
A
2777 if ((evq = p->p_evlist.tqh_first) != NULL) {
2778 /*
2779 * found one... make a local copy while it's still on the queue
2780 * to prevent it from changing while in the midst of copying
2781 * don't want to hold the proc lock across a copyout because
2782 * it might block on a page fault at the target in user space
2783 */
2d21ac55 2784 erp = &evq->ee_req;
91447636 2785
2d21ac55
A
2786 if (IS_64BIT_PROCESS(p))
2787 bcopy((caddr_t)erp, (caddr_t)&uer.er64, sizeof (struct eventreq64));
2788 else {
2789 uer.er32.er_type = erp->er_type;
2790 uer.er32.er_handle = erp->er_handle;
2791 uer.er32.er_data = (uint32_t)erp->er_data;
2792 uer.er32.er_ecnt = erp->er_ecnt;
2793 uer.er32.er_rcnt = erp->er_rcnt;
2794 uer.er32.er_wcnt = erp->er_wcnt;
2795 uer.er32.er_eventbits = erp->er_eventbits;
2796 }
91447636
A
2797 TAILQ_REMOVE(&p->p_evlist, evq, ee_plist);
2798
2799 evq->ee_flags &= ~EV_QUEUED;
1c79356b 2800
91447636
A
2801 proc_unlock(p);
2802
2d21ac55
A
2803 if (IS_64BIT_PROCESS(p))
2804 error = copyout((caddr_t)&uer.er64, uap->u_req, sizeof(struct eventreq64));
2805 else
2806 error = copyout((caddr_t)&uer.er32, uap->u_req, sizeof(struct eventreq32));
91447636
A
2807
2808 KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_END, error,
2d21ac55 2809 evq->ee_req.er_handle,evq->ee_req.er_eventbits,(uint32_t)evq,0);
9bccf70c
A
2810 return (error);
2811 }
2812 else {
2813 if (uap->tv && interval == 0) {
91447636 2814 proc_unlock(p);
9bccf70c 2815 *retval = 1; // poll failed
9bccf70c 2816
91447636 2817 KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_END, error,0,0,0,0);
9bccf70c
A
2818 return (error);
2819 }
9bccf70c 2820 if (interval != 0)
55e303ae 2821 clock_absolutetime_interval_to_deadline(interval, &abstime);
91447636
A
2822 else
2823 abstime = 0;
9bccf70c 2824
2d21ac55 2825 KERNEL_DEBUG(DBG_MISC_WAIT, 1,(uint32_t)&p->p_evlist,0,0,0);
91447636
A
2826
2827 error = msleep1(&p->p_evlist, &p->p_mlock, (PSOCK | PCATCH), "waitevent", abstime);
2828
2d21ac55 2829 KERNEL_DEBUG(DBG_MISC_WAIT, 2,(uint32_t)&p->p_evlist,0,0,0);
91447636 2830
9bccf70c
A
2831 if (error == 0)
2832 goto retry;
2833 if (error == ERESTART)
2834 error = EINTR;
2835 if (error == EWOULDBLOCK) {
2836 *retval = 1;
2837 error = 0;
2838 }
2839 }
91447636 2840 proc_unlock(p);
9bccf70c
A
2841
2842 KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_END, 0,0,0,0,0);
9bccf70c 2843 return (error);
1c79356b
A
2844}
2845
1c79356b
A
2846
2847/*
2848 * modwatch system call. user passes in event to modify.
2849 * if we find it we reset the event bits and que/deque event
2850 * it needed.
2851 */
2852int
91447636 2853modwatch(proc_t p, struct modwatch_args *uap, __unused int *retval)
1c79356b 2854{
2d21ac55
A
2855 struct eventreq64 er;
2856 struct eventreq64 *erp = &er;
2857 struct eventqelt *evq = NULL; /* protected by error return */
91447636
A
2858 int error;
2859 struct fileproc *fp;
2860 int flag;
2861
2862 KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_START, 0,0,0,0,0);
2863
2864 /*
2865 * get user's request pkt
2d21ac55
A
2866 * just need the er_type and er_handle which sit above the
2867 * problematic er_data (32/64 issue)... so only copy in
2868 * those 2 fields
91447636 2869 */
2d21ac55
A
2870 if ((error = copyin(uap->u_req, (caddr_t)erp, sizeof(er.er_type) + sizeof(er.er_handle)))) {
2871 KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_END, error,0,0,0,0);
91447636
A
2872 return(error);
2873 }
2874 proc_fdlock(p);
2875
2876 if (erp->er_type != EV_FD) {
2877 error = EINVAL;
2878 } else if ((error = fp_lookup(p, erp->er_handle, &fp, 1)) != 0) {
2879 error = EBADF;
2d21ac55 2880#if SOCKETS
91447636
A
2881 } else if (fp->f_type == DTYPE_SOCKET) {
2882 socket_lock((struct socket *)fp->f_data, 1);
2883 evq = ((struct socket *)fp->f_data)->so_evlist.tqh_first;
2d21ac55 2884#endif /* SOCKETS */
91447636
A
2885 } else if (fp->f_type == DTYPE_PIPE) {
2886 PIPE_LOCK((struct pipe *)fp->f_data);
2887 evq = ((struct pipe *)fp->f_data)->pipe_evlist.tqh_first;
2888 } else {
2889 fp_drop(p, erp->er_handle, fp, 1);
2890 error = EINVAL;
2891 }
2892
2893 if (error) {
2894 proc_fdunlock(p);
2895 KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_END, error,0,0,0,0);
2896 return(error);
2897 }
2898
2899 if ((uap->u_eventmask == EV_RM) && (fp->f_flags & FP_WAITEVENT)) {
2900 fp->f_flags &= ~FP_WAITEVENT;
2901 }
2902 proc_fdunlock(p);
2903
2904 // locate event if possible
2905 for ( ; evq != NULL; evq = evq->ee_slist.tqe_next) {
2906 if (evq->ee_proc == p)
2907 break;
2908 }
2909 if (evq == NULL) {
2d21ac55 2910#if SOCKETS
91447636
A
2911 if (fp->f_type == DTYPE_SOCKET)
2912 socket_unlock((struct socket *)fp->f_data, 1);
2d21ac55
A
2913 else
2914#endif /* SOCKETS */
91447636
A
2915 PIPE_UNLOCK((struct pipe *)fp->f_data);
2916 fp_drop(p, erp->er_handle, fp, 0);
2917 KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_END, EINVAL,0,0,0,0);
2918 return(EINVAL);
2919 }
2d21ac55 2920 KERNEL_DEBUG(DBG_MISC_MOD, erp->er_handle,uap->u_eventmask,(uint32_t)evq,0,0);
91447636
A
2921
2922 if (uap->u_eventmask == EV_RM) {
2923 EVPROCDEQUE(p, evq);
2924
2d21ac55 2925#if SOCKETS
91447636
A
2926 if (fp->f_type == DTYPE_SOCKET) {
2927 TAILQ_REMOVE(&((struct socket *)fp->f_data)->so_evlist, evq, ee_slist);
2928 socket_unlock((struct socket *)fp->f_data, 1);
2d21ac55
A
2929 } else
2930#endif /* SOCKETS */
2931 {
91447636
A
2932 TAILQ_REMOVE(&((struct pipe *)fp->f_data)->pipe_evlist, evq, ee_slist);
2933 PIPE_UNLOCK((struct pipe *)fp->f_data);
2934 }
2935 fp_drop(p, erp->er_handle, fp, 0);
2936 FREE(evq, M_TEMP);
2937 KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_END, 0,0,0,0,0);
2938 return(0);
2939 }
2940 switch (uap->u_eventmask & EV_MASK) {
1c79356b 2941
91447636
A
2942 case 0:
2943 flag = 0;
2944 break;
2945
2946 case EV_RE:
2947 case EV_WR:
2948 case EV_RE|EV_WR:
2949 flag = EV_RWBYTES;
2950 break;
2951
2952 case EV_EX:
2953 flag = EV_OOB;
2954 break;
2955
2956 case EV_EX|EV_RE:
2957 case EV_EX|EV_WR:
2958 case EV_EX|EV_RE|EV_WR:
2959 flag = EV_OOB|EV_RWBYTES;
2960 break;
2961
2962 default:
2d21ac55 2963#if SOCKETS
91447636
A
2964 if (fp->f_type == DTYPE_SOCKET)
2965 socket_unlock((struct socket *)fp->f_data, 1);
2966 else
2d21ac55 2967#endif /* SOCKETS */
91447636
A
2968 PIPE_UNLOCK((struct pipe *)fp->f_data);
2969 fp_drop(p, erp->er_handle, fp, 0);
2970 KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_END, EINVAL,0,0,0,0);
2971 return(EINVAL);
2972 }
2973 /*
2974 * since we're holding the socket/pipe lock, the event
2975 * cannot go from the unqueued state to the queued state
2976 * however, it can go from the queued state to the unqueued state
2977 * since that direction is protected by the proc_lock...
2978 * so do a quick check for EV_QUEUED w/o holding the proc lock
2979 * since by far the common case will be NOT EV_QUEUED, this saves
2980 * us taking the proc_lock the majority of the time
2981 */
2982 if (evq->ee_flags & EV_QUEUED) {
2983 /*
2984 * EVPROCDEQUE will recheck the state after it grabs the proc_lock
2985 */
2986 EVPROCDEQUE(p, evq);
2987 }
2988 /*
2989 * while the event is off the proc queue and
2990 * we're holding the socket/pipe lock
2991 * it's safe to update these fields...
2992 */
2993 evq->ee_req.er_eventbits = 0;
2994 evq->ee_eventmask = uap->u_eventmask & EV_MASK;
2995
2d21ac55 2996#if SOCKETS
91447636
A
2997 if (fp->f_type == DTYPE_SOCKET) {
2998 postevent((struct socket *)fp->f_data, 0, flag);
2999 socket_unlock((struct socket *)fp->f_data, 1);
2d21ac55
A
3000 } else
3001#endif /* SOCKETS */
3002 {
91447636
A
3003 postpipeevent((struct pipe *)fp->f_data, flag);
3004 PIPE_UNLOCK((struct pipe *)fp->f_data);
3005 }
3006 fp_drop(p, erp->er_handle, fp, 0);
2d21ac55 3007 KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_END, evq->ee_req.er_handle,evq->ee_eventmask,(uint32_t)fp->f_data,flag,0);
91447636 3008 return(0);
1c79356b 3009}
91447636
A
3010
3011/* this routine is called from the close of fd with proc_fdlock held */
3012int
3013waitevent_close(struct proc *p, struct fileproc *fp)
3014{
3015 struct eventqelt *evq;
3016
3017
3018 fp->f_flags &= ~FP_WAITEVENT;
3019
2d21ac55 3020#if SOCKETS
91447636
A
3021 if (fp->f_type == DTYPE_SOCKET) {
3022 socket_lock((struct socket *)fp->f_data, 1);
3023 evq = ((struct socket *)fp->f_data)->so_evlist.tqh_first;
2d21ac55
A
3024 } else
3025#endif /* SOCKETS */
3026 if (fp->f_type == DTYPE_PIPE) {
91447636
A
3027 PIPE_LOCK((struct pipe *)fp->f_data);
3028 evq = ((struct pipe *)fp->f_data)->pipe_evlist.tqh_first;
3029 }
3030 else {
3031 return(EINVAL);
3032 }
3033 proc_fdunlock(p);
3034
3035
3036 // locate event if possible
3037 for ( ; evq != NULL; evq = evq->ee_slist.tqe_next) {
3038 if (evq->ee_proc == p)
3039 break;
3040 }
3041 if (evq == NULL) {
2d21ac55 3042#if SOCKETS
91447636
A
3043 if (fp->f_type == DTYPE_SOCKET)
3044 socket_unlock((struct socket *)fp->f_data, 1);
3045 else
2d21ac55 3046#endif /* SOCKETS */
91447636
A
3047 PIPE_UNLOCK((struct pipe *)fp->f_data);
3048
3049 proc_fdlock(p);
3050
3051 return(EINVAL);
3052 }
3053 EVPROCDEQUE(p, evq);
3054
2d21ac55 3055#if SOCKETS
91447636
A
3056 if (fp->f_type == DTYPE_SOCKET) {
3057 TAILQ_REMOVE(&((struct socket *)fp->f_data)->so_evlist, evq, ee_slist);
3058 socket_unlock((struct socket *)fp->f_data, 1);
2d21ac55
A
3059 } else
3060#endif /* SOCKETS */
3061 {
91447636
A
3062 TAILQ_REMOVE(&((struct pipe *)fp->f_data)->pipe_evlist, evq, ee_slist);
3063 PIPE_UNLOCK((struct pipe *)fp->f_data);
3064 }
3065 FREE(evq, M_TEMP);
3066
3067 proc_fdlock(p);
3068
3069 return(0);
3070}
3071
2d21ac55
A
3072
3073/*
3074 * gethostuuid
3075 *
3076 * Description: Get the host UUID from IOKit and return it to user space.
3077 *
3078 * Parameters: uuid_buf Pointer to buffer to receive UUID
3079 * timeout Timespec for timout
39236c6e 3080 * spi SPI, skip sandbox check (temporary)
2d21ac55
A
3081 *
3082 * Returns: 0 Success
3083 * EWOULDBLOCK Timeout is too short
3084 * copyout:EFAULT Bad user buffer
fe8ab488 3085 * mac_system_check_info:EPERM Client not allowed to perform this operation
2d21ac55
A
3086 *
3087 * Notes: A timeout seems redundant, since if it's tolerable to not
3088 * have a system UUID in hand, then why ask for one?
3089 */
3090int
b0d623f7 3091gethostuuid(struct proc *p, struct gethostuuid_args *uap, __unused int32_t *retval)
2d21ac55
A
3092{
3093 kern_return_t kret;
3094 int error;
3095 mach_timespec_t mach_ts; /* for IOKit call */
3096 __darwin_uuid_t uuid_kern; /* for IOKit call */
3097
39236c6e 3098 if (!uap->spi) {
39236c6e
A
3099 }
3100
2d21ac55
A
3101 /* Convert the 32/64 bit timespec into a mach_timespec_t */
3102 if ( proc_is64bit(p) ) {
b0d623f7 3103 struct user64_timespec ts;
2d21ac55
A
3104 error = copyin(uap->timeoutp, &ts, sizeof(ts));
3105 if (error)
3106 return (error);
3107 mach_ts.tv_sec = ts.tv_sec;
3108 mach_ts.tv_nsec = ts.tv_nsec;
3109 } else {
b0d623f7 3110 struct user32_timespec ts;
2d21ac55
A
3111 error = copyin(uap->timeoutp, &ts, sizeof(ts) );
3112 if (error)
3113 return (error);
3114 mach_ts.tv_sec = ts.tv_sec;
3115 mach_ts.tv_nsec = ts.tv_nsec;
3116 }
3117
3118 /* Call IOKit with the stack buffer to get the UUID */
3119 kret = IOBSDGetPlatformUUID(uuid_kern, mach_ts);
3120
3121 /*
3122 * If we get it, copy out the data to the user buffer; note that a
3123 * uuid_t is an array of characters, so this is size invariant for
3124 * 32 vs. 64 bit.
3125 */
3126 if (kret == KERN_SUCCESS) {
3127 error = copyout(uuid_kern, uap->uuid_buf, sizeof(uuid_kern));
3128 } else {
3129 error = EWOULDBLOCK;
3130 }
3131
3132 return (error);
3133}
316670eb
A
3134
3135/*
3136 * ledger
3137 *
3138 * Description: Omnibus system call for ledger operations
3139 */
3140int
3141ledger(struct proc *p, struct ledger_args *args, __unused int32_t *retval)
3142{
39236c6e
A
3143#if !CONFIG_MACF
3144#pragma unused(p)
3145#endif
316670eb
A
3146 int rval, pid, len, error;
3147#ifdef LEDGER_DEBUG
3148 struct ledger_limit_args lla;
3149#endif
3150 task_t task;
3151 proc_t proc;
3152
3153 /* Finish copying in the necessary args before taking the proc lock */
3154 error = 0;
3155 len = 0;
3156 if (args->cmd == LEDGER_ENTRY_INFO)
3157 error = copyin(args->arg3, (char *)&len, sizeof (len));
3158 else if (args->cmd == LEDGER_TEMPLATE_INFO)
3159 error = copyin(args->arg2, (char *)&len, sizeof (len));
3160#ifdef LEDGER_DEBUG
3161 else if (args->cmd == LEDGER_LIMIT)
3162 error = copyin(args->arg2, (char *)&lla, sizeof (lla));
3163#endif
ecc0ceb4
A
3164 else if ((args->cmd < 0) || (args->cmd > LEDGER_MAX_CMD))
3165 return (EINVAL);
3166
316670eb
A
3167 if (error)
3168 return (error);
3169 if (len < 0)
3170 return (EINVAL);
3171
3172 rval = 0;
3173 if (args->cmd != LEDGER_TEMPLATE_INFO) {
3174 pid = args->arg1;
3175 proc = proc_find(pid);
3176 if (proc == NULL)
3177 return (ESRCH);
3178
3179#if CONFIG_MACF
3180 error = mac_proc_check_ledger(p, proc, args->cmd);
3181 if (error) {
3182 proc_rele(proc);
3183 return (error);
3184 }
3185#endif
3186
3187 task = proc->task;
3188 }
3189
3190 switch (args->cmd) {
3191#ifdef LEDGER_DEBUG
3192 case LEDGER_LIMIT: {
39236c6e 3193 if (!kauth_cred_issuser(kauth_cred_get()))
316670eb
A
3194 rval = EPERM;
3195 rval = ledger_limit(task, &lla);
3196 proc_rele(proc);
3197 break;
3198 }
3199#endif
3200 case LEDGER_INFO: {
3201 struct ledger_info info;
3202
3203 rval = ledger_info(task, &info);
3204 proc_rele(proc);
3205 if (rval == 0)
3206 rval = copyout(&info, args->arg2,
3207 sizeof (info));
3208 break;
3209 }
3210
3211 case LEDGER_ENTRY_INFO: {
3212 void *buf;
3213 int sz;
3214
39236c6e 3215 rval = ledger_get_task_entry_info_multiple(task, &buf, &len);
316670eb 3216 proc_rele(proc);
39037602 3217 if ((rval == 0) && (len >= 0)) {
316670eb
A
3218 sz = len * sizeof (struct ledger_entry_info);
3219 rval = copyout(buf, args->arg2, sz);
3220 kfree(buf, sz);
3221 }
3222 if (rval == 0)
3223 rval = copyout(&len, args->arg3, sizeof (len));
3224 break;
3225 }
3226
3227 case LEDGER_TEMPLATE_INFO: {
3228 void *buf;
3229 int sz;
3230
3231 rval = ledger_template_info(&buf, &len);
39037602 3232 if ((rval == 0) && (len >= 0)) {
316670eb
A
3233 sz = len * sizeof (struct ledger_template_info);
3234 rval = copyout(buf, args->arg1, sz);
3235 kfree(buf, sz);
3236 }
3237 if (rval == 0)
3238 rval = copyout(&len, args->arg2, sizeof (len));
3239 break;
3240 }
3241
3242 default:
ecc0ceb4
A
3243 panic("ledger syscall logic error -- command type %d", args->cmd);
3244 proc_rele(proc);
316670eb
A
3245 rval = EINVAL;
3246 }
3247
3248 return (rval);
3249}
39236c6e 3250
39236c6e
A
3251int
3252telemetry(__unused struct proc *p, struct telemetry_args *args, __unused int32_t *retval)
3253{
3254 int error = 0;
3255
3256 switch (args->cmd) {
fe8ab488 3257#if CONFIG_TELEMETRY
39236c6e
A
3258 case TELEMETRY_CMD_TIMER_EVENT:
3259 error = telemetry_timer_event(args->deadline, args->interval, args->leeway);
3260 break;
fe8ab488
A
3261#endif /* CONFIG_TELEMETRY */
3262 case TELEMETRY_CMD_VOUCHER_NAME:
3263 if (thread_set_voucher_name((mach_port_name_t)args->deadline))
3264 error = EINVAL;
3265 break;
3266
39236c6e
A
3267 default:
3268 error = EINVAL;
3269 break;
3270 }
3271
3272 return (error);
3273}
3e170ce0
A
3274
3275#if defined(DEVELOPMENT) || defined(DEBUG)
3276#if CONFIG_WAITQ_DEBUG
3277static uint64_t g_wqset_num = 0;
3278struct g_wqset {
3279 queue_chain_t link;
3280 struct waitq_set *wqset;
3281};
3282
3283static queue_head_t g_wqset_list;
3284static struct waitq_set *g_waitq_set = NULL;
3285
3286static inline struct waitq_set *sysctl_get_wqset(int idx)
3287{
3288 struct g_wqset *gwqs;
3289
3290 if (!g_wqset_num)
3291 queue_init(&g_wqset_list);
3292
3293 /* don't bother with locks: this is test-only code! */
3294 qe_foreach_element(gwqs, &g_wqset_list, link) {
3295 if ((int)(wqset_id(gwqs->wqset) & 0xffffffff) == idx)
3296 return gwqs->wqset;
3297 }
3298
3299 /* allocate a new one */
3300 ++g_wqset_num;
3301 gwqs = (struct g_wqset *)kalloc(sizeof(*gwqs));
3302 assert(gwqs != NULL);
3303
39037602 3304 gwqs->wqset = waitq_set_alloc(SYNC_POLICY_FIFO|SYNC_POLICY_PREPOST, NULL);
3e170ce0
A
3305 enqueue_tail(&g_wqset_list, &gwqs->link);
3306 printf("[WQ]: created new waitq set 0x%llx\n", wqset_id(gwqs->wqset));
3307
3308 return gwqs->wqset;
3309}
3310
3311#define MAX_GLOBAL_TEST_QUEUES 64
3312static int g_wq_init = 0;
3313static struct waitq g_wq[MAX_GLOBAL_TEST_QUEUES];
3314
3315static inline struct waitq *global_test_waitq(int idx)
3316{
3317 if (idx < 0)
3318 return NULL;
3319
3320 if (!g_wq_init) {
3321 g_wq_init = 1;
3322 for (int i = 0; i < MAX_GLOBAL_TEST_QUEUES; i++)
39037602 3323 waitq_init(&g_wq[i], SYNC_POLICY_FIFO);
3e170ce0
A
3324 }
3325
3326 return &g_wq[idx % MAX_GLOBAL_TEST_QUEUES];
3327}
3328
3329static int sysctl_waitq_wakeup_one SYSCTL_HANDLER_ARGS
3330{
3331#pragma unused(oidp, arg1, arg2)
3332 int error;
3333 int index;
3334 struct waitq *waitq;
3335 kern_return_t kr;
3336 int64_t event64 = 0;
3337
3338 error = SYSCTL_IN(req, &event64, sizeof(event64));
3339 if (error)
3340 return error;
3341
3342 if (!req->newptr)
3343 return SYSCTL_OUT(req, &event64, sizeof(event64));
3344
3345 if (event64 < 0) {
3346 index = (int)((-event64) & 0xffffffff);
3347 waitq = wqset_waitq(sysctl_get_wqset(index));
3348 index = -index;
3349 } else {
3350 index = (int)event64;
3351 waitq = global_test_waitq(index);
3352 }
3353
3354 event64 = 0;
3355
3356 printf("[WQ]: Waking one thread on waitq [%d] event:0x%llx\n",
3357 index, event64);
3358 kr = waitq_wakeup64_one(waitq, (event64_t)event64, THREAD_AWAKENED,
3359 WAITQ_ALL_PRIORITIES);
3360 printf("[WQ]: \tkr=%d\n", kr);
3361
3362 return SYSCTL_OUT(req, &kr, sizeof(kr));
3363}
3364SYSCTL_PROC(_kern, OID_AUTO, waitq_wakeup_one, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
3365 0, 0, sysctl_waitq_wakeup_one, "Q", "wakeup one thread waiting on given event");
3366
3367
3368static int sysctl_waitq_wakeup_all SYSCTL_HANDLER_ARGS
3369{
3370#pragma unused(oidp, arg1, arg2)
3371 int error;
3372 int index;
3373 struct waitq *waitq;
3374 kern_return_t kr;
3375 int64_t event64 = 0;
3376
3377 error = SYSCTL_IN(req, &event64, sizeof(event64));
3378 if (error)
3379 return error;
3380
3381 if (!req->newptr)
3382 return SYSCTL_OUT(req, &event64, sizeof(event64));
3383
3384 if (event64 < 0) {
3385 index = (int)((-event64) & 0xffffffff);
3386 waitq = wqset_waitq(sysctl_get_wqset(index));
3387 index = -index;
3388 } else {
3389 index = (int)event64;
3390 waitq = global_test_waitq(index);
3391 }
3392
3393 event64 = 0;
3394
3395 printf("[WQ]: Waking all threads on waitq [%d] event:0x%llx\n",
3396 index, event64);
3397 kr = waitq_wakeup64_all(waitq, (event64_t)event64,
3398 THREAD_AWAKENED, WAITQ_ALL_PRIORITIES);
3399 printf("[WQ]: \tkr=%d\n", kr);
3400
3401 return SYSCTL_OUT(req, &kr, sizeof(kr));
3402}
3403SYSCTL_PROC(_kern, OID_AUTO, waitq_wakeup_all, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
3404 0, 0, sysctl_waitq_wakeup_all, "Q", "wakeup all threads waiting on given event");
3405
3406
3407static int sysctl_waitq_wait SYSCTL_HANDLER_ARGS
3408{
3409#pragma unused(oidp, arg1, arg2)
3410 int error;
3411 int index;
3412 struct waitq *waitq;
3413 kern_return_t kr;
3414 int64_t event64 = 0;
3415
3416 error = SYSCTL_IN(req, &event64, sizeof(event64));
3417 if (error)
3418 return error;
3419
3420 if (!req->newptr)
3421 return SYSCTL_OUT(req, &event64, sizeof(event64));
3422
3423 if (event64 < 0) {
3424 index = (int)((-event64) & 0xffffffff);
3425 waitq = wqset_waitq(sysctl_get_wqset(index));
3426 index = -index;
3427 } else {
3428 index = (int)event64;
3429 waitq = global_test_waitq(index);
3430 }
3431
3432 event64 = 0;
3433
3434 printf("[WQ]: Current thread waiting on waitq [%d] event:0x%llx\n",
3435 index, event64);
3436 kr = waitq_assert_wait64(waitq, (event64_t)event64, THREAD_INTERRUPTIBLE, 0);
3437 if (kr == THREAD_WAITING)
3438 thread_block(THREAD_CONTINUE_NULL);
3439 printf("[WQ]: \tWoke Up: kr=%d\n", kr);
3440
3441 return SYSCTL_OUT(req, &kr, sizeof(kr));
3442}
3443SYSCTL_PROC(_kern, OID_AUTO, waitq_wait, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
3444 0, 0, sysctl_waitq_wait, "Q", "start waiting on given event");
3445
3446
3447static int sysctl_wqset_select SYSCTL_HANDLER_ARGS
3448{
3449#pragma unused(oidp, arg1, arg2)
3450 int error;
3451 struct waitq_set *wqset;
3452 uint64_t event64 = 0;
3453
3454 error = SYSCTL_IN(req, &event64, sizeof(event64));
3455 if (error)
3456 return error;
3457
3458 if (!req->newptr)
3459 goto out;
3460
3461 wqset = sysctl_get_wqset((int)(event64 & 0xffffffff));
3462 g_waitq_set = wqset;
3463
3464 event64 = wqset_id(wqset);
3465 printf("[WQ]: selected wqset 0x%llx\n", event64);
3466
3467out:
3468 if (g_waitq_set)
3469 event64 = wqset_id(g_waitq_set);
3470 else
3471 event64 = (uint64_t)(-1);
3472
3473 return SYSCTL_OUT(req, &event64, sizeof(event64));
3474}
3475SYSCTL_PROC(_kern, OID_AUTO, wqset_select, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
3476 0, 0, sysctl_wqset_select, "Q", "select/create a global waitq set");
3477
3478
3479static int sysctl_waitq_link SYSCTL_HANDLER_ARGS
3480{
3481#pragma unused(oidp, arg1, arg2)
3482 int error;
3483 int index;
3484 struct waitq *waitq;
3485 struct waitq_set *wqset;
3486 kern_return_t kr;
3487 uint64_t reserved_link = 0;
3488 int64_t event64 = 0;
3489
3490 error = SYSCTL_IN(req, &event64, sizeof(event64));
3491 if (error)
3492 return error;
3493
3494 if (!req->newptr)
3495 return SYSCTL_OUT(req, &event64, sizeof(event64));
3496
3497 if (!g_waitq_set)
3498 g_waitq_set = sysctl_get_wqset(1);
3499 wqset = g_waitq_set;
3500
3501 if (event64 < 0) {
3502 struct waitq_set *tmp;
3503 index = (int)((-event64) & 0xffffffff);
3504 tmp = sysctl_get_wqset(index);
3505 if (tmp == wqset)
3506 goto out;
3507 waitq = wqset_waitq(tmp);
3508 index = -index;
3509 } else {
3510 index = (int)event64;
3511 waitq = global_test_waitq(index);
3512 }
3513
3514 printf("[WQ]: linking waitq [%d] to global wqset (0x%llx)\n",
3515 index, wqset_id(wqset));
3516 reserved_link = waitq_link_reserve(waitq);
3517 kr = waitq_link(waitq, wqset, WAITQ_SHOULD_LOCK, &reserved_link);
3518 waitq_link_release(reserved_link);
3519
3520 printf("[WQ]: \tkr=%d\n", kr);
3521
3522out:
3523 return SYSCTL_OUT(req, &kr, sizeof(kr));
3524}
3525SYSCTL_PROC(_kern, OID_AUTO, waitq_link, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
3526 0, 0, sysctl_waitq_link, "Q", "link global waitq to test waitq set");
3527
3528
3529static int sysctl_waitq_unlink SYSCTL_HANDLER_ARGS
3530{
3531#pragma unused(oidp, arg1, arg2)
3532 int error;
3533 int index;
3534 struct waitq *waitq;
3535 struct waitq_set *wqset;
3536 kern_return_t kr;
3537 uint64_t event64 = 0;
3538
3539 error = SYSCTL_IN(req, &event64, sizeof(event64));
3540 if (error)
3541 return error;
3542
3543 if (!req->newptr)
3544 return SYSCTL_OUT(req, &event64, sizeof(event64));
3545
3546 if (!g_waitq_set)
3547 g_waitq_set = sysctl_get_wqset(1);
3548 wqset = g_waitq_set;
3549
3550 index = (int)event64;
3551 waitq = global_test_waitq(index);
3552
3553 printf("[WQ]: unlinking waitq [%d] from global wqset (0x%llx)\n",
3554 index, wqset_id(wqset));
3555
3556 kr = waitq_unlink(waitq, wqset);
3557 printf("[WQ]: \tkr=%d\n", kr);
3558
3559 return SYSCTL_OUT(req, &kr, sizeof(kr));
3560}
3561SYSCTL_PROC(_kern, OID_AUTO, waitq_unlink, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
3562 0, 0, sysctl_waitq_unlink, "Q", "unlink global waitq from test waitq set");
3563
3564
3565static int sysctl_waitq_clear_prepost SYSCTL_HANDLER_ARGS
3566{
3567#pragma unused(oidp, arg1, arg2)
3568 struct waitq *waitq;
3569 uint64_t event64 = 0;
3570 int error, index;
3571
3572 error = SYSCTL_IN(req, &event64, sizeof(event64));
3573 if (error)
3574 return error;
3575
3576 if (!req->newptr)
3577 return SYSCTL_OUT(req, &event64, sizeof(event64));
3578
3579 index = (int)event64;
3580 waitq = global_test_waitq(index);
3581
3582 printf("[WQ]: clearing prepost on waitq [%d]\n", index);
3583 waitq_clear_prepost(waitq);
3584
3585 return SYSCTL_OUT(req, &event64, sizeof(event64));
3586}
3587SYSCTL_PROC(_kern, OID_AUTO, waitq_clear_prepost, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
3588 0, 0, sysctl_waitq_clear_prepost, "Q", "clear prepost on given waitq");
3589
3590
3591static int sysctl_wqset_unlink_all SYSCTL_HANDLER_ARGS
3592{
3593#pragma unused(oidp, arg1, arg2)
3594 int error;
3595 struct waitq_set *wqset;
3596 kern_return_t kr;
3597 uint64_t event64 = 0;
3598
3599 error = SYSCTL_IN(req, &event64, sizeof(event64));
3600 if (error)
3601 return error;
3602
3603 if (!req->newptr)
3604 return SYSCTL_OUT(req, &event64, sizeof(event64));
3605
3606 if (!g_waitq_set)
3607 g_waitq_set = sysctl_get_wqset(1);
3608 wqset = g_waitq_set;
3609
3610 printf("[WQ]: unlinking all queues from global wqset (0x%llx)\n",
3611 wqset_id(wqset));
3612
3613 kr = waitq_set_unlink_all(wqset);
3614 printf("[WQ]: \tkr=%d\n", kr);
3615
3616 return SYSCTL_OUT(req, &kr, sizeof(kr));
3617}
3618SYSCTL_PROC(_kern, OID_AUTO, wqset_unlink_all, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
3619 0, 0, sysctl_wqset_unlink_all, "Q", "unlink all queues from test waitq set");
3620
3621
3622static int sysctl_wqset_clear_preposts SYSCTL_HANDLER_ARGS
3623{
3624#pragma unused(oidp, arg1, arg2)
3625 struct waitq_set *wqset = NULL;
3626 uint64_t event64 = 0;
3627 int error, index;
3628
3629 error = SYSCTL_IN(req, &event64, sizeof(event64));
3630 if (error)
3631 return error;
3632
3633 if (!req->newptr)
3634 goto out;
3635
3636 index = (int)((event64) & 0xffffffff);
3637 wqset = sysctl_get_wqset(index);
3638 assert(wqset != NULL);
3639
3640 printf("[WQ]: clearing preposts on wqset 0x%llx\n", wqset_id(wqset));
3641 waitq_set_clear_preposts(wqset);
3642
3643out:
3644 if (wqset)
3645 event64 = wqset_id(wqset);
3646 else
3647 event64 = (uint64_t)(-1);
3648
3649 return SYSCTL_OUT(req, &event64, sizeof(event64));
3650}
3651SYSCTL_PROC(_kern, OID_AUTO, wqset_clear_preposts, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
3652 0, 0, sysctl_wqset_clear_preposts, "Q", "clear preposts on given waitq set");
3653
3654#endif /* CONFIG_WAITQ_DEBUG */
3655#endif /* defined(DEVELOPMENT) || defined(DEBUG) */