/* bsd/kern/sys_generic.c (Apple xnu-4570.41.2) */
/*
 * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
/*
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)sys_generic.c	8.9 (Berkeley) 2/14/95
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce
 * support for mandatory and extensible security protections.  This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/filedesc.h>
#include <sys/ioctl.h>
#include <sys/file_internal.h>
#include <sys/proc_internal.h>
#include <sys/socketvar.h>
#include <sys/uio_internal.h>
#include <sys/kernel.h>
#include <sys/guarded.h>
#include <sys/stat.h>
#include <sys/malloc.h>
#include <sys/sysproto.h>

#include <sys/mount_internal.h>
#include <sys/protosw.h>
#include <sys/ev.h>
#include <sys/user.h>
#include <sys/kdebug.h>
#include <sys/poll.h>
#include <sys/event.h>
#include <sys/eventvar.h>
#include <sys/proc.h>
#include <sys/kauth.h>

#include <machine/smp.h>
#include <mach/mach_types.h>
#include <kern/kern_types.h>
#include <kern/assert.h>
#include <kern/kalloc.h>
#include <kern/thread.h>
#include <kern/clock.h>
#include <kern/ledger.h>
#include <kern/task.h>
#include <kern/telemetry.h>
#include <kern/waitq.h>
#include <kern/sched_prim.h>

#include <sys/mbuf.h>
#include <sys/domain.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/errno.h>
#include <sys/syscall.h>
#include <sys/pipe.h>

#include <security/audit/audit.h>

#include <net/if.h>
#include <net/route.h>

#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/in_pcb.h>
#include <netinet/ip_var.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
#include <netinet/tcpip.h>
#include <netinet/tcp_debug.h>
/* for wait queue based select */
#include <kern/waitq.h>
#include <kern/kalloc.h>
#include <sys/vnode_internal.h>

#if CONFIG_MACF
#include <security/mac_framework.h>
#endif

/* XXX should be in a header file somewhere */
void evsofree(struct socket *);
void evpipefree(struct pipe *);
void postpipeevent(struct pipe *, int);
void postevent(struct socket *, struct sockbuf *, int);
extern kern_return_t IOBSDGetPlatformUUID(__darwin_uuid_t uuid, mach_timespec_t timeoutp);

int rd_uio(struct proc *p, int fdes, uio_t uio, user_ssize_t *retval);
int wr_uio(struct proc *p, struct fileproc *fp, uio_t uio, user_ssize_t *retval);

__private_extern__ int dofileread(vfs_context_t ctx, struct fileproc *fp,
			user_addr_t bufp, user_size_t nbyte,
			off_t offset, int flags, user_ssize_t *retval);
__private_extern__ int dofilewrite(vfs_context_t ctx, struct fileproc *fp,
			user_addr_t bufp, user_size_t nbyte,
			off_t offset, int flags, user_ssize_t *retval);
__private_extern__ int preparefileread(struct proc *p, struct fileproc **fp_ret, int fd, int check_for_vnode);
__private_extern__ void donefileread(struct proc *p, struct fileproc *fp_ret, int fd);

/* Conflict wait queue for when selects collide (opaque type) */
struct waitq select_conflict_queue;

/*
 * Init routine called from bsd_init.c
 */
void select_waitq_init(void);
void
select_waitq_init(void)
{
	waitq_init(&select_conflict_queue, SYNC_POLICY_FIFO);
}

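/*
 * Shorthand accessors: fileprocs that share an open file description
 * point at the same fileglob, so each of these macros reads the shared
 * per-open state through fp->f_fglob.
 */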
#define f_flag f_fglob->fg_flag
#define f_type f_fglob->fg_ops->fo_type
#define f_msgcount f_fglob->fg_msgcount
#define f_cred f_fglob->fg_cred
#define f_ops f_fglob->fg_ops
#define f_offset f_fglob->fg_offset
#define f_data f_fglob->fg_data

/*
 * Read system call.
 *
 * Returns:	0			Success
 *	preparefileread:EBADF
 *	preparefileread:ESPIPE
 *	preparefileread:ENXIO
 *	preparefileread:EBADF
 *	dofileread:???
 */
int
read(struct proc *p, struct read_args *uap, user_ssize_t *retval)
{
	__pthread_testcancel(1);
	return(read_nocancel(p, (struct read_nocancel_args *)uap, retval));
}
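/*
 * Note: read() is a cancellation point.  __pthread_testcancel(1) checks
 * for a pending pthread cancellation before the _nocancel variant does
 * the real work; the same wrapper pattern repeats below for pread,
 * readv, write, pwrite, writev, select, pselect and poll.
 */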

int
read_nocancel(struct proc *p, struct read_nocancel_args *uap, user_ssize_t *retval)
{
	struct fileproc *fp;
	int error;
	int fd = uap->fd;
	struct vfs_context context;

	if ( (error = preparefileread(p, &fp, fd, 0)) )
		return (error);

	context = *(vfs_context_current());
	context.vc_ucred = fp->f_fglob->fg_cred;

	error = dofileread(&context, fp, uap->cbuf, uap->nbyte,
			(off_t)-1, 0, retval);

	donefileread(p, fp, fd);

	return (error);
}

/*
 * Pread system call
 *
 * Returns:	0			Success
 *	preparefileread:EBADF
 *	preparefileread:ESPIPE
 *	preparefileread:ENXIO
 *	preparefileread:EBADF
 *	dofileread:???
 */
int
pread(struct proc *p, struct pread_args *uap, user_ssize_t *retval)
{
	__pthread_testcancel(1);
	return(pread_nocancel(p, (struct pread_nocancel_args *)uap, retval));
}

int
pread_nocancel(struct proc *p, struct pread_nocancel_args *uap, user_ssize_t *retval)
{
	struct fileproc *fp = NULL;	/* fp set by preparefileread() */
	int fd = uap->fd;
	int error;
	struct vfs_context context;

	if ( (error = preparefileread(p, &fp, fd, 1)) )
		goto out;

	context = *(vfs_context_current());
	context.vc_ucred = fp->f_fglob->fg_cred;

	error = dofileread(&context, fp, uap->buf, uap->nbyte,
			uap->offset, FOF_OFFSET, retval);

	donefileread(p, fp, fd);

	KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_pread) | DBG_FUNC_NONE),
	    uap->fd, uap->nbyte, (unsigned int)((uap->offset >> 32)), (unsigned int)(uap->offset), 0);

out:
	return (error);
}

/*
 * Code common for read and pread
 */

void
donefileread(struct proc *p, struct fileproc *fp, int fd)
{
	proc_fdlock_spin(p);
	fp_drop(p, fd, fp, 1);
	proc_fdunlock(p);
}

/*
 * Returns:	0			Success
 *		EBADF
 *		ESPIPE
 *		ENXIO
 *	fp_lookup:EBADF
 *	fo_read:???
 */
int
preparefileread(struct proc *p, struct fileproc **fp_ret, int fd, int check_for_pread)
{
	vnode_t vp;
	int error;
	struct fileproc *fp;

	AUDIT_ARG(fd, fd);

	proc_fdlock_spin(p);

	error = fp_lookup(p, fd, &fp, 1);

	if (error) {
		proc_fdunlock(p);
		return (error);
	}
	if ((fp->f_flag & FREAD) == 0) {
		error = EBADF;
		goto out;
	}
	if (check_for_pread && (fp->f_type != DTYPE_VNODE)) {
		error = ESPIPE;
		goto out;
	}
	if (fp->f_type == DTYPE_VNODE) {
		vp = (struct vnode *)fp->f_fglob->fg_data;

		if (check_for_pread && (vnode_isfifo(vp))) {
			error = ESPIPE;
			goto out;
		}
		if (check_for_pread && (vp->v_flag & VISTTY)) {
			error = ENXIO;
			goto out;
		}
	}

	*fp_ret = fp;

	proc_fdunlock(p);
	return (0);

out:
	fp_drop(p, fd, fp, 1);
	proc_fdunlock(p);
	return (error);
}


/*
 * Returns:	0			Success
 *		EINVAL
 *	fo_read:???
 */
__private_extern__ int
dofileread(vfs_context_t ctx, struct fileproc *fp,
	   user_addr_t bufp, user_size_t nbyte, off_t offset, int flags,
	   user_ssize_t *retval)
{
	uio_t auio;
	user_ssize_t bytecnt;
	long error = 0;
	char uio_buf[ UIO_SIZEOF(1) ];

	if (nbyte > INT_MAX)
		return (EINVAL);

	if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
		auio = uio_createwithbuffer(1, offset, UIO_USERSPACE64, UIO_READ,
					&uio_buf[0], sizeof(uio_buf));
	} else {
		auio = uio_createwithbuffer(1, offset, UIO_USERSPACE32, UIO_READ,
					&uio_buf[0], sizeof(uio_buf));
	}
	uio_addiov(auio, bufp, nbyte);

	bytecnt = nbyte;

	if ((error = fo_read(fp, auio, flags, ctx))) {
		if (uio_resid(auio) != bytecnt && (error == ERESTART ||
			error == EINTR || error == EWOULDBLOCK))
			error = 0;
	}
	bytecnt -= uio_resid(auio);

	*retval = bytecnt;

	return (error);
}
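/*
 * Note on the partial-transfer convention above: if fo_read() moved any
 * bytes before being interrupted (ERESTART, EINTR or EWOULDBLOCK), the
 * error is suppressed and the caller sees a short read instead; the
 * same rule is applied in dofilewrite(), wr_uio() and rd_uio() below.
 */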

/*
 * Scatter read system call.
 *
 * Returns:	0			Success
 *		EINVAL
 *		ENOMEM
 *	copyin:EFAULT
 *	rd_uio:???
 */
int
readv(struct proc *p, struct readv_args *uap, user_ssize_t *retval)
{
	__pthread_testcancel(1);
	return(readv_nocancel(p, (struct readv_nocancel_args *)uap, retval));
}

int
readv_nocancel(struct proc *p, struct readv_nocancel_args *uap, user_ssize_t *retval)
{
	uio_t auio = NULL;
	int error;
	struct user_iovec *iovp;

	/* Verify range before calling uio_create() */
	if (uap->iovcnt <= 0 || uap->iovcnt > UIO_MAXIOV)
		return (EINVAL);

	/* allocate a uio large enough to hold the number of iovecs passed */
	auio = uio_create(uap->iovcnt, 0,
			(IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
			UIO_READ);

	/* get the location of the iovecs within the uio, then copy in the
	 * iovecs from user space.
	 */
	iovp = uio_iovsaddr(auio);
	if (iovp == NULL) {
		error = ENOMEM;
		goto ExitThisRoutine;
	}
	error = copyin_user_iovec_array(uap->iovp,
		IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
		uap->iovcnt, iovp);
	if (error) {
		goto ExitThisRoutine;
	}

	/* finalize uio_t for use and do the IO
	 */
	error = uio_calculateresid(auio);
	if (error) {
		goto ExitThisRoutine;
	}
	error = rd_uio(p, uap->fd, auio, retval);

ExitThisRoutine:
	if (auio != NULL) {
		uio_free(auio);
	}
	return (error);
}

/*
 * Write system call
 *
 * Returns:	0			Success
 *		EBADF
 *	fp_lookup:EBADF
 *	dofilewrite:???
 */
int
write(struct proc *p, struct write_args *uap, user_ssize_t *retval)
{
	__pthread_testcancel(1);
	return(write_nocancel(p, (struct write_nocancel_args *)uap, retval));

}

int
write_nocancel(struct proc *p, struct write_nocancel_args *uap, user_ssize_t *retval)
{
	struct fileproc *fp;
	int error;
	int fd = uap->fd;
	bool wrote_some = false;

	AUDIT_ARG(fd, fd);

	error = fp_lookup(p,fd,&fp,0);
	if (error)
		return(error);
	if ((fp->f_flag & FWRITE) == 0) {
		error = EBADF;
	} else if (FP_ISGUARDED(fp, GUARD_WRITE)) {
		proc_fdlock(p);
		error = fp_guard_exception(p, fd, fp, kGUARD_EXC_WRITE);
		proc_fdunlock(p);
	} else {
		struct vfs_context context = *(vfs_context_current());
		context.vc_ucred = fp->f_fglob->fg_cred;

		error = dofilewrite(&context, fp, uap->cbuf, uap->nbyte,
			(off_t)-1, 0, retval);

		wrote_some = *retval > 0;
	}
	if (wrote_some)
		fp_drop_written(p, fd, fp);
	else
		fp_drop(p, fd, fp, 0);
	return(error);
}

/*
 * pwrite system call
 *
 * Returns:	0			Success
 *		EBADF
 *		ESPIPE
 *		ENXIO
 *		EINVAL
 *	fp_lookup:EBADF
 *	dofilewrite:???
 */
int
pwrite(struct proc *p, struct pwrite_args *uap, user_ssize_t *retval)
{
	__pthread_testcancel(1);
	return(pwrite_nocancel(p, (struct pwrite_nocancel_args *)uap, retval));
}

int
pwrite_nocancel(struct proc *p, struct pwrite_nocancel_args *uap, user_ssize_t *retval)
{
	struct fileproc *fp;
	int error;
	int fd = uap->fd;
	vnode_t vp = (vnode_t)0;
	bool wrote_some = false;

	AUDIT_ARG(fd, fd);

	error = fp_lookup(p,fd,&fp,0);
	if (error)
		return(error);

	if ((fp->f_flag & FWRITE) == 0) {
		error = EBADF;
	} else if (FP_ISGUARDED(fp, GUARD_WRITE)) {
		proc_fdlock(p);
		error = fp_guard_exception(p, fd, fp, kGUARD_EXC_WRITE);
		proc_fdunlock(p);
	} else {
		struct vfs_context context = *vfs_context_current();
		context.vc_ucred = fp->f_fglob->fg_cred;

		if (fp->f_type != DTYPE_VNODE) {
			error = ESPIPE;
			goto errout;
		}
		vp = (vnode_t)fp->f_fglob->fg_data;
		if (vnode_isfifo(vp)) {
			error = ESPIPE;
			goto errout;
		}
		if ((vp->v_flag & VISTTY)) {
			error = ENXIO;
			goto errout;
		}
		if (uap->offset == (off_t)-1) {
			error = EINVAL;
			goto errout;
		}

		error = dofilewrite(&context, fp, uap->buf, uap->nbyte,
			uap->offset, FOF_OFFSET, retval);
		wrote_some = *retval > 0;
	}
errout:
	if (wrote_some)
		fp_drop_written(p, fd, fp);
	else
		fp_drop(p, fd, fp, 0);

	KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_pwrite) | DBG_FUNC_NONE),
	    uap->fd, uap->nbyte, (unsigned int)((uap->offset >> 32)), (unsigned int)(uap->offset), 0);

	return(error);
}

/*
 * Returns:	0			Success
 *		EINVAL
 *	<fo_write>:EPIPE
 *	<fo_write>:???		[indirect through struct fileops]
 */
__private_extern__ int
dofilewrite(vfs_context_t ctx, struct fileproc *fp,
	    user_addr_t bufp, user_size_t nbyte, off_t offset, int flags,
	    user_ssize_t *retval)
{
	uio_t auio;
	long error = 0;
	user_ssize_t bytecnt;
	char uio_buf[ UIO_SIZEOF(1) ];

	if (nbyte > INT_MAX) {
		*retval = 0;
		return (EINVAL);
	}

	if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
		auio = uio_createwithbuffer(1, offset, UIO_USERSPACE64, UIO_WRITE,
					&uio_buf[0], sizeof(uio_buf));
	} else {
		auio = uio_createwithbuffer(1, offset, UIO_USERSPACE32, UIO_WRITE,
					&uio_buf[0], sizeof(uio_buf));
	}
	uio_addiov(auio, bufp, nbyte);

	bytecnt = nbyte;
	if ((error = fo_write(fp, auio, flags, ctx))) {
		if (uio_resid(auio) != bytecnt && (error == ERESTART ||
			error == EINTR || error == EWOULDBLOCK))
			error = 0;
		/* The socket layer handles SIGPIPE */
		if (error == EPIPE && fp->f_type != DTYPE_SOCKET &&
		    (fp->f_fglob->fg_lflags & FG_NOSIGPIPE) == 0) {
			/* XXX Raise the signal on the thread? */
			psignal(vfs_context_proc(ctx), SIGPIPE);
		}
	}
	bytecnt -= uio_resid(auio);
	*retval = bytecnt;

	return (error);
}

/*
 * Gather write system call
 */
int
writev(struct proc *p, struct writev_args *uap, user_ssize_t *retval)
{
	__pthread_testcancel(1);
	return(writev_nocancel(p, (struct writev_nocancel_args *)uap, retval));
}

int
writev_nocancel(struct proc *p, struct writev_nocancel_args *uap, user_ssize_t *retval)
{
	uio_t auio = NULL;
	int error;
	struct fileproc *fp;
	struct user_iovec *iovp;
	bool wrote_some = false;

	AUDIT_ARG(fd, uap->fd);

	/* Verify range before calling uio_create() */
	if (uap->iovcnt <= 0 || uap->iovcnt > UIO_MAXIOV)
		return (EINVAL);

	/* allocate a uio large enough to hold the number of iovecs passed */
	auio = uio_create(uap->iovcnt, 0,
			(IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
			UIO_WRITE);

	/* get the location of the iovecs within the uio, then copy in the
	 * iovecs from user space.
	 */
	iovp = uio_iovsaddr(auio);
	if (iovp == NULL) {
		error = ENOMEM;
		goto ExitThisRoutine;
	}
	error = copyin_user_iovec_array(uap->iovp,
		IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
		uap->iovcnt, iovp);
	if (error) {
		goto ExitThisRoutine;
	}

	/* finalize uio_t for use and do the IO
	 */
	error = uio_calculateresid(auio);
	if (error) {
		goto ExitThisRoutine;
	}

	error = fp_lookup(p, uap->fd, &fp, 0);
	if (error)
		goto ExitThisRoutine;

	if ((fp->f_flag & FWRITE) == 0) {
		error = EBADF;
	} else if (FP_ISGUARDED(fp, GUARD_WRITE)) {
		proc_fdlock(p);
		error = fp_guard_exception(p, uap->fd, fp, kGUARD_EXC_WRITE);
		proc_fdunlock(p);
	} else {
		error = wr_uio(p, fp, auio, retval);
		wrote_some = *retval > 0;
	}

	if (wrote_some)
		fp_drop_written(p, uap->fd, fp);
	else
		fp_drop(p, uap->fd, fp, 0);

ExitThisRoutine:
	if (auio != NULL) {
		uio_free(auio);
	}
	return (error);
}


int
wr_uio(struct proc *p, struct fileproc *fp, uio_t uio, user_ssize_t *retval)
{
	int error;
	user_ssize_t count;
	struct vfs_context context = *vfs_context_current();

	count = uio_resid(uio);

	context.vc_ucred = fp->f_cred;
	error = fo_write(fp, uio, 0, &context);
	if (error) {
		if (uio_resid(uio) != count && (error == ERESTART ||
			error == EINTR || error == EWOULDBLOCK))
			error = 0;
		/* The socket layer handles SIGPIPE */
		if (error == EPIPE && fp->f_type != DTYPE_SOCKET &&
		    (fp->f_fglob->fg_lflags & FG_NOSIGPIPE) == 0)
			psignal(p, SIGPIPE);
	}
	*retval = count - uio_resid(uio);

	return(error);
}


int
rd_uio(struct proc *p, int fdes, uio_t uio, user_ssize_t *retval)
{
	struct fileproc *fp;
	int error;
	user_ssize_t count;
	struct vfs_context context = *vfs_context_current();

	if ( (error = preparefileread(p, &fp, fdes, 0)) )
		return (error);

	count = uio_resid(uio);

	context.vc_ucred = fp->f_cred;

	error = fo_read(fp, uio, 0, &context);

	if (error) {
		if (uio_resid(uio) != count && (error == ERESTART ||
			error == EINTR || error == EWOULDBLOCK))
			error = 0;
	}
	*retval = count - uio_resid(uio);

	donefileread(p, fp, fdes);

	return (error);
}

/*
 * Ioctl system call
 *
 * Returns:	0			Success
 *		EBADF
 *		ENOTTY
 *		ENOMEM
 *		ESRCH
 *	copyin:EFAULT
 *	copyout:EFAULT
 *	fp_lookup:EBADF			Bad file descriptor
 *	fo_ioctl:???
 */
int
ioctl(struct proc *p, struct ioctl_args *uap, __unused int32_t *retval)
{
	struct fileproc *fp = NULL;
	int error = 0;
	u_int size = 0;
	caddr_t datap = NULL, memp = NULL;
	boolean_t is64bit = FALSE;
	int tmp = 0;
#define STK_PARAMS	128
	char stkbuf[STK_PARAMS] = {};
	int fd = uap->fd;
	u_long com = uap->com;
	struct vfs_context context = *vfs_context_current();

	AUDIT_ARG(fd, uap->fd);
	AUDIT_ARG(addr, uap->data);

	is64bit = proc_is64bit(p);
#if CONFIG_AUDIT
	if (is64bit)
		AUDIT_ARG(value64, com);
	else
		AUDIT_ARG(cmd, CAST_DOWN_EXPLICIT(int, com));
#endif /* CONFIG_AUDIT */

	/*
	 * Interpret high order word to find amount of data to be
	 * copied to/from the user's address space.
	 */
	size = IOCPARM_LEN(com);
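	/*
	 * Sketch of the BSD ioctl command encoding relied on here: the
	 * 32-bit command word packs a direction (IOC_VOID/IOC_OUT/IOC_IN)
	 * in the top bits, a 13-bit parameter length (extracted by
	 * IOCPARM_LEN above), a group letter and a command number.  For
	 * example, FIONBIO is defined as _IOW('f', 126, int), so for it
	 * 'size' comes out as sizeof(int) and IOC_IN is set.
	 */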
	if (size > IOCPARM_MAX)
		return ENOTTY;
	if (size > sizeof (stkbuf)) {
		if ((memp = (caddr_t)kalloc(size)) == 0)
			return ENOMEM;
		datap = memp;
	} else
		datap = &stkbuf[0];
	if (com & IOC_IN) {
		if (size) {
			error = copyin(uap->data, datap, size);
			if (error)
				goto out_nofp;
		} else {
			/* XXX - IOC_IN and no size? we should probably return an error here!! */
			if (is64bit) {
				*(user_addr_t *)datap = uap->data;
			}
			else {
				*(uint32_t *)datap = (uint32_t)uap->data;
			}
		}
	} else if ((com & IOC_OUT) && size)
		/*
		 * Zero the buffer so the user always
		 * gets back something deterministic.
		 */
		bzero(datap, size);
	else if (com & IOC_VOID) {
		/* XXX - this is odd since IOC_VOID means no parameters */
		if (is64bit) {
			*(user_addr_t *)datap = uap->data;
		}
		else {
			*(uint32_t *)datap = (uint32_t)uap->data;
		}
	}

	proc_fdlock(p);
	error = fp_lookup(p,fd,&fp,1);
	if (error) {
		proc_fdunlock(p);
		goto out_nofp;
	}

	AUDIT_ARG(file, p, fp);

	if ((fp->f_flag & (FREAD | FWRITE)) == 0) {
		error = EBADF;
		goto out;
	}

	context.vc_ucred = fp->f_fglob->fg_cred;

#if CONFIG_MACF
	error = mac_file_check_ioctl(context.vc_ucred, fp->f_fglob, com);
	if (error)
		goto out;
#endif

	switch (com) {
	case FIONCLEX:
		*fdflags(p, fd) &= ~UF_EXCLOSE;
		break;

	case FIOCLEX:
		*fdflags(p, fd) |= UF_EXCLOSE;
		break;

	case FIONBIO:
		if ( (tmp = *(int *)datap) )
			fp->f_flag |= FNONBLOCK;
		else
			fp->f_flag &= ~FNONBLOCK;
		error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, &context);
		break;

	case FIOASYNC:
		if ( (tmp = *(int *)datap) )
			fp->f_flag |= FASYNC;
		else
			fp->f_flag &= ~FASYNC;
		error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, &context);
		break;

	case FIOSETOWN:
		tmp = *(int *)datap;
		if (fp->f_type == DTYPE_SOCKET) {
			((struct socket *)fp->f_data)->so_pgid = tmp;
			break;
		}
		if (fp->f_type == DTYPE_PIPE) {
			error = fo_ioctl(fp, (int)TIOCSPGRP, (caddr_t)&tmp, &context);
			break;
		}
		if (tmp <= 0) {
			tmp = -tmp;
		} else {
			struct proc *p1 = proc_find(tmp);
			if (p1 == 0) {
				error = ESRCH;
				break;
			}
			tmp = p1->p_pgrpid;
			proc_rele(p1);
		}
		error = fo_ioctl(fp, (int)TIOCSPGRP, (caddr_t)&tmp, &context);
		break;

	case FIOGETOWN:
		if (fp->f_type == DTYPE_SOCKET) {
			*(int *)datap = ((struct socket *)fp->f_data)->so_pgid;
			break;
		}
		error = fo_ioctl(fp, TIOCGPGRP, datap, &context);
		*(int *)datap = -*(int *)datap;
		break;

	default:
		error = fo_ioctl(fp, com, datap, &context);
		/*
		 * Copy any data to user, size was
		 * already set and checked above.
		 */
		if (error == 0 && (com & IOC_OUT) && size)
			error = copyout(datap, uap->data, (u_int)size);
		break;
	}
out:
	fp_drop(p, fd, fp, 1);
	proc_fdunlock(p);

out_nofp:
	if (memp)
		kfree(memp, size);
	return(error);
}
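/*
 * Usage sketch (userspace, illustrative only): the FIONBIO case handled
 * above is what makes the classic non-blocking idiom work:
 *
 *	int on = 1;
 *	ioctl(fd, FIONBIO, &on);	// equivalent to setting O_NONBLOCK
 */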

int selwait, nselcoll;
#define SEL_FIRSTPASS 1
#define SEL_SECONDPASS 2
extern int selcontinue(int error);
extern int selprocess(int error, int sel_pass);
static int selscan(struct proc *p, struct _select * sel, struct _select_data * seldata,
			int nfd, int32_t *retval, int sel_pass, struct waitq_set *wqset);
static int selcount(struct proc *p, u_int32_t *ibits, int nfd, int *count);
static int seldrop_locked(struct proc *p, u_int32_t *ibits, int nfd, int lim, int *need_wakeup, int fromselcount);
static int seldrop(struct proc *p, u_int32_t *ibits, int nfd);
static int select_internal(struct proc *p, struct select_nocancel_args *uap, uint64_t timeout, int32_t *retval);

/*
 * Select system call.
 *
 * Returns:	0			Success
 *		EINVAL			Invalid argument
 *		EAGAIN			Nonconformant error if allocation fails
 */
int
select(struct proc *p, struct select_args *uap, int32_t *retval)
{
	__pthread_testcancel(1);
	return select_nocancel(p, (struct select_nocancel_args *)uap, retval);
}

int
select_nocancel(struct proc *p, struct select_nocancel_args *uap, int32_t *retval)
{
	uint64_t timeout = 0;

	if (uap->tv) {
		int err;
		struct timeval atv;
		if (IS_64BIT_PROCESS(p)) {
			struct user64_timeval atv64;
			err = copyin(uap->tv, (caddr_t)&atv64, sizeof(atv64));
			/* Loses resolution - assume timeout < 68 years */
			atv.tv_sec = atv64.tv_sec;
			atv.tv_usec = atv64.tv_usec;
		} else {
			struct user32_timeval atv32;
			err = copyin(uap->tv, (caddr_t)&atv32, sizeof(atv32));
			atv.tv_sec = atv32.tv_sec;
			atv.tv_usec = atv32.tv_usec;
		}
		if (err)
			return err;

		if (itimerfix(&atv)) {
			err = EINVAL;
			return err;
		}

		clock_absolutetime_interval_to_deadline(tvtoabstime(&atv), &timeout);
	}

	return select_internal(p, uap, timeout, retval);
}

int
pselect(struct proc *p, struct pselect_args *uap, int32_t *retval)
{
	__pthread_testcancel(1);
	return pselect_nocancel(p, (struct pselect_nocancel_args *)uap, retval);
}

int
pselect_nocancel(struct proc *p, struct pselect_nocancel_args *uap, int32_t *retval)
{
	int err;
	struct uthread *ut;
	uint64_t timeout = 0;

	if (uap->ts) {
		struct timespec ts;

		if (IS_64BIT_PROCESS(p)) {
			struct user64_timespec ts64;
			err = copyin(uap->ts, (caddr_t)&ts64, sizeof(ts64));
			ts.tv_sec = ts64.tv_sec;
			ts.tv_nsec = ts64.tv_nsec;
		} else {
			struct user32_timespec ts32;
			err = copyin(uap->ts, (caddr_t)&ts32, sizeof(ts32));
			ts.tv_sec = ts32.tv_sec;
			ts.tv_nsec = ts32.tv_nsec;
		}
		if (err) {
			return err;
		}

		if (!timespec_is_valid(&ts)) {
			return EINVAL;
		}
		clock_absolutetime_interval_to_deadline(tstoabstime(&ts), &timeout);
	}

	ut = get_bsdthread_info(current_thread());

	if (uap->mask != USER_ADDR_NULL) {
		/* save current mask, then copyin and set new mask */
		sigset_t newset;
		err = copyin(uap->mask, &newset, sizeof(sigset_t));
		if (err) {
			return err;
		}
		ut->uu_oldmask = ut->uu_sigmask;
		ut->uu_flag |= UT_SAS_OLDMASK;
		ut->uu_sigmask = (newset & ~sigcantmask);
	}

	err = select_internal(p, (struct select_nocancel_args *)uap, timeout, retval);

	if (err != EINTR && ut->uu_flag & UT_SAS_OLDMASK) {
		/*
		 * Restore old mask (direct return case). NOTE: EINTR can also be returned
		 * if the thread is cancelled. In that case, we don't reset the signal
		 * mask to its original value (which usually happens in the signal
		 * delivery path). This behavior is permitted by POSIX.
		 */
		ut->uu_sigmask = ut->uu_oldmask;
		ut->uu_oldmask = 0;
		ut->uu_flag &= ~UT_SAS_OLDMASK;
	}

	return err;
}
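/*
 * Usage sketch (userspace, illustrative only): pselect() installs the
 * caller-supplied mask for the duration of the wait and restores the
 * old one afterwards, which closes the race between testing a flag set
 * by a signal handler and blocking in select():
 *
 *	sigset_t unblocked;	// mask with the signal of interest unblocked
 *	// signal is blocked here; check the handler's flag, then wait:
 *	pselect(nfds, &rfds, NULL, NULL, NULL, &unblocked);
 */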

/*
 * Generic implementation of {,p}select. Care: we type-pun uap across the two
 * syscalls, which differ slightly. The first 4 arguments (nfds and the fd sets)
 * are identical. The 5th (timeout) argument points to different types, so we
 * unpack in the syscall-specific code, but the generic code still does a null
 * check on this argument to determine if a timeout was specified.
 */
static int
select_internal(struct proc *p, struct select_nocancel_args *uap, uint64_t timeout, int32_t *retval)
{
	int error = 0;
	u_int ni, nw;
	thread_t th_act;
	struct uthread *uth;
	struct _select *sel;
	struct _select_data *seldata;
	int needzerofill = 1;
	int count = 0;
	size_t sz = 0;

	th_act = current_thread();
	uth = get_bsdthread_info(th_act);
	sel = &uth->uu_select;
	seldata = &uth->uu_kevent.ss_select_data;
	*retval = 0;

	seldata->args = uap;
	seldata->retval = retval;
	seldata->wqp = NULL;
	seldata->count = 0;

	if (uap->nd < 0) {
		return (EINVAL);
	}

	/* select on thread of process that already called proc_exit() */
	if (p->p_fd == NULL) {
		return (EBADF);
	}

	if (uap->nd > p->p_fd->fd_nfiles)
		uap->nd = p->p_fd->fd_nfiles; /* forgiving; slightly wrong */

	nw = howmany(uap->nd, NFDBITS);
	ni = nw * sizeof(fd_mask);

	/*
	 * if the previously allocated space for the bits is smaller than
	 * what is requested or no space has yet been allocated for this
	 * thread, allocate enough space now.
	 *
	 * Note: If this allocation fails, select() will return EAGAIN; this
	 * is the same thing poll() returns in a no-memory situation, but
	 * it is not a POSIX compliant error code for select().
	 */
	if (sel->nbytes < (3 * ni)) {
		int nbytes = 3 * ni;

		/* Free previous allocation, if any */
		if (sel->ibits != NULL)
			FREE(sel->ibits, M_TEMP);
		if (sel->obits != NULL) {
			FREE(sel->obits, M_TEMP);
			/* NULL out; subsequent ibits allocation may fail */
			sel->obits = NULL;
		}

		MALLOC(sel->ibits, u_int32_t *, nbytes, M_TEMP, M_WAITOK | M_ZERO);
		if (sel->ibits == NULL)
			return (EAGAIN);
		MALLOC(sel->obits, u_int32_t *, nbytes, M_TEMP, M_WAITOK | M_ZERO);
		if (sel->obits == NULL) {
			FREE(sel->ibits, M_TEMP);
			sel->ibits = NULL;
			return (EAGAIN);
		}
		sel->nbytes = nbytes;
		needzerofill = 0;
	}

	if (needzerofill) {
		bzero((caddr_t)sel->ibits, sel->nbytes);
		bzero((caddr_t)sel->obits, sel->nbytes);
	}

	/*
	 * get the bits from the user address space
	 */
#define	getbits(name, x) \
	do { \
		if (uap->name && (error = copyin(uap->name, \
			(caddr_t)&sel->ibits[(x) * nw], ni))) \
			goto continuation; \
	} while (0)

	getbits(in, 0);
	getbits(ou, 1);
	getbits(ex, 2);
#undef	getbits
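	/*
	 * At this point sel->ibits holds the three user fd_sets packed
	 * back to back, nw words each (layout sketch):
	 *
	 *	ibits[0   .. nw-1 ]	readfds   (uap->in)
	 *	ibits[nw  .. 2nw-1]	writefds  (uap->ou)
	 *	ibits[2nw .. 3nw-1]	exceptfds (uap->ex)
	 */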

	seldata->abstime = timeout;

	if ( (error = selcount(p, sel->ibits, uap->nd, &count)) ) {
		goto continuation;
	}

	/*
	 * We need an array of waitq pointers. This is due to the new way
	 * in which waitqs are linked to sets. When a thread selects on a
	 * file descriptor, a waitq (embedded in a selinfo structure) is
	 * added to the thread's local waitq set. There is no longer any
	 * way to directly iterate over all members of a given waitq set.
	 * The process of linking a waitq into a set may allocate a link
	 * table object. Because we can't iterate over all the waitqs to
	 * which our thread waitq set belongs, we need a way of removing
	 * this link object!
	 *
	 * Thus we need a buffer which will hold one waitq pointer
	 * per FD being selected. During the tear-down phase we can use
	 * these pointers to dis-associate the underlying selinfo's waitq
	 * from our thread's waitq set.
	 *
	 * Because we also need to allocate a waitq set for this thread,
	 * we use a bare buffer pointer to hold all the memory. Note that
	 * this memory is cached in the thread pointer and not reaped until
	 * the thread exits. This is generally OK because threads that
	 * call select tend to keep calling select repeatedly.
	 */
	sz = ALIGN(sizeof(struct waitq_set)) + (count * sizeof(uint64_t));
	if (sz > uth->uu_wqstate_sz) {
		/* (re)allocate a buffer to hold waitq pointers */
		if (uth->uu_wqset) {
			if (waitq_set_is_valid(uth->uu_wqset))
				waitq_set_deinit(uth->uu_wqset);
			FREE(uth->uu_wqset, M_SELECT);
		} else if (uth->uu_wqstate_sz && !uth->uu_wqset)
			panic("select: thread structure corrupt! "
			      "uu_wqstate_sz:%ld, wqstate_buf == NULL",
			      uth->uu_wqstate_sz);
		uth->uu_wqstate_sz = sz;
		MALLOC(uth->uu_wqset, struct waitq_set *, sz, M_SELECT, M_WAITOK);
		if (!uth->uu_wqset)
			panic("can't allocate %ld bytes for wqstate buffer",
			      uth->uu_wqstate_sz);
		waitq_set_init(uth->uu_wqset,
			       SYNC_POLICY_FIFO|SYNC_POLICY_PREPOST, NULL, NULL);
	}

	if (!waitq_set_is_valid(uth->uu_wqset))
		waitq_set_init(uth->uu_wqset,
			       SYNC_POLICY_FIFO|SYNC_POLICY_PREPOST, NULL, NULL);

	/* the last chunk of our buffer is an array of waitq pointers */
	seldata->wqp = (uint64_t *)((char *)(uth->uu_wqset) + ALIGN(sizeof(struct waitq_set)));
	bzero(seldata->wqp, sz - ALIGN(sizeof(struct waitq_set)));
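	/*
	 * Layout sketch of the per-thread buffer just described:
	 *
	 *	uth->uu_wqset -> +----------------------------------+
	 *	                 | struct waitq_set (ALIGN'd)       |
	 *	seldata->wqp  -> +----------------------------------+
	 *	                 | uint64_t wqp[0 .. count-1]       |
	 *	                 | (one prepost id per selected fd) |
	 *	                 +----------------------------------+
	 */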

	seldata->count = count;

continuation:

	if (error) {
		/*
		 * We have already cleaned up any state we established,
		 * either locally or as a result of selcount(). We don't
		 * need to wait_subqueue_unlink_all(), since we haven't set
		 * anything at this point.
		 */
		return (error);
	}

	return selprocess(0, SEL_FIRSTPASS);
}

int
selcontinue(int error)
{
	return selprocess(error, SEL_SECONDPASS);
}


/*
 * selprocess
 *
 * Parameters:	error			The error code from our caller
 *		sel_pass		The pass we are on
 */
int
selprocess(int error, int sel_pass)
{
	int ncoll;
	u_int ni, nw;
	thread_t th_act;
	struct uthread *uth;
	struct proc *p;
	struct select_nocancel_args *uap;
	int *retval;
	struct _select *sel;
	struct _select_data *seldata;
	int unwind = 1;
	int prepost = 0;
	int somewakeup = 0;
	int doretry = 0;
	wait_result_t wait_result;

	p = current_proc();
	th_act = current_thread();
	uth = get_bsdthread_info(th_act);
	sel = &uth->uu_select;
	seldata = &uth->uu_kevent.ss_select_data;
	uap = seldata->args;
	retval = seldata->retval;

	if ((error != 0) && (sel_pass == SEL_FIRSTPASS))
		unwind = 0;
	if (seldata->count == 0)
		unwind = 0;
retry:
	if (error != 0)
		goto done;

	ncoll = nselcoll;
	OSBitOrAtomic(P_SELECT, &p->p_flag);

	/* skip scans if the select is just for timeouts */
	if (seldata->count) {
		error = selscan(p, sel, seldata, uap->nd, retval, sel_pass, uth->uu_wqset);
		if (error || *retval) {
			goto done;
		}
		if (prepost || somewakeup) {
			/*
			 * If the select was woken prematurely, someone else
			 * may already have consumed the data; clear the
			 * flags and go around again if time permits.
			 */
			prepost = 0;
			somewakeup = 0;
			doretry = 1;
		}
	}

	if (uap->tv) {
		uint64_t now;

		clock_get_uptime(&now);
		if (now >= seldata->abstime)
			goto done;
	}

	if (doretry) {
		/* cleanup obits and try again */
		doretry = 0;
		sel_pass = SEL_FIRSTPASS;
		goto retry;
	}

	/*
	 * To effect a poll, the timeout argument should be
	 * non-nil, pointing to a zero-valued timeval structure.
	 */
	if (uap->tv && seldata->abstime == 0) {
		goto done;
	}

	/* No spurious wakeups due to collisions, no need to check for them */
	if ((sel_pass == SEL_SECONDPASS) || ((p->p_flag & P_SELECT) == 0)) {
		sel_pass = SEL_FIRSTPASS;
		goto retry;
	}

	OSBitAndAtomic(~((uint32_t)P_SELECT), &p->p_flag);

	/* if the select is just for timeout skip check */
	if (seldata->count && (sel_pass == SEL_SECONDPASS))
		panic("selprocess: 2nd pass assertwaiting");

	/* waitq_set has waitqueue as first element */
	wait_result = waitq_assert_wait64_leeway((struct waitq *)uth->uu_wqset,
						 NO_EVENT64, THREAD_ABORTSAFE,
						 TIMEOUT_URGENCY_USER_NORMAL,
						 seldata->abstime,
						 TIMEOUT_NO_LEEWAY);
	if (wait_result != THREAD_AWAKENED) {
		/* there are no preposted events */
		error = tsleep1(NULL, PSOCK | PCATCH,
				"select", 0, selcontinue);
	} else {
		prepost = 1;
		error = 0;
	}

	if (error == 0) {
		sel_pass = SEL_SECONDPASS;
		if (!prepost)
			somewakeup = 1;
		goto retry;
	}
done:
	if (unwind) {
		seldrop(p, sel->ibits, uap->nd);
		waitq_set_deinit(uth->uu_wqset);
		/*
		 * zero out the waitq pointer array to avoid use-after free
		 * errors in the selcount error path (seldrop_locked) if/when
		 * the thread re-calls select().
		 */
		bzero((void *)uth->uu_wqset, uth->uu_wqstate_sz);
	}
	OSBitAndAtomic(~((uint32_t)P_SELECT), &p->p_flag);
	/* select is not restarted after signals... */
	if (error == ERESTART)
		error = EINTR;
	if (error == EWOULDBLOCK)
		error = 0;
	nw = howmany(uap->nd, NFDBITS);
	ni = nw * sizeof(fd_mask);

#define	putbits(name, x) \
	do { \
		if (uap->name && (error2 = \
			copyout((caddr_t)&sel->obits[(x) * nw], uap->name, ni))) \
			error = error2; \
	} while (0)

	if (error == 0) {
		int error2;

		putbits(in, 0);
		putbits(ou, 1);
		putbits(ex, 2);
#undef putbits
	}

	if (error != EINTR && sel_pass == SEL_SECONDPASS && uth->uu_flag & UT_SAS_OLDMASK) {
		/* restore signal mask - continuation case */
		uth->uu_sigmask = uth->uu_oldmask;
		uth->uu_oldmask = 0;
		uth->uu_flag &= ~UT_SAS_OLDMASK;
	}

	return(error);
}
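/*
 * Note: when the thread blocks, tsleep1() above is passed selcontinue as
 * a continuation: the kernel stack is discarded and the thread resumes
 * in selcontinue() -> selprocess(error, SEL_SECONDPASS) rather than
 * returning here.  Locals do not survive the sleep, which is why all of
 * the select state lives in the uthread (uu_select / ss_select_data).
 */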


/**
 * remove the fileproc's underlying waitq from the supplied waitq set;
 * clear FP_INSELECT when appropriate
 *
 * Parameters:
 *	fp	File proc that is potentially currently in select
 *	wqp_id	Prepost ID linking the fp's waitq into the set
 *	wqset	Waitq set to which the fileproc may belong
 *		(usually this is the thread's private waitq set)
 * Conditions:
 *	proc_fdlock is held
 */
static void selunlinkfp(struct fileproc *fp, uint64_t wqp_id, struct waitq_set *wqset)
{
	int valid_set = waitq_set_is_valid(wqset);
	int valid_q = !!wqp_id;

	/*
	 * This could be called (from selcount error path) before we setup
	 * the thread's wqset. Check the wqset passed in, and only unlink if
	 * the set is valid.
	 */

	/* unlink the underlying waitq from the input set (thread waitq set) */
	if (valid_q && valid_set)
		waitq_unlink_by_prepost_id(wqp_id, wqset);

	/* allow passing a NULL/invalid fp for seldrop unwind */
	if (!fp || !(fp->f_flags & (FP_INSELECT|FP_SELCONFLICT)))
		return;

	/*
	 * We can always remove the conflict queue from our thread's set: this
	 * will not affect other threads that potentially need to be awoken on
	 * the conflict queue during a fileproc_drain - those sets will still
	 * be linked with the global conflict queue, and the last waiter
	 * on the fp clears the CONFLICT marker.
	 */
	if (valid_set && (fp->f_flags & FP_SELCONFLICT))
		waitq_unlink(&select_conflict_queue, wqset);

	/* jca: TODO:
	 * This isn't quite right - we don't actually know if this
	 * fileproc is in another select or not! Here we just assume
	 * that if we were the first thread to select on the FD, then
	 * we'll be the one to clear this flag...
	 */
	if (valid_set && fp->f_wset == (void *)wqset) {
		fp->f_flags &= ~FP_INSELECT;
		fp->f_wset = NULL;
	}
}

/**
 * connect a fileproc to the given wqset, potentially bridging to a waitq
 * pointed to indirectly by wq_data
 *
 * Parameters:
 *	fp	File proc potentially currently in select
 *	wq_data	Pointer to a pointer to a waitq (could be NULL)
 *	wqset	Waitq set to which the fileproc should now belong
 *		(usually this is the thread's private waitq set)
 *
 * Conditions:
 *	proc_fdlock is held
 */
static uint64_t sellinkfp(struct fileproc *fp, void **wq_data, struct waitq_set *wqset)
{
	struct waitq *f_wq = NULL;

	if ((fp->f_flags & FP_INSELECT) != FP_INSELECT) {
		if (wq_data)
			panic("non-null data:%p on fp:%p not in select?!"
			      "(wqset:%p)", wq_data, fp, wqset);
		return 0;
	}

	if ((fp->f_flags & FP_SELCONFLICT) == FP_SELCONFLICT) {
		/*
		 * The conflict queue requires disabling interrupts, so we
		 * need to explicitly reserve a link object to avoid a
		 * panic/assert in the waitq code. Hopefully this extra step
		 * can be avoided if we can split the waitq structure into
		 * blocking and linkage sub-structures.
		 */
		uint64_t reserved_link = waitq_link_reserve(&select_conflict_queue);
		waitq_link(&select_conflict_queue, wqset, WAITQ_SHOULD_LOCK, &reserved_link);
		waitq_link_release(reserved_link);
	}

	/*
	 * The wq_data parameter has potentially been set by selrecord called
	 * from a subsystem's fo_select() function. If the subsystem does not
	 * call selrecord, then wq_data will be NULL.
	 *
	 * Use memcpy to get the value into a proper pointer because
	 * wq_data most likely points to a stack variable that could be
	 * unaligned on 32-bit systems.
	 */
	if (wq_data) {
		memcpy(&f_wq, wq_data, sizeof(f_wq));
		if (!waitq_is_valid(f_wq))
			f_wq = NULL;
	}

	/* record the first thread's wqset in the fileproc structure */
	if (!fp->f_wset)
		fp->f_wset = (void *)wqset;

	/* handles NULL f_wq */
	return waitq_get_prepost_id(f_wq);
}


/*
 * selscan
 *
 * Parameters:	p			Process performing the select
 *		sel			The per-thread select context structure
 *		nfd			The number of file descriptors to scan
 *		retval			The per thread system call return area
 *		sel_pass		Which pass this is; allowed values are
 *						SEL_FIRSTPASS and SEL_SECONDPASS
 *		wqset			The per thread wait queue set
 *
 * Returns:	0			Success
 *		EIO			Invalid p->p_fd field XXX Obsolete?
 *		EBADF			One of the files in the bit vector is
 *						invalid.
 */
static int
selscan(struct proc *p, struct _select *sel, struct _select_data * seldata,
	int nfd, int32_t *retval, int sel_pass, struct waitq_set *wqset)
{
	struct filedesc *fdp = p->p_fd;
	int msk, i, j, fd;
	u_int32_t bits;
	struct fileproc *fp;
	int n = 0;		/* count of bits */
	int nc = 0;		/* bit vector offset (nc'th bit) */
	static int flag[3] = { FREAD, FWRITE, 0 };
	u_int32_t *iptr, *optr;
	u_int nw;
	u_int32_t *ibits, *obits;
	uint64_t reserved_link, *rl_ptr = NULL;
	int count;
	struct vfs_context context = *vfs_context_current();

	/*
	 * Historical note: reboot-time problems caused by Mac OS X signal
	 * handling (observed in Beaker1C) mean we must verify that
	 * p->p_fd is valid before using it.
	 */
	if (fdp == NULL) {
		*retval = 0;
		return(EIO);
	}
	ibits = sel->ibits;
	obits = sel->obits;

	nw = howmany(nfd, NFDBITS);

	count = seldata->count;

	nc = 0;
	if (!count) {
		*retval = 0;
		return 0;
	}

	proc_fdlock(p);
	for (msk = 0; msk < 3; msk++) {
		iptr = (u_int32_t *)&ibits[msk * nw];
		optr = (u_int32_t *)&obits[msk * nw];

		for (i = 0; i < nfd; i += NFDBITS) {
			bits = iptr[i/NFDBITS];

			while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
				bits &= ~(1 << j);

				if (fd < fdp->fd_nfiles)
					fp = fdp->fd_ofiles[fd];
				else
					fp = NULL;

				if (fp == NULL || (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
					/*
					 * If we abort because of a bad
					 * fd, let the caller unwind...
					 */
					proc_fdunlock(p);
					return(EBADF);
				}
				if (sel_pass == SEL_SECONDPASS) {
					reserved_link = 0;
					rl_ptr = NULL;
					selunlinkfp(fp, seldata->wqp[nc], wqset);
				} else {
					reserved_link = waitq_link_reserve((struct waitq *)wqset);
					rl_ptr = &reserved_link;
					if (fp->f_flags & FP_INSELECT)
						/* someone is already in select on this fp */
						fp->f_flags |= FP_SELCONFLICT;
					else
						fp->f_flags |= FP_INSELECT;
				}

				context.vc_ucred = fp->f_cred;

				/*
				 * stash this value b/c fo_select may replace
				 * reserved_link with a pointer to a waitq object
				 */
				uint64_t rsvd = reserved_link;

				/* The select; set the bit, if true */
				if (fp->f_ops && fp->f_type
					&& fo_select(fp, flag[msk], rl_ptr, &context)) {
					optr[fd/NFDBITS] |= (1 << (fd % NFDBITS));
					n++;
				}
				if (sel_pass == SEL_FIRSTPASS) {
					waitq_link_release(rsvd);
					/*
					 * If the fp's supporting selinfo structure was linked
					 * to this thread's waitq set, then 'reserved_link'
					 * will have been updated by selrecord to be a pointer
					 * to the selinfo's waitq.
					 */
					if (reserved_link == rsvd)
						rl_ptr = NULL; /* fo_select never called selrecord() */
					/*
					 * Hook up the thread's waitq set either to
					 * the fileproc structure, or to the global
					 * conflict queue: but only on the first
					 * select pass.
					 */
					seldata->wqp[nc] = sellinkfp(fp, (void **)rl_ptr, wqset);
				}
				nc++;
			}
		}
	}
	proc_fdunlock(p);

	*retval = n;
	return (0);
}
1656
3e170ce0 1657int poll_callback(struct kqueue *, struct kevent_internal_s *, void *);
91447636
A
1658
1659struct poll_continue_args {
1660 user_addr_t pca_fds;
1661 u_int pca_nfds;
1662 u_int pca_rfds;
1663};
1664
9bccf70c 1665int
b0d623f7 1666poll(struct proc *p, struct poll_args *uap, int32_t *retval)
2d21ac55
A
1667{
1668 __pthread_testcancel(1);
1669 return(poll_nocancel(p, (struct poll_nocancel_args *)uap, retval));
1670}
1671
1672
1673int
b0d623f7 1674poll_nocancel(struct proc *p, struct poll_nocancel_args *uap, int32_t *retval)
1c79356b 1675{
91447636
A
1676 struct poll_continue_args *cont;
1677 struct pollfd *fds;
1678 struct kqueue *kq;
1679 struct timeval atv;
1680 int ncoll, error = 0;
1681 u_int nfds = uap->nfds;
1682 u_int rfds = 0;
1683 u_int i;
1684 size_t ni;
1c79356b 1685
91447636
A
1686 /*
1687 * This is kinda bogus. We have fd limits, but that is not
1688 * really related to the size of the pollfd array. Make sure
1689 * we let the process use at least FD_SETSIZE entries and at
1690 * least enough for the current limits. We want to be reasonably
1691 * safe, but not overly restrictive.
1692 */
1693 if (nfds > OPEN_MAX ||
2d21ac55 1694 (nfds > p->p_rlimit[RLIMIT_NOFILE].rlim_cur && (proc_suser(p) || nfds > FD_SETSIZE)))
91447636 1695 return (EINVAL);
1c79356b 1696
39037602 1697 kq = kqueue_alloc(p, 0);
91447636
A
1698 if (kq == NULL)
1699 return (EAGAIN);
1700
1701 ni = nfds * sizeof(struct pollfd) + sizeof(struct poll_continue_args);
1702 MALLOC(cont, struct poll_continue_args *, ni, M_TEMP, M_WAITOK);
1703 if (NULL == cont) {
1704 error = EAGAIN;
1705 goto out;
1706 }
1707
1708 fds = (struct pollfd *)&cont[1];
1709 error = copyin(uap->fds, fds, nfds * sizeof(struct pollfd));
1710 if (error)
1711 goto out;
1712
1713 if (uap->timeout != -1) {
1714 struct timeval rtv;
1715
1716 atv.tv_sec = uap->timeout / 1000;
1717 atv.tv_usec = (uap->timeout % 1000) * 1000;
1718 if (itimerfix(&atv)) {
1719 error = EINVAL;
1720 goto out;
1721 }
1722 getmicrouptime(&rtv);
1723 timevaladd(&atv, &rtv);
1724 } else {
1725 atv.tv_sec = 0;
1726 atv.tv_usec = 0;
1727 }
1728
1729 /* JMM - all this P_SELECT stuff is bogus */
1730 ncoll = nselcoll;
b0d623f7 1731 OSBitOrAtomic(P_SELECT, &p->p_flag);
91447636
A
1732 for (i = 0; i < nfds; i++) {
1733 short events = fds[i].events;
91447636
A
1734
1735 /* per spec, ignore fd values below zero */
1736 if (fds[i].fd < 0) {
1737 fds[i].revents = 0;
1738 continue;
1739 }
1740
1741 /* convert the poll event into a kqueue kevent */
3e170ce0
A
1742 struct kevent_internal_s kev = {
1743 .ident = fds[i].fd,
1744 .flags = EV_ADD | EV_ONESHOT | EV_POLL,
1745 .udata = CAST_USER_ADDR_T(&fds[i]) };
91447636
A
1746
1747 /* Handle input events */
2d21ac55 1748 if (events & ( POLLIN | POLLRDNORM | POLLPRI | POLLRDBAND | POLLHUP )) {
91447636 1749 kev.filter = EVFILT_READ;
04b8595b 1750 if (events & ( POLLPRI | POLLRDBAND ))
91447636 1751 kev.flags |= EV_OOBAND;
39037602 1752 kevent_register(kq, &kev, p);
91447636
A
1753 }
1754
1755 /* Handle output events */
39037602
A
1756 if ((kev.flags & EV_ERROR) == 0 &&
1757 (events & ( POLLOUT | POLLWRNORM | POLLWRBAND ))) {
91447636 1758 kev.filter = EVFILT_WRITE;
39037602 1759 kevent_register(kq, &kev, p);
91447636
A
1760 }
1761
1762 /* Handle BSD extension vnode events */
39037602
A
1763 if ((kev.flags & EV_ERROR) == 0 &&
1764 (events & ( POLLEXTEND | POLLATTRIB | POLLNLINK | POLLWRITE ))) {
91447636
A
1765 kev.filter = EVFILT_VNODE;
1766 kev.fflags = 0;
1767 if (events & POLLEXTEND)
1768 kev.fflags |= NOTE_EXTEND;
1769 if (events & POLLATTRIB)
1770 kev.fflags |= NOTE_ATTRIB;
1771 if (events & POLLNLINK)
1772 kev.fflags |= NOTE_LINK;
1773 if (events & POLLWRITE)
1774 kev.fflags |= NOTE_WRITE;
39037602 1775 kevent_register(kq, &kev, p);
91447636
A
1776 }
1777
39037602 1778 if (kev.flags & EV_ERROR) {
91447636
A
1779 fds[i].revents = POLLNVAL;
1780 rfds++;
1781 } else
1782 fds[i].revents = 0;
1783 }
1784
	/*
	 * Did we have any trouble registering?
	 * If user space passed 0 FDs, then respect any timeout value passed.
	 * This is an extremely inefficient sleep.  If user space passed one
	 * or more FDs, and we had trouble registering _all_ of them, then
	 * bail out.  If only a subset of the provided FDs failed to register,
	 * we will still call kqueue_scan below.
	 */
	if (nfds && (rfds == nfds))
		goto done;

	/*
	 * If any events had trouble registering, those fds were already
	 * marked POLLNVAL above, so a result is available now and we
	 * shouldn't wait for events in kqueue_scan -- use the current
	 * time as the deadline.
	 */
	if (rfds)
		getmicrouptime(&atv);

	/* scan for, and possibly wait for, the kevents to trigger */
	cont->pca_fds = uap->fds;
	cont->pca_nfds = nfds;
	cont->pca_rfds = rfds;
	error = kqueue_scan(kq, poll_callback, NULL, cont, NULL, &atv, p);
	rfds = cont->pca_rfds;

done:
	OSBitAndAtomic(~((uint32_t)P_SELECT), &p->p_flag);
	/* poll is not restarted after signals... */
	if (error == ERESTART)
		error = EINTR;
	if (error == EWOULDBLOCK)
		error = 0;
	if (error == 0) {
		error = copyout(fds, uap->fds, nfds * sizeof(struct pollfd));
		*retval = rfds;
	}

out:
	if (NULL != cont)
		FREE(cont, M_TEMP);

	kqueue_dealloc(kq);
	return (error);
}
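
/*
 * Illustration (not part of this file): a minimal user-space sketch of how
 * the poll() implemented above is typically driven.  The fd and the 1000ms
 * timeout are arbitrary example values.
 */
#if 0	/* user-space example, kept out of the kernel build */
#include <poll.h>
#include <unistd.h>

static void
poll_example(int fd)
{
	struct pollfd pfd = { .fd = fd, .events = POLLIN | POLLPRI };
	char buf[128];

	/* becomes an EVFILT_READ kevent above (EV_OOBAND set for POLLPRI) */
	int n = poll(&pfd, 1, 1000);		/* timeout in milliseconds */

	if (n > 0 && (pfd.revents & POLLIN))
		(void)read(fd, buf, sizeof(buf));
	/* pfd.revents == POLLNVAL means registration failed (bad fd) */
}
#endif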

int
poll_callback(__unused struct kqueue *kq, struct kevent_internal_s *kevp, void *data)
{
	struct poll_continue_args *cont = (struct poll_continue_args *)data;
	struct pollfd *fds = CAST_DOWN(struct pollfd *, kevp->udata);
	short prev_revents = fds->revents;
	short mask = 0;

	/* convert the results back into revents */
	if (kevp->flags & EV_EOF)
		fds->revents |= POLLHUP;
	if (kevp->flags & EV_ERROR)
		fds->revents |= POLLERR;

	switch (kevp->filter) {

	case EVFILT_READ:
		if (fds->revents & POLLHUP)
			mask = (POLLIN | POLLRDNORM | POLLPRI | POLLRDBAND);
		else {
			mask = (POLLIN | POLLRDNORM);
			if (kevp->flags & EV_OOBAND)
				mask |= (POLLPRI | POLLRDBAND);
		}
		fds->revents |= (fds->events & mask);
		break;

	case EVFILT_WRITE:
		if (!(fds->revents & POLLHUP))
			fds->revents |= (fds->events & (POLLOUT | POLLWRNORM | POLLWRBAND));
		break;

	case EVFILT_VNODE:
		if (kevp->fflags & NOTE_EXTEND)
			fds->revents |= (fds->events & POLLEXTEND);
		if (kevp->fflags & NOTE_ATTRIB)
			fds->revents |= (fds->events & POLLATTRIB);
		if (kevp->fflags & NOTE_LINK)
			fds->revents |= (fds->events & POLLNLINK);
		if (kevp->fflags & NOTE_WRITE)
			fds->revents |= (fds->events & POLLWRITE);
		break;
	}

	/* count this fd as ready only on its first transition to non-zero revents */
	if (fds->revents != 0 && prev_revents == 0)
		cont->pca_rfds++;

	return 0;
}

int
seltrue(__unused dev_t dev, __unused int flag, __unused struct proc *p)
{
	return (1);
}

/*
 * selcount
 *
 * Count the number of bits set in the input bit vector, and establish an
 * outstanding fp->f_iocount for each of the descriptors which will be in
 * use in the select operation.
 *
 * Parameters:	p			The process doing the select
 *		ibits			The input bit vector
 *		nfd			The number of fd's in the vector
 *		countp			Pointer to where to store the bit count
 *
 * Returns:	0			Success
 *		EIO			Bad per process open file table
 *		EBADF			One of the bits in the input bit vector
 *					references an invalid fd
 *
 * Implicit:	*countp (modified)	Count of fd's
 *
 * Notes:	This function is the first pass under the proc_fdlock() that
 *		permits us to recognize invalid descriptors in the bit vector;
 *		they may, however, not remain valid through the drop and
 *		later reacquisition of the proc_fdlock().
 */
static int
selcount(struct proc *p, u_int32_t *ibits, int nfd, int *countp)
{
	struct filedesc *fdp = p->p_fd;
	int msk, i, j, fd;
	u_int32_t bits;
	struct fileproc *fp;
	int n = 0;
	u_int32_t *iptr;
	u_int nw;
	int error = 0;
	int dropcount;
	int need_wakeup = 0;

	/*
	 * Sanity check: signal problems seen at reboot (Beaker1C) can
	 * leave p->p_fd invalid, so verify it before use.
	 */
	if (fdp == NULL) {
		*countp = 0;
		return (EIO);
	}
	nw = howmany(nfd, NFDBITS);

	proc_fdlock(p);
	for (msk = 0; msk < 3; msk++) {
		iptr = (u_int32_t *)&ibits[msk * nw];
		for (i = 0; i < nfd; i += NFDBITS) {
			bits = iptr[i/NFDBITS];
			while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
				bits &= ~(1 << j);

				if (fd < fdp->fd_nfiles)
					fp = fdp->fd_ofiles[fd];
				else
					fp = NULL;

				if (fp == NULL ||
				    (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
					*countp = 0;
					error = EBADF;
					goto bad;
				}
				fp->f_iocount++;
				n++;
			}
		}
	}
	proc_fdunlock(p);

	*countp = n;
	return (0);

bad:
	dropcount = 0;

	if (n == 0)
		goto out;
	/* Ignore error return; it's already EBADF */
	(void)seldrop_locked(p, ibits, nfd, n, &need_wakeup, 1);

out:
	proc_fdunlock(p);
	if (need_wakeup) {
		wakeup(&p->p_fpdrainwait);
	}
	return (error);
}
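
/*
 * Note on the scan loops above and below: ibits holds three packed
 * fd_set-style bit vectors (read/write/except).  A standalone sketch of the
 * ffs()-based walk over one such vector (illustrative only; not compiled
 * here):
 */
#if 0	/* example, kept out of the build */
#include <strings.h>
#include <sys/types.h>

static void
visit_set_fds(u_int32_t *iptr, int nfd)
{
	int i, j, fd;
	u_int32_t bits;

	for (i = 0; i < nfd; i += NFDBITS) {
		bits = iptr[i / NFDBITS];
		/* ffs() returns the 1-based index of the lowest set bit, 0 if none */
		while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
			bits &= ~(1 << j);	/* clear the bit just found */
			/* ... fd is a descriptor selected in this vector ... */
		}
	}
}
#endif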


/*
 * seldrop_locked
 *
 * Drop outstanding wait queue references set up during selscan(); drop the
 * outstanding per fileproc f_iocount picked up during the selcount().
 *
 * Parameters:	p			Process performing the select
 *		ibits			Input bit vector of fd's
 *		nfd			Number of fd's
 *		lim			Limit to number of vector entries to
 *					consider, or -1 for "all"
 *		fromselcount		True if called from selcount(); in
 *					that case we stop after dropping lim
 *					entries
 *		need_wakeup		Pointer to flag to set to do a wakeup
 *					if f_iocount on any descriptor goes to 0
 *
 * Returns:	0			Success
 *		EIO			Bad per process open file table
 *		EBADF			One or more fds in the bit vector
 *					were invalid, but the rest
 *					were successfully dropped
 *
 * Notes:	An fd may become bad while the proc_fdlock() is not held,
 *		if a multithreaded application closes the fd out from under
 *		the in-progress select.  In this case, we still have to
 *		clean up after the set up on the remaining fds.
 */
static int
seldrop_locked(struct proc *p, u_int32_t *ibits, int nfd, int lim, int *need_wakeup, int fromselcount)
{
	struct filedesc *fdp = p->p_fd;
	int msk, i, j, nc, fd;
	u_int32_t bits;
	struct fileproc *fp;
	u_int32_t *iptr;
	u_int nw;
	int error = 0;
	int dropcount = 0;
	uthread_t uth = get_bsdthread_info(current_thread());
	struct _select_data *seldata;

	*need_wakeup = 0;

	/*
	 * Sanity check: signal problems seen at reboot (Beaker1C) can
	 * leave p->p_fd invalid, so verify it before use.
	 */
	if (fdp == NULL) {
		return (EIO);
	}

	nw = howmany(nfd, NFDBITS);
	seldata = &uth->uu_kevent.ss_select_data;

	nc = 0;
	for (msk = 0; msk < 3; msk++) {
		iptr = (u_int32_t *)&ibits[msk * nw];
		for (i = 0; i < nfd; i += NFDBITS) {
			bits = iptr[i/NFDBITS];
			while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
				bits &= ~(1 << j);
				fp = fdp->fd_ofiles[fd];
				/*
				 * If we've already dropped as many as were
				 * counted/scanned, then we are done.
				 */
				if ((fromselcount != 0) && (++dropcount > lim))
					goto done;

				/*
				 * Unlink even potentially NULL fileprocs.
				 * If the FD was closed from under us, we
				 * still need to clean up the waitq links!
				 */
				selunlinkfp(fp,
				    seldata->wqp ? seldata->wqp[nc] : 0,
				    uth->uu_wqset);

				nc++;

				if (fp == NULL) {
					/* skip (now) bad fds */
					error = EBADF;
					continue;
				}

				fp->f_iocount--;
				if (fp->f_iocount < 0)
					panic("f_iocount overdecrement!");

				if (fp->f_iocount == 0) {
					/*
					 * The last iocount is responsible for clearing
					 * the selconflict flag - even if we didn't set it -
					 * and is also responsible for waking up anyone
					 * waiting on iocounts to drain.
					 */
					if (fp->f_flags & FP_SELCONFLICT)
						fp->f_flags &= ~FP_SELCONFLICT;
					if (p->p_fpdrainwait) {
						p->p_fpdrainwait = 0;
						*need_wakeup = 1;
					}
				}
			}
		}
	}
done:
	return (error);
}


static int
seldrop(struct proc *p, u_int32_t *ibits, int nfd)
{
	int error;
	int need_wakeup = 0;

	proc_fdlock(p);
	error = seldrop_locked(p, ibits, nfd, nfd, &need_wakeup, 0);
	proc_fdunlock(p);
	if (need_wakeup) {
		wakeup(&p->p_fpdrainwait);
	}
	return (error);
}

/*
 * Record a select request.
 */
void
selrecord(__unused struct proc *selector, struct selinfo *sip, void *s_data)
{
	thread_t cur_act = current_thread();
	struct uthread * ut = get_bsdthread_info(cur_act);
	/* on input, s_data points to the 64-bit ID of a reserved link object */
	uint64_t *reserved_link = (uint64_t *)s_data;

	/* need to look at collisions */

	/* do not record if this is the second pass of select */
	if (!s_data)
		return;

	if ((sip->si_flags & SI_INITED) == 0) {
		waitq_init(&sip->si_waitq, SYNC_POLICY_FIFO);
		sip->si_flags |= SI_INITED;
		sip->si_flags &= ~SI_CLEAR;
	}

	if (sip->si_flags & SI_RECORDED)
		sip->si_flags |= SI_COLL;
	else
		sip->si_flags &= ~SI_COLL;

	sip->si_flags |= SI_RECORDED;
	/* note: this checks for pre-existing linkage */
	waitq_link(&sip->si_waitq, ut->uu_wqset,
	    WAITQ_SHOULD_LOCK, reserved_link);

	/*
	 * Always consume the reserved link.
	 * We can always call waitq_link_release() safely because if
	 * waitq_link is successful, it consumes the link and resets the
	 * value to 0, in which case our call to release becomes a no-op.
	 * If waitq_link fails, then the following release call will actually
	 * release the reserved link object.
	 */
	waitq_link_release(*reserved_link);
	*reserved_link = 0;

	/*
	 * Use the s_data pointer as an output parameter as well.
	 * This avoids changing the prototype for this function, which is
	 * used by many kexts.  We need to surface the waitq object
	 * associated with the selinfo we just added to the thread's select
	 * set.  New waitq sets do not have back-pointers to set members, so
	 * the only way to clear out set linkage objects is to go from the
	 * waitq to the set.  We use a memcpy because s_data could be
	 * pointing to an unaligned value on the stack
	 * (especially on 32-bit systems).
	 */
	void *wqptr = (void *)&sip->si_waitq;
	memcpy((void *)s_data, (void *)&wqptr, sizeof(void *));

	return;
}
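
/*
 * Sketch (illustrative only): how a driver's select entry point typically
 * feeds selrecord().  The device structure and the mydev_* names are
 * hypothetical, not part of xnu.
 */
#if 0	/* example, kept out of the build */
struct mydev_softc {			/* hypothetical driver state */
	struct selinfo	sc_rsel;	/* readers blocked in select/poll */
	int		sc_readable;
};

static int
mydevselect(dev_t dev, int which, void *wql, struct proc *p)
{
	struct mydev_softc *sc = mydev_softc_for(dev);	/* hypothetical lookup */

	if (which == FREAD) {
		if (sc->sc_readable)
			return (1);		/* ready now; nothing to record */
		selrecord(p, &sc->sc_rsel, wql);
	}
	return (0);
}
#endif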

void
selwakeup(struct selinfo *sip)
{

	if ((sip->si_flags & SI_INITED) == 0) {
		return;
	}

	if (sip->si_flags & SI_COLL) {
		nselcoll++;
		sip->si_flags &= ~SI_COLL;
#if 0
		/* will not support */
		//wakeup((caddr_t)&selwait);
#endif
	}

	if (sip->si_flags & SI_RECORDED) {
		waitq_wakeup64_all(&sip->si_waitq, NO_EVENT64,
		    THREAD_AWAKENED, WAITQ_ALL_PRIORITIES);
		sip->si_flags &= ~SI_RECORDED;
	}

}

void
selthreadclear(struct selinfo *sip)
{
	struct waitq *wq;

	if ((sip->si_flags & SI_INITED) == 0) {
		return;
	}
	if (sip->si_flags & SI_RECORDED) {
		selwakeup(sip);
		sip->si_flags &= ~(SI_RECORDED | SI_COLL);
	}
	sip->si_flags |= SI_CLEAR;
	sip->si_flags &= ~SI_INITED;

	wq = &sip->si_waitq;

	/*
	 * Higher level logic may have a handle on this waitq's prepost ID,
	 * but that's OK because the waitq_deinit will remove/invalidate the
	 * prepost object (as well as mark the waitq invalid). This de-couples
	 * us from any callers that may have a handle to this waitq via the
	 * prepost ID.
	 */
	waitq_deinit(wq);
}
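
/*
 * Sketch (illustrative only, continuing the hypothetical driver above):
 * the producer side calls selwakeup() when data arrives, and
 * selthreadclear() when the device goes away, so no thread is left
 * linked to a dead selinfo.
 */
#if 0	/* example, kept out of the build */
static void
mydev_data_arrived(struct mydev_softc *sc)
{
	sc->sc_readable = 1;
	selwakeup(&sc->sc_rsel);	/* wake any select/poll waiters */
}

static void
mydev_detach(struct mydev_softc *sc)
{
	selthreadclear(&sc->sc_rsel);	/* tear down waitq linkage */
}
#endif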



#define DBG_POST	0x10
#define DBG_WATCH	0x11
#define DBG_WAIT	0x12
#define DBG_MOD		0x13
#define DBG_EWAKEUP	0x14
#define DBG_ENQUEUE	0x15
#define DBG_DEQUEUE	0x16

#define DBG_MISC_POST	 MISCDBG_CODE(DBG_EVENT,DBG_POST)
#define DBG_MISC_WATCH	 MISCDBG_CODE(DBG_EVENT,DBG_WATCH)
#define DBG_MISC_WAIT	 MISCDBG_CODE(DBG_EVENT,DBG_WAIT)
#define DBG_MISC_MOD	 MISCDBG_CODE(DBG_EVENT,DBG_MOD)
#define DBG_MISC_EWAKEUP MISCDBG_CODE(DBG_EVENT,DBG_EWAKEUP)
#define DBG_MISC_ENQUEUE MISCDBG_CODE(DBG_EVENT,DBG_ENQUEUE)
#define DBG_MISC_DEQUEUE MISCDBG_CODE(DBG_EVENT,DBG_DEQUEUE)


#define EVPROCDEQUE(p, evq)	do {				\
	proc_lock(p);						\
	if (evq->ee_flags & EV_QUEUED) {			\
		TAILQ_REMOVE(&p->p_evlist, evq, ee_plist);	\
		evq->ee_flags &= ~EV_QUEUED;			\
	}							\
	proc_unlock(p);						\
} while (0);


/*
 * called upon socket close. dequeue and free all events for
 * the socket... socket must be locked by caller.
 */
void
evsofree(struct socket *sp)
{
	struct eventqelt *evq, *next;
	proc_t p;

	if (sp == NULL)
		return;

	for (evq = sp->so_evlist.tqh_first; evq != NULL; evq = next) {
		next = evq->ee_slist.tqe_next;
		p = evq->ee_proc;

		if (evq->ee_flags & EV_QUEUED) {
			EVPROCDEQUE(p, evq);
		}
		TAILQ_REMOVE(&sp->so_evlist, evq, ee_slist); // remove from socket q
		FREE(evq, M_TEMP);
	}
}


/*
 * called upon pipe close. dequeue and free all events for
 * the pipe... pipe must be locked by caller
 */
void
evpipefree(struct pipe *cpipe)
{
	struct eventqelt *evq, *next;
	proc_t p;

	for (evq = cpipe->pipe_evlist.tqh_first; evq != NULL; evq = next) {
		next = evq->ee_slist.tqe_next;
		p = evq->ee_proc;

		EVPROCDEQUE(p, evq);

		TAILQ_REMOVE(&cpipe->pipe_evlist, evq, ee_slist); // remove from pipe q
		FREE(evq, M_TEMP);
	}
}


/*
 * enqueue this event if it's not already queued.  wake up
 * the proc if we do queue this event to it.
 * the proc lock is taken and dropped internally; the wakeup
 * is issued after the lock has been dropped.
 */
static void
evprocenque(struct eventqelt *evq)
{
	proc_t p;

	assert(evq);
	p = evq->ee_proc;

	KERNEL_DEBUG(DBG_MISC_ENQUEUE|DBG_FUNC_START, (uint32_t)evq, evq->ee_flags, evq->ee_eventmask,0,0);

	proc_lock(p);

	if (evq->ee_flags & EV_QUEUED) {
		proc_unlock(p);

		KERNEL_DEBUG(DBG_MISC_ENQUEUE|DBG_FUNC_END, 0,0,0,0,0);
		return;
	}
	evq->ee_flags |= EV_QUEUED;

	TAILQ_INSERT_TAIL(&p->p_evlist, evq, ee_plist);

	proc_unlock(p);

	wakeup(&p->p_evlist);

	KERNEL_DEBUG(DBG_MISC_ENQUEUE|DBG_FUNC_END, 0,0,0,0,0);
}


/*
 * pipe lock must be taken by the caller
 */
void
postpipeevent(struct pipe *pipep, int event)
{
	int mask;
	struct eventqelt *evq;

	if (pipep == NULL)
		return;
	KERNEL_DEBUG(DBG_MISC_POST|DBG_FUNC_START, event,0,0,1,0);

	for (evq = pipep->pipe_evlist.tqh_first;
	     evq != NULL; evq = evq->ee_slist.tqe_next) {

		if (evq->ee_eventmask == 0)
			continue;
		mask = 0;

		switch (event & (EV_RWBYTES | EV_RCLOSED | EV_WCLOSED)) {

		case EV_RWBYTES:
			if ((evq->ee_eventmask & EV_RE) && pipep->pipe_buffer.cnt) {
				mask |= EV_RE;
				evq->ee_req.er_rcnt = pipep->pipe_buffer.cnt;
			}
			if ((evq->ee_eventmask & EV_WR) &&
			    (MAX(pipep->pipe_buffer.size,PIPE_SIZE) - pipep->pipe_buffer.cnt) >= PIPE_BUF) {

				if (pipep->pipe_state & PIPE_EOF) {
					mask |= EV_WR|EV_RESET;
					break;
				}
				mask |= EV_WR;
				evq->ee_req.er_wcnt = MAX(pipep->pipe_buffer.size, PIPE_SIZE) - pipep->pipe_buffer.cnt;
			}
			break;

		case EV_WCLOSED:
		case EV_RCLOSED:
			if ((evq->ee_eventmask & EV_RE)) {
				mask |= EV_RE|EV_RCLOSED;
			}
			if ((evq->ee_eventmask & EV_WR)) {
				mask |= EV_WR|EV_WCLOSED;
			}
			break;

		default:
			return;
		}
		if (mask) {
			/*
			 * disarm... postevents are nops until this event is 'read' via
			 * waitevent and then re-armed via modwatch
			 */
			evq->ee_eventmask = 0;

			/*
			 * since events are disarmed until after the waitevent
			 * the ee_req.er_xxxx fields can't change once we've
			 * inserted this event into the proc queue...
			 * therefore, the waitevent will see a 'consistent'
			 * snapshot of the event, even though it won't hold
			 * the pipe lock, and we're updating the event outside
			 * of the proc lock, which it will hold
			 */
			evq->ee_req.er_eventbits |= mask;

			KERNEL_DEBUG(DBG_MISC_POST, (uint32_t)evq, evq->ee_req.er_eventbits, mask, 1,0);

			evprocenque(evq);
		}
	}
	KERNEL_DEBUG(DBG_MISC_POST|DBG_FUNC_END, 0,0,0,1,0);
}

#if SOCKETS
/*
 * given either a sockbuf or a socket run down the
 * event list and queue ready events found...
 * the socket must be locked by the caller
 */
void
postevent(struct socket *sp, struct sockbuf *sb, int event)
{
	int mask;
	struct eventqelt *evq;
	struct tcpcb *tp;

	if (sb)
		sp = sb->sb_so;
	if (sp == NULL)
		return;

	KERNEL_DEBUG(DBG_MISC_POST|DBG_FUNC_START, (int)sp, event, 0, 0, 0);

	for (evq = sp->so_evlist.tqh_first;
	     evq != NULL; evq = evq->ee_slist.tqe_next) {

		if (evq->ee_eventmask == 0)
			continue;
		mask = 0;

		/* ready for reading:
		   - byte cnt >= receive low water mark
		   - read-half of conn closed
		   - conn pending for listening sock
		   - socket error pending

		   ready for writing
		   - byte cnt avail >= send low water mark
		   - write half of conn closed
		   - socket error pending
		   - non-blocking conn completed successfully

		   exception pending
		   - out of band data
		   - sock at out of band mark
		*/

		switch (event & EV_DMASK) {

		case EV_OOB:
			if ((evq->ee_eventmask & EV_EX)) {
				if (sp->so_oobmark || ((sp->so_state & SS_RCVATMARK)))
					mask |= EV_EX|EV_OOB;
			}
			break;

		case EV_RWBYTES|EV_OOB:
			if ((evq->ee_eventmask & EV_EX)) {
				if (sp->so_oobmark || ((sp->so_state & SS_RCVATMARK)))
					mask |= EV_EX|EV_OOB;
			}
			/*
			 * fall into the next case
			 */
		case EV_RWBYTES:
			if ((evq->ee_eventmask & EV_RE) && soreadable(sp)) {
				/* for AFP/OT purposes; may go away in future */
				if ((SOCK_DOM(sp) == PF_INET ||
				    SOCK_DOM(sp) == PF_INET6) &&
				    SOCK_PROTO(sp) == IPPROTO_TCP &&
				    (sp->so_error == ECONNREFUSED ||
				    sp->so_error == ECONNRESET)) {
					if (sp->so_pcb == NULL ||
					    sotoinpcb(sp)->inp_state ==
					    INPCB_STATE_DEAD ||
					    (tp = sototcpcb(sp)) == NULL ||
					    tp->t_state == TCPS_CLOSED) {
						mask |= EV_RE|EV_RESET;
						break;
					}
				}
				mask |= EV_RE;
				evq->ee_req.er_rcnt = sp->so_rcv.sb_cc;

				if (sp->so_state & SS_CANTRCVMORE) {
					mask |= EV_FIN;
					break;
				}
			}
			if ((evq->ee_eventmask & EV_WR) && sowriteable(sp)) {
				/* for AFP/OT purposes; may go away in future */
				if ((SOCK_DOM(sp) == PF_INET ||
				    SOCK_DOM(sp) == PF_INET6) &&
				    SOCK_PROTO(sp) == IPPROTO_TCP &&
				    (sp->so_error == ECONNREFUSED ||
				    sp->so_error == ECONNRESET)) {
					if (sp->so_pcb == NULL ||
					    sotoinpcb(sp)->inp_state ==
					    INPCB_STATE_DEAD ||
					    (tp = sototcpcb(sp)) == NULL ||
					    tp->t_state == TCPS_CLOSED) {
						mask |= EV_WR|EV_RESET;
						break;
					}
				}
				mask |= EV_WR;
				evq->ee_req.er_wcnt = sbspace(&sp->so_snd);
			}
			break;

		case EV_RCONN:
			if ((evq->ee_eventmask & EV_RE)) {
				mask |= EV_RE|EV_RCONN;
				evq->ee_req.er_rcnt = sp->so_qlen + 1;  // incl this one
			}
			break;

		case EV_WCONN:
			if ((evq->ee_eventmask & EV_WR)) {
				mask |= EV_WR|EV_WCONN;
			}
			break;

		case EV_RCLOSED:
			if ((evq->ee_eventmask & EV_RE)) {
				mask |= EV_RE|EV_RCLOSED;
			}
			break;

		case EV_WCLOSED:
			if ((evq->ee_eventmask & EV_WR)) {
				mask |= EV_WR|EV_WCLOSED;
			}
			break;

		case EV_FIN:
			if (evq->ee_eventmask & EV_RE) {
				mask |= EV_RE|EV_FIN;
			}
			break;

		case EV_RESET:
		case EV_TIMEOUT:
			if (evq->ee_eventmask & EV_RE) {
				mask |= EV_RE | event;
			}
			if (evq->ee_eventmask & EV_WR) {
				mask |= EV_WR | event;
			}
			break;

		default:
			KERNEL_DEBUG(DBG_MISC_POST|DBG_FUNC_END, (int)sp, -1, 0, 0, 0);
			return;
		} /* switch */

		KERNEL_DEBUG(DBG_MISC_POST, (int)evq, evq->ee_eventmask, evq->ee_req.er_eventbits, mask, 0);

		if (mask) {
			/*
			 * disarm... postevents are nops until this event is 'read' via
			 * waitevent and then re-armed via modwatch
			 */
			evq->ee_eventmask = 0;

			/*
			 * since events are disarmed until after the waitevent
			 * the ee_req.er_xxxx fields can't change once we've
			 * inserted this event into the proc queue...
			 * since waitevent can't see this event until we
			 * enqueue it, waitevent will see a 'consistent'
			 * snapshot of the event, even though it won't hold
			 * the socket lock, and we're updating the event outside
			 * of the proc lock, which it will hold
			 */
			evq->ee_req.er_eventbits |= mask;

			evprocenque(evq);
		}
	}
	KERNEL_DEBUG(DBG_MISC_POST|DBG_FUNC_END, (int)sp, 0, 0, 0, 0);
}
#endif /* SOCKETS */


/*
 * watchevent system call. user passes us an event to watch
 * for. we malloc an event object, initialize it, and queue
 * it to the open socket. when the event occurs, postevent()
 * will enqueue it back to our proc where we can retrieve it
 * via waitevent().
 *
 * should this prevent duplicate events on same socket?
 *
 * Returns:
 *	ENOMEM			No memory for operation
 *	copyin:EFAULT
 */
int
watchevent(proc_t p, struct watchevent_args *uap, __unused int *retval)
{
	struct eventqelt *evq = (struct eventqelt *)0;
	struct eventqelt *np = NULL;
	struct eventreq64 *erp;
	struct fileproc *fp = NULL;
	int error;

	KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_START, 0,0,0,0,0);

	// get a qelt and fill with users req
	MALLOC(evq, struct eventqelt *, sizeof(struct eventqelt), M_TEMP, M_WAITOK);

	if (evq == NULL)
		return (ENOMEM);
	erp = &evq->ee_req;

	// get users request pkt

	if (IS_64BIT_PROCESS(p)) {
		error = copyin(uap->u_req, (caddr_t)erp, sizeof(struct eventreq64));
	} else {
		struct eventreq32 er32;

		error = copyin(uap->u_req, (caddr_t)&er32, sizeof(struct eventreq32));
		if (error == 0) {
			/*
			 * the user only passes in the
			 * er_type, er_handle and er_data...
			 * the other fields are initialized
			 * below, so don't bother to copy
			 */
			erp->er_type = er32.er_type;
			erp->er_handle = er32.er_handle;
			erp->er_data = (user_addr_t)er32.er_data;
		}
	}
	if (error) {
		FREE(evq, M_TEMP);
		KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_END, error,0,0,0,0);

		return(error);
	}
	KERNEL_DEBUG(DBG_MISC_WATCH, erp->er_handle,uap->u_eventmask,(uint32_t)evq,0,0);

	// validate, freeing qelt if errors
	error = 0;
	proc_fdlock(p);

	if (erp->er_type != EV_FD) {
		error = EINVAL;
	} else if ((error = fp_lookup(p, erp->er_handle, &fp, 1)) != 0) {
		error = EBADF;
#if SOCKETS
	} else if (fp->f_type == DTYPE_SOCKET) {
		socket_lock((struct socket *)fp->f_data, 1);
		np = ((struct socket *)fp->f_data)->so_evlist.tqh_first;
#endif /* SOCKETS */
	} else if (fp->f_type == DTYPE_PIPE) {
		PIPE_LOCK((struct pipe *)fp->f_data);
		np = ((struct pipe *)fp->f_data)->pipe_evlist.tqh_first;
	} else {
		fp_drop(p, erp->er_handle, fp, 1);
		error = EINVAL;
	}
	proc_fdunlock(p);

	if (error) {
		FREE(evq, M_TEMP);

		KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_END, error,0,0,0,0);
		return(error);
	}

	/*
	 * only allow one watch per file per proc
	 */
	for ( ; np != NULL; np = np->ee_slist.tqe_next) {
		if (np->ee_proc == p) {
#if SOCKETS
			if (fp->f_type == DTYPE_SOCKET)
				socket_unlock((struct socket *)fp->f_data, 1);
			else
#endif /* SOCKETS */
				PIPE_UNLOCK((struct pipe *)fp->f_data);
			fp_drop(p, erp->er_handle, fp, 0);
			FREE(evq, M_TEMP);

			KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_END, EINVAL,0,0,0,0);
			return(EINVAL);
		}
	}
	erp->er_ecnt = erp->er_rcnt = erp->er_wcnt = erp->er_eventbits = 0;
	evq->ee_proc = p;
	evq->ee_eventmask = uap->u_eventmask & EV_MASK;
	evq->ee_flags = 0;

#if SOCKETS
	if (fp->f_type == DTYPE_SOCKET) {
		TAILQ_INSERT_TAIL(&((struct socket *)fp->f_data)->so_evlist, evq, ee_slist);
		postevent((struct socket *)fp->f_data, 0, EV_RWBYTES); // catch existing events

		socket_unlock((struct socket *)fp->f_data, 1);
	} else
#endif /* SOCKETS */
	{
		TAILQ_INSERT_TAIL(&((struct pipe *)fp->f_data)->pipe_evlist, evq, ee_slist);
		postpipeevent((struct pipe *)fp->f_data, EV_RWBYTES);

		PIPE_UNLOCK((struct pipe *)fp->f_data);
	}
	fp_drop_event(p, erp->er_handle, fp);

	KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_END, 0,0,0,0,0);
	return(0);
}
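
/*
 * Sketch (illustrative only): the watchevent()/waitevent() flow from user
 * space.  This legacy API is private; the example assumes the historical
 * <sys/ev.h> user-space declarations of struct eventreq and these calls.
 * The argument order matches the uap layouts handled above.
 */
#if 0	/* user-space example, kept out of the kernel build */
struct eventreq er;

bzero(&er, sizeof(er));
er.er_type = EV_FD;		/* only EV_FD is accepted */
er.er_handle = sock_fd;		/* a socket or pipe descriptor */

if (watchevent(&er, EV_RE | EV_WR) == 0) {
	struct timeval tv = { .tv_sec = 5, .tv_usec = 0 };

	/* blocks until postevent() queues the event, or 5s elapse */
	if (waitevent(&er, &tv) == 0)
		/* er.er_eventbits now holds EV_RE/EV_WR/... */ ;
}
#endif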



/*
 * waitevent system call.
 * grabs the next waiting event for this proc and returns
 * it.  if no events are pending, the user can ask to sleep
 * (with or without a timeout), or to poll without sleeping;
 * poll mode is selected either by passing tv == -1 or by
 * passing a timeout of zero ((tv != NULL && interval == 0)).
 */
int
waitevent(proc_t p, struct waitevent_args *uap, int *retval)
{
	int error = 0;
	struct eventqelt *evq;
	struct eventreq64 *erp;
	uint64_t abstime, interval;
	boolean_t fast_poll = FALSE;
	union {
		struct eventreq64 er64;
		struct eventreq32 er32;
	} uer;

	interval = 0;

	if (uap->tv) {
		struct timeval atv;
		/*
		 * check for fast poll method
		 */
		if (IS_64BIT_PROCESS(p)) {
			if (uap->tv == (user_addr_t)-1)
				fast_poll = TRUE;
		} else if (uap->tv == (user_addr_t)((uint32_t)-1))
			fast_poll = TRUE;

		if (fast_poll == TRUE) {
			if (p->p_evlist.tqh_first == NULL) {
				KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_NONE, -1,0,0,0,0);
				/*
				 * poll failed
				 */
				*retval = 1;
				return (0);
			}
			proc_lock(p);
			goto retry;
		}
		if (IS_64BIT_PROCESS(p)) {
			struct user64_timeval atv64;
			error = copyin(uap->tv, (caddr_t)&atv64, sizeof(atv64));
			/* Loses resolution - assume timeout < 68 years */
			atv.tv_sec = atv64.tv_sec;
			atv.tv_usec = atv64.tv_usec;
		} else {
			struct user32_timeval atv32;
			error = copyin(uap->tv, (caddr_t)&atv32, sizeof(atv32));
			atv.tv_sec = atv32.tv_sec;
			atv.tv_usec = atv32.tv_usec;
		}

		if (error)
			return(error);
		if (itimerfix(&atv)) {
			error = EINVAL;
			return(error);
		}
		interval = tvtoabstime(&atv);
	}
	KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_START, 0,0,0,0,0);

	proc_lock(p);
retry:
	if ((evq = p->p_evlist.tqh_first) != NULL) {
		/*
		 * found one... make a local copy while it's still on the queue
		 * to prevent it from changing while in the midst of copying
		 * don't want to hold the proc lock across a copyout because
		 * it might block on a page fault at the target in user space
		 */
		erp = &evq->ee_req;

		if (IS_64BIT_PROCESS(p))
			bcopy((caddr_t)erp, (caddr_t)&uer.er64, sizeof (struct eventreq64));
		else {
			uer.er32.er_type = erp->er_type;
			uer.er32.er_handle = erp->er_handle;
			uer.er32.er_data = (uint32_t)erp->er_data;
			uer.er32.er_ecnt = erp->er_ecnt;
			uer.er32.er_rcnt = erp->er_rcnt;
			uer.er32.er_wcnt = erp->er_wcnt;
			uer.er32.er_eventbits = erp->er_eventbits;
		}
		TAILQ_REMOVE(&p->p_evlist, evq, ee_plist);

		evq->ee_flags &= ~EV_QUEUED;

		proc_unlock(p);

		if (IS_64BIT_PROCESS(p))
			error = copyout((caddr_t)&uer.er64, uap->u_req, sizeof(struct eventreq64));
		else
			error = copyout((caddr_t)&uer.er32, uap->u_req, sizeof(struct eventreq32));

		KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_END, error,
		    evq->ee_req.er_handle,evq->ee_req.er_eventbits,(uint32_t)evq,0);
		return (error);
	}
	else {
		if (uap->tv && interval == 0) {
			proc_unlock(p);
			*retval = 1;  // poll failed

			KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_END, error,0,0,0,0);
			return (error);
		}
		if (interval != 0)
			clock_absolutetime_interval_to_deadline(interval, &abstime);
		else
			abstime = 0;

		KERNEL_DEBUG(DBG_MISC_WAIT, 1,(uint32_t)&p->p_evlist,0,0,0);

		error = msleep1(&p->p_evlist, &p->p_mlock, (PSOCK | PCATCH), "waitevent", abstime);

		KERNEL_DEBUG(DBG_MISC_WAIT, 2,(uint32_t)&p->p_evlist,0,0,0);

		if (error == 0)
			goto retry;
		if (error == ERESTART)
			error = EINTR;
		if (error == EWOULDBLOCK) {
			*retval = 1;
			error = 0;
		}
	}
	proc_unlock(p);

	KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_END, 0,0,0,0,0);
	return (error);
}
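
/*
 * Sketch (illustrative only): the "fast poll" convention handled above.
 * Passing tv == (struct timeval *)-1 asks only whether an event is already
 * queued, with no copyin and no sleep.  Same hedges as the previous example:
 * the user-space declarations are assumed from the historical <sys/ev.h>.
 */
#if 0	/* user-space example, kept out of the kernel build */
struct eventreq er;

int rv = waitevent(&er, (struct timeval *)-1);	/* never blocks */
if (rv == 0)
	/* er describes a fired event; re-arm it with modwatch() */ ;
/* rv == 1 means nothing was pending */
#endif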


/*
 * modwatch system call. user passes in the event to modify.
 * if we find it, we reset the event bits and queue/dequeue
 * the event as needed.
 */
int
modwatch(proc_t p, struct modwatch_args *uap, __unused int *retval)
{
	struct eventreq64 er;
	struct eventreq64 *erp = &er;
	struct eventqelt *evq = NULL;	/* protected by error return */
	int error;
	struct fileproc *fp;
	int flag;

	KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_START, 0,0,0,0,0);

	/*
	 * get user's request pkt
	 * just need the er_type and er_handle which sit above the
	 * problematic er_data (32/64 issue)... so only copy in
	 * those 2 fields
	 */
	if ((error = copyin(uap->u_req, (caddr_t)erp, sizeof(er.er_type) + sizeof(er.er_handle)))) {
		KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_END, error,0,0,0,0);
		return(error);
	}
	proc_fdlock(p);

	if (erp->er_type != EV_FD) {
		error = EINVAL;
	} else if ((error = fp_lookup(p, erp->er_handle, &fp, 1)) != 0) {
		error = EBADF;
#if SOCKETS
	} else if (fp->f_type == DTYPE_SOCKET) {
		socket_lock((struct socket *)fp->f_data, 1);
		evq = ((struct socket *)fp->f_data)->so_evlist.tqh_first;
#endif /* SOCKETS */
	} else if (fp->f_type == DTYPE_PIPE) {
		PIPE_LOCK((struct pipe *)fp->f_data);
		evq = ((struct pipe *)fp->f_data)->pipe_evlist.tqh_first;
	} else {
		fp_drop(p, erp->er_handle, fp, 1);
		error = EINVAL;
	}

	if (error) {
		proc_fdunlock(p);
		KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_END, error,0,0,0,0);
		return(error);
	}

	if ((uap->u_eventmask == EV_RM) && (fp->f_flags & FP_WAITEVENT)) {
		fp->f_flags &= ~FP_WAITEVENT;
	}
	proc_fdunlock(p);

	// locate event if possible
	for ( ; evq != NULL; evq = evq->ee_slist.tqe_next) {
		if (evq->ee_proc == p)
			break;
	}
	if (evq == NULL) {
#if SOCKETS
		if (fp->f_type == DTYPE_SOCKET)
			socket_unlock((struct socket *)fp->f_data, 1);
		else
#endif /* SOCKETS */
			PIPE_UNLOCK((struct pipe *)fp->f_data);
		fp_drop(p, erp->er_handle, fp, 0);
		KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_END, EINVAL,0,0,0,0);
		return(EINVAL);
	}
	KERNEL_DEBUG(DBG_MISC_MOD, erp->er_handle,uap->u_eventmask,(uint32_t)evq,0,0);

	if (uap->u_eventmask == EV_RM) {
		EVPROCDEQUE(p, evq);

#if SOCKETS
		if (fp->f_type == DTYPE_SOCKET) {
			TAILQ_REMOVE(&((struct socket *)fp->f_data)->so_evlist, evq, ee_slist);
			socket_unlock((struct socket *)fp->f_data, 1);
		} else
#endif /* SOCKETS */
		{
			TAILQ_REMOVE(&((struct pipe *)fp->f_data)->pipe_evlist, evq, ee_slist);
			PIPE_UNLOCK((struct pipe *)fp->f_data);
		}
		fp_drop(p, erp->er_handle, fp, 0);
		FREE(evq, M_TEMP);
		KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_END, 0,0,0,0,0);
		return(0);
	}
	switch (uap->u_eventmask & EV_MASK) {

	case 0:
		flag = 0;
		break;

	case EV_RE:
	case EV_WR:
	case EV_RE|EV_WR:
		flag = EV_RWBYTES;
		break;

	case EV_EX:
		flag = EV_OOB;
		break;

	case EV_EX|EV_RE:
	case EV_EX|EV_WR:
	case EV_EX|EV_RE|EV_WR:
		flag = EV_OOB|EV_RWBYTES;
		break;

	default:
#if SOCKETS
		if (fp->f_type == DTYPE_SOCKET)
			socket_unlock((struct socket *)fp->f_data, 1);
		else
#endif /* SOCKETS */
			PIPE_UNLOCK((struct pipe *)fp->f_data);
		fp_drop(p, erp->er_handle, fp, 0);
		KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_END, EINVAL,0,0,0,0);
		return(EINVAL);
	}
	/*
	 * since we're holding the socket/pipe lock, the event
	 * cannot go from the unqueued state to the queued state
	 * however, it can go from the queued state to the unqueued state
	 * since that direction is protected by the proc_lock...
	 * so do a quick check for EV_QUEUED w/o holding the proc lock
	 * since by far the common case will be NOT EV_QUEUED, this saves
	 * us taking the proc_lock the majority of the time
	 */
	if (evq->ee_flags & EV_QUEUED) {
		/*
		 * EVPROCDEQUE will recheck the state after it grabs the proc_lock
		 */
		EVPROCDEQUE(p, evq);
	}
	/*
	 * while the event is off the proc queue and
	 * we're holding the socket/pipe lock
	 * it's safe to update these fields...
	 */
	evq->ee_req.er_eventbits = 0;
	evq->ee_eventmask = uap->u_eventmask & EV_MASK;

#if SOCKETS
	if (fp->f_type == DTYPE_SOCKET) {
		postevent((struct socket *)fp->f_data, 0, flag);
		socket_unlock((struct socket *)fp->f_data, 1);
	} else
#endif /* SOCKETS */
	{
		postpipeevent((struct pipe *)fp->f_data, flag);
		PIPE_UNLOCK((struct pipe *)fp->f_data);
	}
	fp_drop(p, erp->er_handle, fp, 0);
	KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_END, evq->ee_req.er_handle,evq->ee_eventmask,(uint32_t)fp->f_data,flag,0);
	return(0);
}
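
/*
 * Sketch (illustrative only): after waitevent() delivers an event, the
 * watch is disarmed; modwatch() re-arms it, and EV_RM removes it.  Same
 * hedges as the earlier examples (historical <sys/ev.h> declarations).
 */
#if 0	/* user-space example, kept out of the kernel build */
modwatch(&er, EV_RE | EV_WR);	/* re-arm for read/write events */
/* ... later, when done watching this fd ... */
modwatch(&er, EV_RM);		/* remove the watch entirely */
#endif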

/* this routine is called from the close of an fd with the proc_fdlock held */
int
waitevent_close(struct proc *p, struct fileproc *fp)
{
	struct eventqelt *evq;

	fp->f_flags &= ~FP_WAITEVENT;

#if SOCKETS
	if (fp->f_type == DTYPE_SOCKET) {
		socket_lock((struct socket *)fp->f_data, 1);
		evq = ((struct socket *)fp->f_data)->so_evlist.tqh_first;
	} else
#endif /* SOCKETS */
	if (fp->f_type == DTYPE_PIPE) {
		PIPE_LOCK((struct pipe *)fp->f_data);
		evq = ((struct pipe *)fp->f_data)->pipe_evlist.tqh_first;
	}
	else {
		return(EINVAL);
	}
	proc_fdunlock(p);

	// locate event if possible
	for ( ; evq != NULL; evq = evq->ee_slist.tqe_next) {
		if (evq->ee_proc == p)
			break;
	}
	if (evq == NULL) {
#if SOCKETS
		if (fp->f_type == DTYPE_SOCKET)
			socket_unlock((struct socket *)fp->f_data, 1);
		else
#endif /* SOCKETS */
			PIPE_UNLOCK((struct pipe *)fp->f_data);

		proc_fdlock(p);

		return(EINVAL);
	}
	EVPROCDEQUE(p, evq);

#if SOCKETS
	if (fp->f_type == DTYPE_SOCKET) {
		TAILQ_REMOVE(&((struct socket *)fp->f_data)->so_evlist, evq, ee_slist);
		socket_unlock((struct socket *)fp->f_data, 1);
	} else
#endif /* SOCKETS */
	{
		TAILQ_REMOVE(&((struct pipe *)fp->f_data)->pipe_evlist, evq, ee_slist);
		PIPE_UNLOCK((struct pipe *)fp->f_data);
	}
	FREE(evq, M_TEMP);

	proc_fdlock(p);

	return(0);
}


/*
 * gethostuuid
 *
 * Description:	Get the host UUID from IOKit and return it to user space.
 *
 * Parameters:	uuid_buf		Pointer to buffer to receive UUID
 *		timeout			Timespec for timeout
 *		spi			SPI, skip sandbox check (temporary)
 *
 * Returns:	0			Success
 *		EWOULDBLOCK		Timeout is too short
 *		copyout:EFAULT		Bad user buffer
 *		mac_system_check_info:EPERM
 *					Client not allowed to perform this
 *					operation
 *
 * Notes:	A timeout seems redundant, since if it's tolerable to not
 *		have a system UUID in hand, then why ask for one?
 */
int
gethostuuid(struct proc *p, struct gethostuuid_args *uap, __unused int32_t *retval)
{
	kern_return_t kret;
	int error;
	mach_timespec_t mach_ts;	/* for IOKit call */
	__darwin_uuid_t uuid_kern;	/* for IOKit call */

	if (!uap->spi) {
#if CONFIG_EMBEDDED
#if CONFIG_MACF
		if ((error = mac_system_check_info(kauth_cred_get(), "hw.uuid")) != 0) {
			/* EPERM invokes userspace upcall if present */
			return (error);
		}
#endif
#endif
	}

	/* Convert the 32/64 bit timespec into a mach_timespec_t */
	if ( proc_is64bit(p) ) {
		struct user64_timespec ts;

		error = copyin(uap->timeoutp, &ts, sizeof(ts));
		if (error)
			return (error);
		mach_ts.tv_sec = ts.tv_sec;
		mach_ts.tv_nsec = ts.tv_nsec;
	} else {
		struct user32_timespec ts;

		error = copyin(uap->timeoutp, &ts, sizeof(ts));
		if (error)
			return (error);
		mach_ts.tv_sec = ts.tv_sec;
		mach_ts.tv_nsec = ts.tv_nsec;
	}

	/* Call IOKit with the stack buffer to get the UUID */
	kret = IOBSDGetPlatformUUID(uuid_kern, mach_ts);

	/*
	 * If we get it, copy out the data to the user buffer; note that a
	 * uuid_t is an array of characters, so this is size invariant for
	 * 32 vs. 64 bit.
	 */
	if (kret == KERN_SUCCESS) {
		error = copyout(uuid_kern, uap->uuid_buf, sizeof(uuid_kern));
	} else {
		error = EWOULDBLOCK;
	}

	return (error);
}
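
/*
 * Sketch (illustrative only): calling this from user space.  A libc wrapper
 * int gethostuuid(uuid_t, const struct timespec *) has historically been
 * available on macOS; its presence in a given SDK is an assumption here.
 */
#if 0	/* user-space example, kept out of the kernel build */
#include <uuid/uuid.h>

uuid_t uu;
struct timespec wait = { .tv_sec = 5, .tv_nsec = 0 };	/* arbitrary timeout */
char str[37];

if (gethostuuid(uu, &wait) == 0) {
	uuid_unparse(uu, str);	/* "xxxxxxxx-xxxx-..." */
}
#endif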

/*
 * ledger
 *
 * Description:	Omnibus system call for ledger operations
 */
int
ledger(struct proc *p, struct ledger_args *args, __unused int32_t *retval)
{
#if !CONFIG_MACF
#pragma unused(p)
#endif
	int rval, pid, len, error;
#ifdef LEDGER_DEBUG
	struct ledger_limit_args lla;
#endif
	task_t task;
	proc_t proc;

	/* Finish copying in the necessary args before taking the proc lock */
	error = 0;
	len = 0;
	if (args->cmd == LEDGER_ENTRY_INFO)
		error = copyin(args->arg3, (char *)&len, sizeof (len));
	else if (args->cmd == LEDGER_TEMPLATE_INFO)
		error = copyin(args->arg2, (char *)&len, sizeof (len));
	else if (args->cmd == LEDGER_LIMIT)
#ifdef LEDGER_DEBUG
		error = copyin(args->arg2, (char *)&lla, sizeof (lla));
#else
		return (EINVAL);
#endif
	else if ((args->cmd < 0) || (args->cmd > LEDGER_MAX_CMD))
		return (EINVAL);

	if (error)
		return (error);
	if (len < 0)
		return (EINVAL);

	rval = 0;
	if (args->cmd != LEDGER_TEMPLATE_INFO) {
		pid = args->arg1;
		proc = proc_find(pid);
		if (proc == NULL)
			return (ESRCH);

#if CONFIG_MACF
		error = mac_proc_check_ledger(p, proc, args->cmd);
		if (error) {
			proc_rele(proc);
			return (error);
		}
#endif

		task = proc->task;
	}

	switch (args->cmd) {
#ifdef LEDGER_DEBUG
	case LEDGER_LIMIT: {
		/* only the super-user may change limits */
		if (!kauth_cred_issuser(kauth_cred_get()))
			rval = EPERM;
		else
			rval = ledger_limit(task, &lla);
		proc_rele(proc);
		break;
	}
#endif
	case LEDGER_INFO: {
		struct ledger_info info;

		rval = ledger_info(task, &info);
		proc_rele(proc);
		if (rval == 0)
			rval = copyout(&info, args->arg2,
			    sizeof (info));
		break;
	}

	case LEDGER_ENTRY_INFO: {
		void *buf;
		int sz;

		rval = ledger_get_task_entry_info_multiple(task, &buf, &len);
		proc_rele(proc);
		if ((rval == 0) && (len >= 0)) {
			sz = len * sizeof (struct ledger_entry_info);
			rval = copyout(buf, args->arg2, sz);
			kfree(buf, sz);
		}
		if (rval == 0)
			rval = copyout(&len, args->arg3, sizeof (len));
		break;
	}

	case LEDGER_TEMPLATE_INFO: {
		void *buf;
		int sz;

		rval = ledger_template_info(&buf, &len);
		if ((rval == 0) && (len >= 0)) {
			sz = len * sizeof (struct ledger_template_info);
			rval = copyout(buf, args->arg1, sz);
			kfree(buf, sz);
		}
		if (rval == 0)
			rval = copyout(&len, args->arg2, sizeof (len));
		break;
	}

	default:
		panic("ledger syscall logic error -- command type %d", args->cmd);
		proc_rele(proc);
		rval = EINVAL;
	}

	return (rval);
}
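
/*
 * Sketch (illustrative only): driving the ledger() syscall from user space.
 * There is no libc wrapper, so this goes through syscall(); SYS_ledger from
 * <sys/syscall.h> is assumed to be present in the build in question.
 */
#if 0	/* user-space example, kept out of the kernel build */
#include <sys/syscall.h>
#include <unistd.h>

struct ledger_info li;

/* cmd LEDGER_INFO: arg1 = pid, arg2 = out buffer, arg3 unused */
if (syscall(SYS_ledger, LEDGER_INFO, (caddr_t)(long)getpid(),
    (caddr_t)&li, (caddr_t)0) == 0) {
	/* li now describes the target task's ledger */
}
#endif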

int
telemetry(__unused struct proc *p, struct telemetry_args *args, __unused int32_t *retval)
{
	int error = 0;

	switch (args->cmd) {
#if CONFIG_TELEMETRY
	case TELEMETRY_CMD_TIMER_EVENT:
		error = telemetry_timer_event(args->deadline, args->interval, args->leeway);
		break;
#endif /* CONFIG_TELEMETRY */
	case TELEMETRY_CMD_VOUCHER_NAME:
		if (thread_set_voucher_name((mach_port_name_t)args->deadline))
			error = EINVAL;
		break;

	default:
		error = EINVAL;
		break;
	}

	return (error);
}

#if DEVELOPMENT || DEBUG
#if CONFIG_WAITQ_DEBUG
static uint64_t g_wqset_num = 0;
struct g_wqset {
	queue_chain_t link;
	struct waitq_set *wqset;
};

static queue_head_t g_wqset_list;
static struct waitq_set *g_waitq_set = NULL;

static inline struct waitq_set *sysctl_get_wqset(int idx)
{
	struct g_wqset *gwqs;

	if (!g_wqset_num)
		queue_init(&g_wqset_list);

	/* don't bother with locks: this is test-only code! */
	qe_foreach_element(gwqs, &g_wqset_list, link) {
		if ((int)(wqset_id(gwqs->wqset) & 0xffffffff) == idx)
			return gwqs->wqset;
	}

	/* allocate a new one */
	++g_wqset_num;
	gwqs = (struct g_wqset *)kalloc(sizeof(*gwqs));
	assert(gwqs != NULL);

	gwqs->wqset = waitq_set_alloc(SYNC_POLICY_FIFO|SYNC_POLICY_PREPOST, NULL);
	enqueue_tail(&g_wqset_list, &gwqs->link);
	printf("[WQ]: created new waitq set 0x%llx\n", wqset_id(gwqs->wqset));

	return gwqs->wqset;
}

#define MAX_GLOBAL_TEST_QUEUES 64
static int g_wq_init = 0;
static struct waitq g_wq[MAX_GLOBAL_TEST_QUEUES];

static inline struct waitq *global_test_waitq(int idx)
{
	if (idx < 0)
		return NULL;

	if (!g_wq_init) {
		g_wq_init = 1;
		for (int i = 0; i < MAX_GLOBAL_TEST_QUEUES; i++)
			waitq_init(&g_wq[i], SYNC_POLICY_FIFO);
	}

	return &g_wq[idx % MAX_GLOBAL_TEST_QUEUES];
}

static int sysctl_waitq_wakeup_one SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	int error;
	int index;
	struct waitq *waitq;
	kern_return_t kr;
	int64_t event64 = 0;

	error = SYSCTL_IN(req, &event64, sizeof(event64));
	if (error)
		return error;

	if (!req->newptr)
		return SYSCTL_OUT(req, &event64, sizeof(event64));

	if (event64 < 0) {
		index = (int)((-event64) & 0xffffffff);
		waitq = wqset_waitq(sysctl_get_wqset(index));
		index = -index;
	} else {
		index = (int)event64;
		waitq = global_test_waitq(index);
	}

	event64 = 0;

	printf("[WQ]: Waking one thread on waitq [%d] event:0x%llx\n",
	    index, event64);
	kr = waitq_wakeup64_one(waitq, (event64_t)event64, THREAD_AWAKENED,
	    WAITQ_ALL_PRIORITIES);
	printf("[WQ]: \tkr=%d\n", kr);

	return SYSCTL_OUT(req, &kr, sizeof(kr));
}
SYSCTL_PROC(_kern, OID_AUTO, waitq_wakeup_one, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
	    0, 0, sysctl_waitq_wakeup_one, "Q", "wakeup one thread waiting on given event");


static int sysctl_waitq_wakeup_all SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	int error;
	int index;
	struct waitq *waitq;
	kern_return_t kr;
	int64_t event64 = 0;

	error = SYSCTL_IN(req, &event64, sizeof(event64));
	if (error)
		return error;

	if (!req->newptr)
		return SYSCTL_OUT(req, &event64, sizeof(event64));

	if (event64 < 0) {
		index = (int)((-event64) & 0xffffffff);
		waitq = wqset_waitq(sysctl_get_wqset(index));
		index = -index;
	} else {
		index = (int)event64;
		waitq = global_test_waitq(index);
	}

	event64 = 0;

	printf("[WQ]: Waking all threads on waitq [%d] event:0x%llx\n",
	    index, event64);
	kr = waitq_wakeup64_all(waitq, (event64_t)event64,
	    THREAD_AWAKENED, WAITQ_ALL_PRIORITIES);
	printf("[WQ]: \tkr=%d\n", kr);

	return SYSCTL_OUT(req, &kr, sizeof(kr));
}
SYSCTL_PROC(_kern, OID_AUTO, waitq_wakeup_all, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
	    0, 0, sysctl_waitq_wakeup_all, "Q", "wakeup all threads waiting on given event");


static int sysctl_waitq_wait SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	int error;
	int index;
	struct waitq *waitq;
	kern_return_t kr;
	int64_t event64 = 0;

	error = SYSCTL_IN(req, &event64, sizeof(event64));
	if (error)
		return error;

	if (!req->newptr)
		return SYSCTL_OUT(req, &event64, sizeof(event64));

	if (event64 < 0) {
		index = (int)((-event64) & 0xffffffff);
		waitq = wqset_waitq(sysctl_get_wqset(index));
		index = -index;
	} else {
		index = (int)event64;
		waitq = global_test_waitq(index);
	}

	event64 = 0;

	printf("[WQ]: Current thread waiting on waitq [%d] event:0x%llx\n",
	    index, event64);
	kr = waitq_assert_wait64(waitq, (event64_t)event64, THREAD_INTERRUPTIBLE, 0);
	if (kr == THREAD_WAITING)
		thread_block(THREAD_CONTINUE_NULL);
	printf("[WQ]: \tWoke Up: kr=%d\n", kr);

	return SYSCTL_OUT(req, &kr, sizeof(kr));
}
SYSCTL_PROC(_kern, OID_AUTO, waitq_wait, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
	    0, 0, sysctl_waitq_wait, "Q", "start waiting on given event");


static int sysctl_wqset_select SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	int error;
	struct waitq_set *wqset;
	uint64_t event64 = 0;

	error = SYSCTL_IN(req, &event64, sizeof(event64));
	if (error)
		return error;

	if (!req->newptr)
		goto out;

	wqset = sysctl_get_wqset((int)(event64 & 0xffffffff));
	g_waitq_set = wqset;

	event64 = wqset_id(wqset);
	printf("[WQ]: selected wqset 0x%llx\n", event64);

out:
	if (g_waitq_set)
		event64 = wqset_id(g_waitq_set);
	else
		event64 = (uint64_t)(-1);

	return SYSCTL_OUT(req, &event64, sizeof(event64));
}
SYSCTL_PROC(_kern, OID_AUTO, wqset_select, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
	    0, 0, sysctl_wqset_select, "Q", "select/create a global waitq set");


static int sysctl_waitq_link SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	int error;
	int index;
	struct waitq *waitq;
	struct waitq_set *wqset;
	kern_return_t kr = KERN_SUCCESS;	/* reported as-is on the early-out path below */
	uint64_t reserved_link = 0;
	int64_t event64 = 0;

	error = SYSCTL_IN(req, &event64, sizeof(event64));
	if (error)
		return error;

	if (!req->newptr)
		return SYSCTL_OUT(req, &event64, sizeof(event64));

	if (!g_waitq_set)
		g_waitq_set = sysctl_get_wqset(1);
	wqset = g_waitq_set;

	if (event64 < 0) {
		struct waitq_set *tmp;
		index = (int)((-event64) & 0xffffffff);
		tmp = sysctl_get_wqset(index);
		if (tmp == wqset)
			goto out;
		waitq = wqset_waitq(tmp);
		index = -index;
	} else {
		index = (int)event64;
		waitq = global_test_waitq(index);
	}

	printf("[WQ]: linking waitq [%d] to global wqset (0x%llx)\n",
	    index, wqset_id(wqset));
	reserved_link = waitq_link_reserve(waitq);
	kr = waitq_link(waitq, wqset, WAITQ_SHOULD_LOCK, &reserved_link);
	waitq_link_release(reserved_link);

	printf("[WQ]: \tkr=%d\n", kr);

out:
	return SYSCTL_OUT(req, &kr, sizeof(kr));
}
SYSCTL_PROC(_kern, OID_AUTO, waitq_link, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
	    0, 0, sysctl_waitq_link, "Q", "link global waitq to test waitq set");


static int sysctl_waitq_unlink SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	int error;
	int index;
	struct waitq *waitq;
	struct waitq_set *wqset;
	kern_return_t kr;
	uint64_t event64 = 0;

	error = SYSCTL_IN(req, &event64, sizeof(event64));
	if (error)
		return error;

	if (!req->newptr)
		return SYSCTL_OUT(req, &event64, sizeof(event64));

	if (!g_waitq_set)
		g_waitq_set = sysctl_get_wqset(1);
	wqset = g_waitq_set;

	index = (int)event64;
	waitq = global_test_waitq(index);

	printf("[WQ]: unlinking waitq [%d] from global wqset (0x%llx)\n",
	    index, wqset_id(wqset));

	kr = waitq_unlink(waitq, wqset);
	printf("[WQ]: \tkr=%d\n", kr);

	return SYSCTL_OUT(req, &kr, sizeof(kr));
}
SYSCTL_PROC(_kern, OID_AUTO, waitq_unlink, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
	    0, 0, sysctl_waitq_unlink, "Q", "unlink global waitq from test waitq set");


static int sysctl_waitq_clear_prepost SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	struct waitq *waitq;
	uint64_t event64 = 0;
	int error, index;

	error = SYSCTL_IN(req, &event64, sizeof(event64));
	if (error)
		return error;

	if (!req->newptr)
		return SYSCTL_OUT(req, &event64, sizeof(event64));

	index = (int)event64;
	waitq = global_test_waitq(index);

	printf("[WQ]: clearing prepost on waitq [%d]\n", index);
	waitq_clear_prepost(waitq);

	return SYSCTL_OUT(req, &event64, sizeof(event64));
}
SYSCTL_PROC(_kern, OID_AUTO, waitq_clear_prepost, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
	    0, 0, sysctl_waitq_clear_prepost, "Q", "clear prepost on given waitq");


static int sysctl_wqset_unlink_all SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	int error;
	struct waitq_set *wqset;
	kern_return_t kr;
	uint64_t event64 = 0;

	error = SYSCTL_IN(req, &event64, sizeof(event64));
	if (error)
		return error;

	if (!req->newptr)
		return SYSCTL_OUT(req, &event64, sizeof(event64));

	if (!g_waitq_set)
		g_waitq_set = sysctl_get_wqset(1);
	wqset = g_waitq_set;

	printf("[WQ]: unlinking all queues from global wqset (0x%llx)\n",
	    wqset_id(wqset));

	kr = waitq_set_unlink_all(wqset);
	printf("[WQ]: \tkr=%d\n", kr);

	return SYSCTL_OUT(req, &kr, sizeof(kr));
}
SYSCTL_PROC(_kern, OID_AUTO, wqset_unlink_all, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
	    0, 0, sysctl_wqset_unlink_all, "Q", "unlink all queues from test waitq set");


static int sysctl_wqset_clear_preposts SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	struct waitq_set *wqset = NULL;
	uint64_t event64 = 0;
	int error, index;

	error = SYSCTL_IN(req, &event64, sizeof(event64));
	if (error)
		return error;

	if (!req->newptr)
		goto out;

	index = (int)((event64) & 0xffffffff);
	wqset = sysctl_get_wqset(index);
	assert(wqset != NULL);

	printf("[WQ]: clearing preposts on wqset 0x%llx\n", wqset_id(wqset));
	waitq_set_clear_preposts(wqset);

out:
	if (wqset)
		event64 = wqset_id(wqset);
	else
		event64 = (uint64_t)(-1);

	return SYSCTL_OUT(req, &event64, sizeof(event64));
}
SYSCTL_PROC(_kern, OID_AUTO, wqset_clear_preposts, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
	    0, 0, sysctl_wqset_clear_preposts, "Q", "clear preposts on given waitq set");
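
/*
 * Sketch (illustrative only): poking these test knobs from user space on a
 * DEVELOPMENT/DEBUG kernel.  Writing an index to kern.waitq_wait blocks the
 * calling thread on that test waitq; another thread or shell can then write
 * the same index to kern.waitq_wakeup_one to release it.
 */
#if 0	/* user-space example, kept out of the kernel build */
#include <sys/sysctl.h>

int64_t idx = 4;		/* arbitrary global test waitq index */
int kr;
size_t len = sizeof(kr);

/* blocks until someone wakes test waitq 4 */
sysctlbyname("kern.waitq_wait", &kr, &len, &idx, sizeof(idx));
#endif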

#endif /* CONFIG_WAITQ_DEBUG */
#endif /* DEVELOPMENT || DEBUG */
