]> git.saurik.com Git - apple/xnu.git/blame - bsd/kern/sys_generic.c
xnu-2422.90.20.tar.gz
[apple/xnu.git] / bsd / kern / sys_generic.c
CommitLineData
1c79356b 1/*
39236c6e 2 * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
5d5c5d0d 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
1c79356b 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
8f6c56a5 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
8f6c56a5 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b
A
27 */
28/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
29/*
30 * Copyright (c) 1982, 1986, 1989, 1993
31 * The Regents of the University of California. All rights reserved.
32 * (c) UNIX System Laboratories, Inc.
33 * All or some portions of this file are derived from material licensed
34 * to the University of California by American Telephone and Telegraph
35 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
36 * the permission of UNIX System Laboratories, Inc.
37 *
38 * Redistribution and use in source and binary forms, with or without
39 * modification, are permitted provided that the following conditions
40 * are met:
41 * 1. Redistributions of source code must retain the above copyright
42 * notice, this list of conditions and the following disclaimer.
43 * 2. Redistributions in binary form must reproduce the above copyright
44 * notice, this list of conditions and the following disclaimer in the
45 * documentation and/or other materials provided with the distribution.
46 * 3. All advertising materials mentioning features or use of this software
47 * must display the following acknowledgement:
48 * This product includes software developed by the University of
49 * California, Berkeley and its contributors.
50 * 4. Neither the name of the University nor the names of its contributors
51 * may be used to endorse or promote products derived from this software
52 * without specific prior written permission.
53 *
54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 * SUCH DAMAGE.
65 *
66 * @(#)sys_generic.c 8.9 (Berkeley) 2/14/95
67 */
2d21ac55
A
68/*
69 * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce
70 * support for mandatory and extensible security protections. This notice
71 * is included in support of clause 2.2 (b) of the Apple Public License,
72 * Version 2.0.
73 */
1c79356b
A
74
75#include <sys/param.h>
76#include <sys/systm.h>
77#include <sys/filedesc.h>
78#include <sys/ioctl.h>
91447636
A
79#include <sys/file_internal.h>
80#include <sys/proc_internal.h>
1c79356b 81#include <sys/socketvar.h>
91447636 82#include <sys/uio_internal.h>
1c79356b
A
83#include <sys/kernel.h>
84#include <sys/stat.h>
85#include <sys/malloc.h>
91447636 86#include <sys/sysproto.h>
1c79356b 87
91447636 88#include <sys/mount_internal.h>
1c79356b
A
89#include <sys/protosw.h>
90#include <sys/ev.h>
91#include <sys/user.h>
92#include <sys/kdebug.h>
91447636
A
93#include <sys/poll.h>
94#include <sys/event.h>
95#include <sys/eventvar.h>
316670eb 96#include <sys/proc.h>
39236c6e 97#include <sys/kauth.h>
91447636
A
98
99#include <mach/mach_types.h>
100#include <kern/kern_types.h>
1c79356b 101#include <kern/assert.h>
91447636
A
102#include <kern/kalloc.h>
103#include <kern/thread.h>
104#include <kern/clock.h>
316670eb
A
105#include <kern/ledger.h>
106#include <kern/task.h>
39236c6e
A
107#if CONFIG_TELEMETRY
108#include <kern/telemetry.h>
109#endif
1c79356b
A
110
111#include <sys/mbuf.h>
39236c6e 112#include <sys/domain.h>
1c79356b
A
113#include <sys/socket.h>
114#include <sys/socketvar.h>
115#include <sys/errno.h>
55e303ae 116#include <sys/syscall.h>
91447636 117#include <sys/pipe.h>
1c79356b 118
b0d623f7 119#include <security/audit/audit.h>
e5568f75 120
1c79356b
A
121#include <net/if.h>
122#include <net/route.h>
123
124#include <netinet/in.h>
125#include <netinet/in_systm.h>
126#include <netinet/ip.h>
127#include <netinet/in_pcb.h>
128#include <netinet/ip_var.h>
129#include <netinet/ip6.h>
130#include <netinet/tcp.h>
131#include <netinet/tcp_fsm.h>
132#include <netinet/tcp_seq.h>
133#include <netinet/tcp_timer.h>
134#include <netinet/tcp_var.h>
135#include <netinet/tcpip.h>
136#include <netinet/tcp_debug.h>
0b4e3aa0
A
137/* for wait queue based select */
138#include <kern/wait_queue.h>
91447636 139#include <kern/kalloc.h>
91447636
A
140#include <sys/vnode_internal.h>
141
39236c6e
A
142#include <pexpert/pexpert.h>
143
2d21ac55
A
/*
 * Forward declarations used by this file.  The first group has no header
 * of its own (see the XXX note); the remaining prototypes cover the
 * read/write helpers that are defined further down in this file.
 */
144/* XXX should be in a header file somewhere */
145void evsofree(struct socket *);
146void evpipefree(struct pipe *);
147void postpipeevent(struct pipe *, int);
148void postevent(struct socket *, struct sockbuf *, int);
149extern kern_return_t IOBSDGetPlatformUUID(__darwin_uuid_t uuid, mach_timespec_t timeoutp);
150
91447636
A
/* Vectored I/O back ends shared by readv()/writev(). */
151int rd_uio(struct proc *p, int fdes, uio_t uio, user_ssize_t *retval);
152int wr_uio(struct proc *p, int fdes, uio_t uio, user_ssize_t *retval);
153extern void *get_bsduthreadarg(thread_t);
154extern int *get_bsduthreadrval(thread_t);
155
/* Single-buffer I/O helpers shared by read()/pread() and write()/pwrite(). */
2d21ac55 156__private_extern__ int dofileread(vfs_context_t ctx, struct fileproc *fp,
91447636
A
157 user_addr_t bufp, user_size_t nbyte,
158 off_t offset, int flags, user_ssize_t *retval);
2d21ac55 159__private_extern__ int dofilewrite(vfs_context_t ctx, struct fileproc *fp,
91447636
A
160 user_addr_t bufp, user_size_t nbyte,
161 off_t offset, int flags, user_ssize_t *retval);
/*
 * NOTE(review): the last parameter is named check_for_vnode here but
 * check_for_pread in the definition below; the names should be reconciled.
 */
162__private_extern__ int preparefileread(struct proc *p, struct fileproc **fp_ret, int fd, int check_for_vnode);
163__private_extern__ void donefileread(struct proc *p, struct fileproc *fp_ret, int fd);
9bccf70c 164
6d2010ae
A
165
166/* Conflict wait queue for when selects collide (opaque type) */
/* Initialized with FIFO policy by select_wait_queue_init(). */
167struct wait_queue select_conflict_queue;
168
39236c6e
A
/*
 * Temporary debug switch for issue 13841988; it defaults to 0 and is
 * overwritten from the "temp_debug_13841988" boot-arg in
 * select_wait_queue_init().
 */
169#if 13841988
170int temp_debug_13841988 = 0;
171#endif
172
6d2010ae
A
173/*
174 * Init routine called from bsd_init.c
175 */
176void select_wait_queue_init(void);
177void
178select_wait_queue_init(void)
179{
180 wait_queue_init(&select_conflict_queue, SYNC_POLICY_FIFO);
39236c6e
A
181#if 13841988
182 if (PE_parse_boot_argn("temp_debug_13841988", &temp_debug_13841988, sizeof(temp_debug_13841988))) {
183 kprintf("Temporary debugging for 13841988 enabled\n");
184 }
185#endif
6d2010ae
A
186}
187
/*
 * Shorthand accessors: each f_xxx name expands to the corresponding field
 * reached through a fileproc's f_fglob pointer, so "fp->f_flag" reads
 * fp->f_fglob->fg_flag.  Note that f_type goes one level further, through
 * fg_ops.
 */
91447636 188#define f_flag f_fglob->fg_flag
39236c6e 189#define f_type f_fglob->fg_ops->fo_type
91447636
A
190#define f_msgcount f_fglob->fg_msgcount
191#define f_cred f_fglob->fg_cred
192#define f_ops f_fglob->fg_ops
193#define f_offset f_fglob->fg_offset
194#define f_data f_fglob->fg_data
2d21ac55 195
1c79356b
A
196/*
197 * Read system call.
2d21ac55
A
198 *
199 * Returns: 0 Success
200 * preparefileread:EBADF
201 * preparefileread:ESPIPE
202 * preparefileread:ENXIO
203 * preparefileread:EBADF
204 * dofileread:???
1c79356b 205 */
9bccf70c 206int
2d21ac55
A
207read(struct proc *p, struct read_args *uap, user_ssize_t *retval)
208{
209 __pthread_testcancel(1);
210 return(read_nocancel(p, (struct read_nocancel_args *)uap, retval));
211}
212
213int
214read_nocancel(struct proc *p, struct read_nocancel_args *uap, user_ssize_t *retval)
9bccf70c 215{
91447636 216 struct fileproc *fp;
9bccf70c 217 int error;
91447636 218 int fd = uap->fd;
b0d623f7 219 struct vfs_context context;
91447636
A
220
221 if ( (error = preparefileread(p, &fp, fd, 0)) )
222 return (error);
9bccf70c 223
b0d623f7
A
224 context = *(vfs_context_current());
225 context.vc_ucred = fp->f_fglob->fg_cred;
226
227 error = dofileread(&context, fp, uap->cbuf, uap->nbyte,
91447636
A
228 (off_t)-1, 0, retval);
229
230 donefileread(p, fp, fd);
231
232 return (error);
9bccf70c
A
233}
234
235/*
236 * Pread system call
2d21ac55
A
237 *
238 * Returns: 0 Success
239 * preparefileread:EBADF
240 * preparefileread:ESPIPE
241 * preparefileread:ENXIO
242 * preparefileread:EBADF
243 * dofileread:???
9bccf70c 244 */
9bccf70c 245int
2d21ac55 246pread(struct proc *p, struct pread_args *uap, user_ssize_t *retval)
9bccf70c 247{
2d21ac55
A
248 __pthread_testcancel(1);
249 return(pread_nocancel(p, (struct pread_nocancel_args *)uap, retval));
250}
251
252int
253pread_nocancel(struct proc *p, struct pread_nocancel_args *uap, user_ssize_t *retval)
254{
255 struct fileproc *fp = NULL; /* fp set by preparefileread() */
91447636 256 int fd = uap->fd;
9bccf70c 257 int error;
b0d623f7 258 struct vfs_context context;
9bccf70c 259
91447636 260 if ( (error = preparefileread(p, &fp, fd, 1)) )
4a3eedf9 261 goto out;
91447636 262
b0d623f7
A
263 context = *(vfs_context_current());
264 context.vc_ucred = fp->f_fglob->fg_cred;
265
266 error = dofileread(&context, fp, uap->buf, uap->nbyte,
91447636 267 uap->offset, FOF_OFFSET, retval);
55e303ae 268
91447636
A
269 donefileread(p, fp, fd);
270
b7266188 271 KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_pread) | DBG_FUNC_NONE),
55e303ae 272 uap->fd, uap->nbyte, (unsigned int)((uap->offset >> 32)), (unsigned int)(uap->offset), 0);
4a3eedf9
A
273
274out:
91447636 275 return (error);
9bccf70c
A
276}
277
/*
 * donefileread
 *
 * Code common for read and pread: releases the fileproc reference that
 * preparefileread() handed out.  The process fd lock is taken around the
 * fp_drop() call, which is told (last argument 1) that the lock is held.
 */
void
donefileread(struct proc *p, struct fileproc *fp, int fd)
{
	proc_fdlock_spin(p);

	fp_drop(p, fd, fp, 1);

	proc_fdunlock(p);
}
289
2d21ac55
A
290/*
291 * Returns: 0 Success
292 * EBADF
293 * ESPIPE
294 * ENXIO
295 * fp_lookup:EBADF
296 * fo_read:???
297 */
91447636
A
298int
299preparefileread(struct proc *p, struct fileproc **fp_ret, int fd, int check_for_pread)
300{
301 vnode_t vp;
302 int error;
303 struct fileproc *fp;
304
b0d623f7
A
305 AUDIT_ARG(fd, fd);
306
2d21ac55 307 proc_fdlock_spin(p);
91447636
A
308
309 error = fp_lookup(p, fd, &fp, 1);
310
311 if (error) {
312 proc_fdunlock(p);
313 return (error);
314 }
315 if ((fp->f_flag & FREAD) == 0) {
316 error = EBADF;
317 goto out;
318 }
319 if (check_for_pread && (fp->f_type != DTYPE_VNODE)) {
320 error = ESPIPE;
321 goto out;
322 }
323 if (fp->f_type == DTYPE_VNODE) {
324 vp = (struct vnode *)fp->f_fglob->fg_data;
325
2d21ac55
A
326 if (check_for_pread && (vnode_isfifo(vp))) {
327 error = ESPIPE;
328 goto out;
329 }
330 if (check_for_pread && (vp->v_flag & VISTTY)) {
331 error = ENXIO;
332 goto out;
333 }
91447636
A
334 }
335
336 *fp_ret = fp;
337
338 proc_fdunlock(p);
339 return (0);
340
341out:
342 fp_drop(p, fd, fp, 1);
343 proc_fdunlock(p);
344 return (error);
345}
346
347
2d21ac55
A
348/*
349 * Returns: 0 Success
350 * EINVAL
351 * fo_read:???
352 */
55e303ae 353__private_extern__ int
2d21ac55
A
354dofileread(vfs_context_t ctx, struct fileproc *fp,
355 user_addr_t bufp, user_size_t nbyte, off_t offset, int flags,
356 user_ssize_t *retval)
1c79356b 357{
91447636
A
358 uio_t auio;
359 user_ssize_t bytecnt;
360 long error = 0;
361 char uio_buf[ UIO_SIZEOF(1) ];
1c79356b 362
9bccf70c
A
363 if (nbyte > INT_MAX)
364 return (EINVAL);
91447636 365
2d21ac55 366 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
91447636
A
367 auio = uio_createwithbuffer(1, offset, UIO_USERSPACE64, UIO_READ,
368 &uio_buf[0], sizeof(uio_buf));
369 } else {
370 auio = uio_createwithbuffer(1, offset, UIO_USERSPACE32, UIO_READ,
371 &uio_buf[0], sizeof(uio_buf));
372 }
373 uio_addiov(auio, bufp, nbyte);
374
91447636 375 bytecnt = nbyte;
9bccf70c 376
2d21ac55 377 if ((error = fo_read(fp, auio, flags, ctx))) {
91447636 378 if (uio_resid(auio) != bytecnt && (error == ERESTART ||
9bccf70c
A
379 error == EINTR || error == EWOULDBLOCK))
380 error = 0;
381 }
91447636 382 bytecnt -= uio_resid(auio);
91447636
A
383
384 *retval = bytecnt;
385
9bccf70c 386 return (error);
1c79356b
A
387}
388
9bccf70c
A
389/*
390 * Scatter read system call.
2d21ac55
A
391 *
392 * Returns: 0 Success
393 * EINVAL
394 * ENOMEM
395 * copyin:EFAULT
396 * rd_uio:???
9bccf70c 397 */
9bccf70c 398int
2d21ac55
A
399readv(struct proc *p, struct readv_args *uap, user_ssize_t *retval)
400{
401 __pthread_testcancel(1);
402 return(readv_nocancel(p, (struct readv_nocancel_args *)uap, retval));
403}
404
405int
406readv_nocancel(struct proc *p, struct readv_nocancel_args *uap, user_ssize_t *retval)
1c79356b 407{
91447636 408 uio_t auio = NULL;
1c79356b 409 int error;
91447636
A
410 struct user_iovec *iovp;
411
412 /* Verify range bedfore calling uio_create() */
413 if (uap->iovcnt <= 0 || uap->iovcnt > UIO_MAXIOV)
414 return (EINVAL);
415
416 /* allocate a uio large enough to hold the number of iovecs passed */
417 auio = uio_create(uap->iovcnt, 0,
418 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
419 UIO_READ);
420
421 /* get location of iovecs within the uio. then copyin the iovecs from
422 * user space.
423 */
424 iovp = uio_iovsaddr(auio);
425 if (iovp == NULL) {
426 error = ENOMEM;
427 goto ExitThisRoutine;
428 }
b0d623f7
A
429 error = copyin_user_iovec_array(uap->iovp,
430 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
431 uap->iovcnt, iovp);
91447636
A
432 if (error) {
433 goto ExitThisRoutine;
434 }
435
436 /* finalize uio_t for use and do the IO
437 */
39236c6e
A
438 error = uio_calculateresid(auio);
439 if (error) {
440 goto ExitThisRoutine;
441 }
91447636
A
442 error = rd_uio(p, uap->fd, auio, retval);
443
444ExitThisRoutine:
445 if (auio != NULL) {
446 uio_free(auio);
447 }
1c79356b
A
448 return (error);
449}
450
451/*
452 * Write system call
2d21ac55
A
453 *
454 * Returns: 0 Success
455 * EBADF
456 * fp_lookup:EBADF
457 * dofilewrite:???
1c79356b 458 */
9bccf70c 459int
2d21ac55
A
460write(struct proc *p, struct write_args *uap, user_ssize_t *retval)
461{
462 __pthread_testcancel(1);
463 return(write_nocancel(p, (struct write_nocancel_args *)uap, retval));
464
465}
466
467int
468write_nocancel(struct proc *p, struct write_nocancel_args *uap, user_ssize_t *retval)
1c79356b 469{
91447636 470 struct fileproc *fp;
9bccf70c 471 int error;
91447636 472 int fd = uap->fd;
9bccf70c 473
b0d623f7
A
474 AUDIT_ARG(fd, fd);
475
91447636
A
476 error = fp_lookup(p,fd,&fp,0);
477 if (error)
478 return(error);
479 if ((fp->f_flag & FWRITE) == 0) {
480 error = EBADF;
481 } else {
2d21ac55
A
482 struct vfs_context context = *(vfs_context_current());
483 context.vc_ucred = fp->f_fglob->fg_cred;
484
485 error = dofilewrite(&context, fp, uap->cbuf, uap->nbyte,
9bccf70c 486 (off_t)-1, 0, retval);
91447636
A
487 }
488 if (error == 0)
489 fp_drop_written(p, fd, fp);
490 else
491 fp_drop(p, fd, fp, 0);
9bccf70c
A
492 return(error);
493}
494
495/*
91447636 496 * pwrite system call
2d21ac55
A
497 *
498 * Returns: 0 Success
499 * EBADF
500 * ESPIPE
501 * ENXIO
502 * EINVAL
503 * fp_lookup:EBADF
504 * dofilewrite:???
9bccf70c 505 */
9bccf70c 506int
2d21ac55
A
507pwrite(struct proc *p, struct pwrite_args *uap, user_ssize_t *retval)
508{
509 __pthread_testcancel(1);
510 return(pwrite_nocancel(p, (struct pwrite_nocancel_args *)uap, retval));
511}
512
513int
514pwrite_nocancel(struct proc *p, struct pwrite_nocancel_args *uap, user_ssize_t *retval)
9bccf70c 515{
91447636 516 struct fileproc *fp;
9bccf70c 517 int error;
91447636 518 int fd = uap->fd;
2d21ac55 519 vnode_t vp = (vnode_t)0;
91447636 520
b0d623f7
A
521 AUDIT_ARG(fd, fd);
522
91447636
A
523 error = fp_lookup(p,fd,&fp,0);
524 if (error)
525 return(error);
9bccf70c 526
91447636
A
527 if ((fp->f_flag & FWRITE) == 0) {
528 error = EBADF;
529 } else {
2d21ac55
A
530 struct vfs_context context = *vfs_context_current();
531 context.vc_ucred = fp->f_fglob->fg_cred;
532
91447636
A
533 if (fp->f_type != DTYPE_VNODE) {
534 error = ESPIPE;
2d21ac55
A
535 goto errout;
536 }
537 vp = (vnode_t)fp->f_fglob->fg_data;
538 if (vnode_isfifo(vp)) {
539 error = ESPIPE;
540 goto errout;
541 }
542 if ((vp->v_flag & VISTTY)) {
543 error = ENXIO;
544 goto errout;
91447636 545 }
2d21ac55
A
546 if (uap->offset == (off_t)-1) {
547 error = EINVAL;
548 goto errout;
549 }
550
551 error = dofilewrite(&context, fp, uap->buf, uap->nbyte,
552 uap->offset, FOF_OFFSET, retval);
9bccf70c 553 }
2d21ac55 554errout:
91447636
A
555 if (error == 0)
556 fp_drop_written(p, fd, fp);
557 else
558 fp_drop(p, fd, fp, 0);
55e303ae 559
b7266188 560 KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_pwrite) | DBG_FUNC_NONE),
55e303ae
A
561 uap->fd, uap->nbyte, (unsigned int)((uap->offset >> 32)), (unsigned int)(uap->offset), 0);
562
9bccf70c
A
563 return(error);
564}
565
2d21ac55
A
566/*
567 * Returns: 0 Success
568 * EINVAL
569 * <fo_write>:EPIPE
570 * <fo_write>:??? [indirect through struct fileops]
571 */
55e303ae 572__private_extern__ int
2d21ac55
A
573dofilewrite(vfs_context_t ctx, struct fileproc *fp,
574 user_addr_t bufp, user_size_t nbyte, off_t offset, int flags,
575 user_ssize_t *retval)
9bccf70c 576{
91447636
A
577 uio_t auio;
578 long error = 0;
579 user_ssize_t bytecnt;
580 char uio_buf[ UIO_SIZEOF(1) ];
91447636 581
9bccf70c
A
582 if (nbyte > INT_MAX)
583 return (EINVAL);
91447636 584
2d21ac55 585 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
91447636
A
586 auio = uio_createwithbuffer(1, offset, UIO_USERSPACE64, UIO_WRITE,
587 &uio_buf[0], sizeof(uio_buf));
588 } else {
589 auio = uio_createwithbuffer(1, offset, UIO_USERSPACE32, UIO_WRITE,
590 &uio_buf[0], sizeof(uio_buf));
591 }
592 uio_addiov(auio, bufp, nbyte);
593
91447636 594 bytecnt = nbyte;
2d21ac55 595 if ((error = fo_write(fp, auio, flags, ctx))) {
91447636 596 if (uio_resid(auio) != bytecnt && (error == ERESTART ||
9bccf70c
A
597 error == EINTR || error == EWOULDBLOCK))
598 error = 0;
55e303ae 599 /* The socket layer handles SIGPIPE */
6d2010ae
A
600 if (error == EPIPE && fp->f_type != DTYPE_SOCKET &&
601 (fp->f_fglob->fg_lflags & FG_NOSIGPIPE) == 0) {
2d21ac55
A
602 /* XXX Raise the signal on the thread? */
603 psignal(vfs_context_proc(ctx), SIGPIPE);
604 }
9bccf70c 605 }
91447636 606 bytecnt -= uio_resid(auio);
91447636
A
607 *retval = bytecnt;
608
9bccf70c 609 return (error);
1c79356b 610}
9bccf70c
A
611
612/*
613 * Gather write system call
614 */
9bccf70c 615int
2d21ac55
A
616writev(struct proc *p, struct writev_args *uap, user_ssize_t *retval)
617{
618 __pthread_testcancel(1);
619 return(writev_nocancel(p, (struct writev_nocancel_args *)uap, retval));
620}
621
622int
623writev_nocancel(struct proc *p, struct writev_nocancel_args *uap, user_ssize_t *retval)
1c79356b 624{
91447636 625 uio_t auio = NULL;
1c79356b 626 int error;
91447636
A
627 struct user_iovec *iovp;
628
b0d623f7
A
629 AUDIT_ARG(fd, uap->fd);
630
91447636
A
631 /* Verify range bedfore calling uio_create() */
632 if (uap->iovcnt <= 0 || uap->iovcnt > UIO_MAXIOV)
633 return (EINVAL);
634
635 /* allocate a uio large enough to hold the number of iovecs passed */
636 auio = uio_create(uap->iovcnt, 0,
637 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
638 UIO_WRITE);
639
640 /* get location of iovecs within the uio. then copyin the iovecs from
641 * user space.
642 */
643 iovp = uio_iovsaddr(auio);
644 if (iovp == NULL) {
645 error = ENOMEM;
646 goto ExitThisRoutine;
647 }
b0d623f7
A
648 error = copyin_user_iovec_array(uap->iovp,
649 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
650 uap->iovcnt, iovp);
91447636
A
651 if (error) {
652 goto ExitThisRoutine;
653 }
654
655 /* finalize uio_t for use and do the IO
656 */
39236c6e
A
657 error = uio_calculateresid(auio);
658 if (error) {
659 goto ExitThisRoutine;
660 }
91447636
A
661 error = wr_uio(p, uap->fd, auio, retval);
662
663ExitThisRoutine:
664 if (auio != NULL) {
665 uio_free(auio);
666 }
1c79356b
A
667 return (error);
668}
669
91447636 670
9bccf70c 671int
2d21ac55 672wr_uio(struct proc *p, int fdes, uio_t uio, user_ssize_t *retval)
1c79356b 673{
91447636
A
674 struct fileproc *fp;
675 int error;
676 user_ssize_t count;
2d21ac55 677 struct vfs_context context = *vfs_context_current();
1c79356b 678
91447636
A
679 error = fp_lookup(p,fdes,&fp,0);
680 if (error)
681 return(error);
1c79356b 682
91447636
A
683 if ((fp->f_flag & FWRITE) == 0) {
684 error = EBADF;
685 goto out;
1c79356b 686 }
91447636 687 count = uio_resid(uio);
2d21ac55
A
688
689 context.vc_ucred = fp->f_cred;
690 error = fo_write(fp, uio, 0, &context);
91447636
A
691 if (error) {
692 if (uio_resid(uio) != count && (error == ERESTART ||
693 error == EINTR || error == EWOULDBLOCK))
694 error = 0;
695 /* The socket layer handles SIGPIPE */
6d2010ae
A
696 if (error == EPIPE && fp->f_type != DTYPE_SOCKET &&
697 (fp->f_fglob->fg_lflags & FG_NOSIGPIPE) == 0)
91447636
A
698 psignal(p, SIGPIPE);
699 }
700 *retval = count - uio_resid(uio);
701
91447636 702out:
6d2010ae 703 if (error == 0)
91447636
A
704 fp_drop_written(p, fdes, fp);
705 else
706 fp_drop(p, fdes, fp, 0);
707 return(error);
708}
709
710
711int
2d21ac55 712rd_uio(struct proc *p, int fdes, uio_t uio, user_ssize_t *retval)
91447636
A
713{
714 struct fileproc *fp;
715 int error;
716 user_ssize_t count;
2d21ac55 717 struct vfs_context context = *vfs_context_current();
91447636
A
718
719 if ( (error = preparefileread(p, &fp, fdes, 0)) )
720 return (error);
721
722 count = uio_resid(uio);
2d21ac55
A
723
724 context.vc_ucred = fp->f_cred;
725
726 error = fo_read(fp, uio, 0, &context);
9bccf70c 727
91447636
A
728 if (error) {
729 if (uio_resid(uio) != count && (error == ERESTART ||
730 error == EINTR || error == EWOULDBLOCK))
731 error = 0;
1c79356b 732 }
91447636 733 *retval = count - uio_resid(uio);
9bccf70c 734
91447636 735 donefileread(p, fp, fdes);
9bccf70c 736
91447636 737 return (error);
1c79356b
A
738}
739
740/*
741 * Ioctl system call
91447636 742 *
2d21ac55
A
743 * Returns: 0 Success
744 * EBADF
745 * ENOTTY
746 * ENOMEM
747 * ESRCH
748 * copyin:EFAULT
749 * copyoutEFAULT
750 * fp_lookup:EBADF Bad file descriptor
751 * fo_ioctl:???
1c79356b 752 */
9bccf70c 753int
b0d623f7 754ioctl(struct proc *p, struct ioctl_args *uap, __unused int32_t *retval)
1c79356b 755{
39236c6e 756 struct fileproc *fp = NULL;
91447636 757 int error = 0;
39236c6e
A
758 u_int size = 0;
759 caddr_t datap = NULL, memp = NULL;
760 boolean_t is64bit = FALSE;
761 int tmp = 0;
1c79356b
A
762#define STK_PARAMS 128
763 char stkbuf[STK_PARAMS];
91447636 764 int fd = uap->fd;
39236c6e 765 u_long com = uap->com;
2d21ac55 766 struct vfs_context context = *vfs_context_current();
1c79356b 767
e5568f75 768 AUDIT_ARG(fd, uap->fd);
e5568f75 769 AUDIT_ARG(addr, uap->data);
91447636
A
770
771 is64bit = proc_is64bit(p);
b0d623f7
A
772#if CONFIG_AUDIT
773 if (is64bit)
39236c6e 774 AUDIT_ARG(value64, com);
b0d623f7 775 else
39236c6e 776 AUDIT_ARG(cmd, CAST_DOWN_EXPLICIT(int, com));
b0d623f7 777#endif /* CONFIG_AUDIT */
91447636 778
1c79356b
A
779 /*
780 * Interpret high order word to find amount of data to be
781 * copied to/from the user's address space.
782 */
783 size = IOCPARM_LEN(com);
39236c6e
A
784 if (size > IOCPARM_MAX)
785 return ENOTTY;
1c79356b 786 if (size > sizeof (stkbuf)) {
39236c6e
A
787 if ((memp = (caddr_t)kalloc(size)) == 0)
788 return ENOMEM;
91447636 789 datap = memp;
1c79356b 790 } else
91447636 791 datap = &stkbuf[0];
39236c6e 792 if (com & IOC_IN) {
1c79356b 793 if (size) {
91447636 794 error = copyin(uap->data, datap, size);
39236c6e
A
795 if (error)
796 goto out_nofp;
91447636
A
797 } else {
798 /* XXX - IOC_IN and no size? we should proably return an error here!! */
799 if (is64bit) {
800 *(user_addr_t *)datap = uap->data;
801 }
802 else {
803 *(uint32_t *)datap = (uint32_t)uap->data;
804 }
805 }
39236c6e 806 } else if ((com & IOC_OUT) && size)
1c79356b
A
807 /*
808 * Zero the buffer so the user always
809 * gets back something deterministic.
810 */
91447636 811 bzero(datap, size);
39236c6e 812 else if (com & IOC_VOID) {
91447636
A
813 /* XXX - this is odd since IOC_VOID means no parameters */
814 if (is64bit) {
815 *(user_addr_t *)datap = uap->data;
816 }
817 else {
818 *(uint32_t *)datap = (uint32_t)uap->data;
819 }
820 }
1c79356b 821
39236c6e
A
822 proc_fdlock(p);
823 error = fp_lookup(p,fd,&fp,1);
824 if (error) {
825 proc_fdunlock(p);
826 goto out_nofp;
827 }
828
829 AUDIT_ARG(file, p, fp);
830
831 if ((fp->f_flag & (FREAD | FWRITE)) == 0) {
832 error = EBADF;
833 goto out;
834 }
835
836 context.vc_ucred = fp->f_fglob->fg_cred;
837
838#if CONFIG_MACF
839 error = mac_file_check_ioctl(context.vc_ucred, fp->f_fglob, com);
840 if (error)
841 goto out;
842#endif
843
1c79356b 844 switch (com) {
39236c6e
A
845 case FIONCLEX:
846 *fdflags(p, fd) &= ~UF_EXCLOSE;
847 break;
848
849 case FIOCLEX:
850 *fdflags(p, fd) |= UF_EXCLOSE;
851 break;
1c79356b
A
852
853 case FIONBIO:
91447636 854 if ( (tmp = *(int *)datap) )
1c79356b
A
855 fp->f_flag |= FNONBLOCK;
856 else
857 fp->f_flag &= ~FNONBLOCK;
2d21ac55 858 error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, &context);
1c79356b
A
859 break;
860
861 case FIOASYNC:
91447636 862 if ( (tmp = *(int *)datap) )
1c79356b
A
863 fp->f_flag |= FASYNC;
864 else
865 fp->f_flag &= ~FASYNC;
2d21ac55 866 error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, &context);
1c79356b
A
867 break;
868
869 case FIOSETOWN:
91447636 870 tmp = *(int *)datap;
1c79356b
A
871 if (fp->f_type == DTYPE_SOCKET) {
872 ((struct socket *)fp->f_data)->so_pgid = tmp;
1c79356b
A
873 break;
874 }
91447636 875 if (fp->f_type == DTYPE_PIPE) {
2d21ac55 876 error = fo_ioctl(fp, (int)TIOCSPGRP, (caddr_t)&tmp, &context);
91447636
A
877 break;
878 }
1c79356b
A
879 if (tmp <= 0) {
880 tmp = -tmp;
881 } else {
2d21ac55 882 struct proc *p1 = proc_find(tmp);
1c79356b
A
883 if (p1 == 0) {
884 error = ESRCH;
885 break;
886 }
2d21ac55
A
887 tmp = p1->p_pgrpid;
888 proc_rele(p1);
1c79356b 889 }
2d21ac55 890 error = fo_ioctl(fp, (int)TIOCSPGRP, (caddr_t)&tmp, &context);
1c79356b
A
891 break;
892
893 case FIOGETOWN:
894 if (fp->f_type == DTYPE_SOCKET) {
91447636 895 *(int *)datap = ((struct socket *)fp->f_data)->so_pgid;
1c79356b
A
896 break;
897 }
2d21ac55 898 error = fo_ioctl(fp, TIOCGPGRP, datap, &context);
91447636 899 *(int *)datap = -*(int *)datap;
1c79356b
A
900 break;
901
902 default:
2d21ac55 903 error = fo_ioctl(fp, com, datap, &context);
1c79356b
A
904 /*
905 * Copy any data to user, size was
906 * already set and checked above.
907 */
39236c6e 908 if (error == 0 && (com & IOC_OUT) && size)
91447636 909 error = copyout(datap, uap->data, (u_int)size);
1c79356b
A
910 break;
911 }
91447636
A
912out:
913 fp_drop(p, fd, fp, 1);
914 proc_fdunlock(p);
39236c6e
A
915
916out_nofp:
917 if (memp)
918 kfree(memp, size);
91447636 919 return(error);
1c79356b
A
920}
921
/* Globals declared for the select code; no use of them is visible in this part of the file. */
1c79356b 922int selwait, nselcoll;
0b4e3aa0
A
/* Values for the sel_pass argument of selprocess()/selscan(). */
923#define SEL_FIRSTPASS 1
924#define SEL_SECONDPASS 2
9bccf70c
A
/* Prototypes for the select helpers; their definitions are not in this part of the file. */
925extern int selcontinue(int error);
926extern int selprocess(int error, int sel_pass);
927static int selscan(struct proc *p, struct _select * sel,
b0d623f7 928 int nfd, int32_t *retval, int sel_pass, wait_queue_sub_t wqsub);
6d2010ae
A
929static int selcount(struct proc *p, u_int32_t *ibits, int nfd, int *count);
930static int seldrop_locked(struct proc *p, u_int32_t *ibits, int nfd, int lim, int *need_wakeup, int fromselcount);
91447636 931static int seldrop(struct proc *p, u_int32_t *ibits, int nfd);
1c79356b
A
932
933/*
934 * Select system call.
2d21ac55
A
935 *
936 * Returns: 0 Success
937 * EINVAL Invalid argument
938 * EAGAIN Nonconformant error if allocation fails
939 * selprocess:???
1c79356b 940 */
9bccf70c 941int
b0d623f7 942select(struct proc *p, struct select_args *uap, int32_t *retval)
2d21ac55
A
943{
944 __pthread_testcancel(1);
945 return(select_nocancel(p, (struct select_nocancel_args *)uap, retval));
946}
947
948int
b0d623f7 949select_nocancel(struct proc *p, struct select_nocancel_args *uap, int32_t *retval)
1c79356b 950{
9bccf70c 951 int error = 0;
0b4e3aa0 952 u_int ni, nw, size;
91447636 953 thread_t th_act;
1c79356b
A
954 struct uthread *uth;
955 struct _select *sel;
956 int needzerofill = 1;
0b4e3aa0 957 int count = 0;
1c79356b 958
91447636 959 th_act = current_thread();
1c79356b 960 uth = get_bsdthread_info(th_act);
91447636 961 sel = &uth->uu_select;
39236c6e 962 sel->data = &uth->uu_kevent.ss_select_data;
1c79356b
A
963 retval = (int *)get_bsduthreadrval(th_act);
964 *retval = 0;
965
0b4e3aa0 966 if (uap->nd < 0) {
1c79356b 967 return (EINVAL);
0b4e3aa0 968 }
1c79356b 969
2d21ac55
A
970 /* select on thread of process that already called proc_exit() */
971 if (p->p_fd == NULL) {
972 return (EBADF);
973 }
974
1c79356b
A
975 if (uap->nd > p->p_fd->fd_nfiles)
976 uap->nd = p->p_fd->fd_nfiles; /* forgiving; slightly wrong */
977
978 nw = howmany(uap->nd, NFDBITS);
979 ni = nw * sizeof(fd_mask);
980
981 /*
2d21ac55
A
982 * if the previously allocated space for the bits is smaller than
983 * what is requested or no space has yet been allocated for this
984 * thread, allocate enough space now.
985 *
986 * Note: If this process fails, select() will return EAGAIN; this
987 * is the same thing poll() returns in a no-memory situation, but
988 * it is not a POSIX compliant error code for select().
1c79356b
A
989 */
990 if (sel->nbytes < (3 * ni)) {
2d21ac55
A
991 int nbytes = 3 * ni;
992
993 /* Free previous allocation, if any */
994 if (sel->ibits != NULL)
995 FREE(sel->ibits, M_TEMP);
996 if (sel->obits != NULL) {
997 FREE(sel->obits, M_TEMP);
998 /* NULL out; subsequent ibits allocation may fail */
999 sel->obits = NULL;
1000 }
1001
1002 MALLOC(sel->ibits, u_int32_t *, nbytes, M_TEMP, M_WAITOK | M_ZERO);
1003 if (sel->ibits == NULL)
1004 return (EAGAIN);
1005 MALLOC(sel->obits, u_int32_t *, nbytes, M_TEMP, M_WAITOK | M_ZERO);
1006 if (sel->obits == NULL) {
1007 FREE(sel->ibits, M_TEMP);
1008 sel->ibits = NULL;
1009 return (EAGAIN);
1010 }
1011 sel->nbytes = nbytes;
1c79356b 1012 needzerofill = 0;
2d21ac55 1013 }
1c79356b
A
1014
1015 if (needzerofill) {
1016 bzero((caddr_t)sel->ibits, sel->nbytes);
1017 bzero((caddr_t)sel->obits, sel->nbytes);
1018 }
1019
1020 /*
1021 * get the bits from the user address space
1022 */
1023#define getbits(name, x) \
1024 do { \
91447636 1025 if (uap->name && (error = copyin(uap->name, \
1c79356b
A
1026 (caddr_t)&sel->ibits[(x) * nw], ni))) \
1027 goto continuation; \
1028 } while (0)
1029
1030 getbits(in, 0);
1031 getbits(ou, 1);
1032 getbits(ex, 2);
1033#undef getbits
1034
1035 if (uap->tv) {
9bccf70c 1036 struct timeval atv;
91447636 1037 if (IS_64BIT_PROCESS(p)) {
b0d623f7 1038 struct user64_timeval atv64;
91447636
A
1039 error = copyin(uap->tv, (caddr_t)&atv64, sizeof(atv64));
1040 /* Loses resolution - assume timeout < 68 years */
1041 atv.tv_sec = atv64.tv_sec;
1042 atv.tv_usec = atv64.tv_usec;
1043 } else {
b0d623f7
A
1044 struct user32_timeval atv32;
1045 error = copyin(uap->tv, (caddr_t)&atv32, sizeof(atv32));
1046 atv.tv_sec = atv32.tv_sec;
1047 atv.tv_usec = atv32.tv_usec;
91447636 1048 }
1c79356b
A
1049 if (error)
1050 goto continuation;
9bccf70c 1051 if (itimerfix(&atv)) {
1c79356b
A
1052 error = EINVAL;
1053 goto continuation;
1054 }
0b4e3aa0 1055
9bccf70c 1056 clock_absolutetime_interval_to_deadline(
39236c6e 1057 tvtoabstime(&atv), &sel->data->abstime);
9bccf70c
A
1058 }
1059 else
39236c6e 1060 sel->data->abstime = 0;
9bccf70c 1061
6d2010ae 1062 if ( (error = selcount(p, sel->ibits, uap->nd, &count)) ) {
0b4e3aa0
A
1063 goto continuation;
1064 }
b0d623f7 1065
39236c6e 1066 sel->data->count = count;
91447636 1067 size = SIZEOF_WAITQUEUE_SET + (count * SIZEOF_WAITQUEUE_LINK);
2d21ac55
A
1068 if (uth->uu_allocsize) {
1069 if (uth->uu_wqset == 0)
0b4e3aa0
A
1070 panic("select: wql memory smashed");
1071 /* needed for the select now */
2d21ac55
A
1072 if (size > uth->uu_allocsize) {
1073 kfree(uth->uu_wqset, uth->uu_allocsize);
1074 uth->uu_allocsize = size;
1075 uth->uu_wqset = (wait_queue_set_t)kalloc(size);
1076 if (uth->uu_wqset == (wait_queue_set_t)NULL)
0b4e3aa0 1077 panic("failed to allocate memory for waitqueue\n");
0b4e3aa0
A
1078 }
1079 } else {
2d21ac55
A
1080 uth->uu_allocsize = size;
1081 uth->uu_wqset = (wait_queue_set_t)kalloc(uth->uu_allocsize);
1082 if (uth->uu_wqset == (wait_queue_set_t)NULL)
0b4e3aa0 1083 panic("failed to allocate memory for waitqueue\n");
0b4e3aa0 1084 }
2d21ac55 1085 bzero(uth->uu_wqset, size);
39236c6e 1086 sel->data->wql = (char *)uth->uu_wqset + SIZEOF_WAITQUEUE_SET;
2d21ac55 1087 wait_queue_set_init(uth->uu_wqset, (SYNC_POLICY_FIFO | SYNC_POLICY_PREPOST));
0b4e3aa0 1088
1c79356b 1089continuation:
6d2010ae
A
1090
1091 if (error) {
1092 /*
1093 * We have already cleaned up any state we established,
1094 * either locally or as a result of selcount(). We don't
1095 * need to wait_subqueue_unlink_all(), since we haven't set
1096 * anything at this point.
1097 */
1098 return (error);
1099 }
1100
1101 return selprocess(0, SEL_FIRSTPASS);
0b4e3aa0
A
1102}
1103
1104int
1105selcontinue(int error)
1106{
9bccf70c 1107 return selprocess(error, SEL_SECONDPASS);
1c79356b
A
1108}
1109
6d2010ae
A
1110
1111/*
1112 * selprocess
1113 *
1114 * Parameters: error The error code from our caller
1115 * sel_pass The pass we are on
1116 */
1c79356b 1117int
91447636 1118selprocess(int error, int sel_pass)
1c79356b 1119{
9bccf70c 1120 int ncoll;
1c79356b 1121 u_int ni, nw;
91447636 1122 thread_t th_act;
1c79356b
A
1123 struct uthread *uth;
1124 struct proc *p;
1125 struct select_args *uap;
1126 int *retval;
1127 struct _select *sel;
0b4e3aa0 1128 int unwind = 1;
9bccf70c 1129 int prepost = 0;
0b4e3aa0
A
1130 int somewakeup = 0;
1131 int doretry = 0;
9bccf70c 1132 wait_result_t wait_result;
1c79356b
A
1133
1134 p = current_proc();
91447636 1135 th_act = current_thread();
1c79356b
A
1136 uap = (struct select_args *)get_bsduthreadarg(th_act);
1137 retval = (int *)get_bsduthreadrval(th_act);
1138 uth = get_bsdthread_info(th_act);
91447636 1139 sel = &uth->uu_select;
1c79356b 1140
0b4e3aa0
A
1141 if ((error != 0) && (sel_pass == SEL_FIRSTPASS))
1142 unwind = 0;
39236c6e 1143 if (sel->data->count == 0)
0b4e3aa0 1144 unwind = 0;
1c79356b 1145retry:
0b4e3aa0 1146 if (error != 0) {
6d2010ae
A
1147 sel_pass = SEL_FIRSTPASS; /* Reset for seldrop */
1148 goto done;
0b4e3aa0
A
1149 }
1150
1c79356b 1151 ncoll = nselcoll;
b0d623f7 1152 OSBitOrAtomic(P_SELECT, &p->p_flag);
0b4e3aa0 1153 /* skip scans if the select is just for timeouts */
39236c6e 1154 if (sel->data->count) {
6d2010ae
A
1155 /*
1156 * Clear out any dangling refs from prior calls; technically
1157 * there should not be any.
1158 */
0b4e3aa0 1159 if (sel_pass == SEL_FIRSTPASS)
2d21ac55 1160 wait_queue_sub_clearrefs(uth->uu_wqset);
0b4e3aa0 1161
2d21ac55 1162 error = selscan(p, sel, uap->nd, retval, sel_pass, (wait_queue_sub_t)uth->uu_wqset);
0b4e3aa0
A
1163 if (error || *retval) {
1164 goto done;
1165 }
1166 if (prepost) {
1167 /* if the select of log, then we canwakeup and discover some one
1168 * else already read the data; go toselct again if time permits
1169 */
1170 prepost = 0;
1171 doretry = 1;
1172 }
1173 if (somewakeup) {
1174 somewakeup = 0;
1175 doretry = 1;
1176 }
1177 }
1178
9bccf70c
A
1179 if (uap->tv) {
1180 uint64_t now;
1181
1182 clock_get_uptime(&now);
39236c6e 1183 if (now >= sel->data->abstime)
9bccf70c 1184 goto done;
1c79356b 1185 }
0b4e3aa0
A
1186
1187 if (doretry) {
1188 /* cleanup obits and try again */
1189 doretry = 0;
1190 sel_pass = SEL_FIRSTPASS;
1191 goto retry;
1192 }
1193
1c79356b
A
1194 /*
1195 * To effect a poll, the timeout argument should be
1196 * non-nil, pointing to a zero-valued timeval structure.
1197 */
39236c6e 1198 if (uap->tv && sel->data->abstime == 0) {
1c79356b
A
1199 goto done;
1200 }
0b4e3aa0
A
1201
1202 /* No spurious wakeups due to colls,no need to check for them */
1203 if ((sel_pass == SEL_SECONDPASS) || ((p->p_flag & P_SELECT) == 0)) {
1204 sel_pass = SEL_FIRSTPASS;
1c79356b
A
1205 goto retry;
1206 }
0b4e3aa0 1207
b0d623f7 1208 OSBitAndAtomic(~((uint32_t)P_SELECT), &p->p_flag);
1c79356b 1209
0b4e3aa0 1210 /* if the select is just for timeout skip check */
39236c6e 1211 if (sel->data->count &&(sel_pass == SEL_SECONDPASS))
0b4e3aa0
A
1212 panic("selprocess: 2nd pass assertwaiting");
1213
1214 /* Wait Queue Subordinate has waitqueue as first element */
39236c6e
A
1215 wait_result = wait_queue_assert_wait_with_leeway((wait_queue_t)uth->uu_wqset,
1216 NULL, THREAD_ABORTSAFE,
1217 TIMEOUT_URGENCY_USER_NORMAL, sel->data->abstime, 0);
9bccf70c
A
1218 if (wait_result != THREAD_AWAKENED) {
1219 /* there are no preposted events */
91447636
A
1220 error = tsleep1(NULL, PSOCK | PCATCH,
1221 "select", 0, selcontinue);
0b4e3aa0
A
1222 } else {
1223 prepost = 1;
1224 error = 0;
1225 }
1226
0b4e3aa0 1227 if (error == 0) {
6d2010ae 1228 sel_pass = SEL_SECONDPASS;
0b4e3aa0 1229 if (!prepost)
6d2010ae 1230 somewakeup = 1;
1c79356b 1231 goto retry;
0b4e3aa0 1232 }
1c79356b 1233done:
91447636 1234 if (unwind) {
2d21ac55 1235 wait_subqueue_unlink_all(uth->uu_wqset);
91447636
A
1236 seldrop(p, sel->ibits, uap->nd);
1237 }
b0d623f7 1238 OSBitAndAtomic(~((uint32_t)P_SELECT), &p->p_flag);
1c79356b
A
1239 /* select is not restarted after signals... */
1240 if (error == ERESTART)
1241 error = EINTR;
1242 if (error == EWOULDBLOCK)
1243 error = 0;
1c79356b
A
1244 nw = howmany(uap->nd, NFDBITS);
1245 ni = nw * sizeof(fd_mask);
1246
1247#define putbits(name, x) \
1248 do { \
91447636
A
1249 if (uap->name && (error2 = \
1250 copyout((caddr_t)&sel->obits[(x) * nw], uap->name, ni))) \
1c79356b
A
1251 error = error2; \
1252 } while (0)
1253
1254 if (error == 0) {
1255 int error2;
1256
1257 putbits(in, 0);
1258 putbits(ou, 1);
1259 putbits(ex, 2);
1260#undef putbits
1261 }
1c79356b 1262 return(error);
1c79356b
A
1263}
1264
6d2010ae
A
1265
1266/*
1267 * selscan
1268 *
1269 * Parameters: p Process performing the select
1270 * sel The per-thread select context structure
1271 * nfd The number of file descriptors to scan
1272 * retval The per thread system call return area
1273 * sel_pass Which pass this is; allowed values are
1274 * SEL_FIRSTPASS and SEL_SECONDPASS
1275 * wqsub The per thread wait queue set
1276 *
1277 * Returns: 0 Success
1278 * EIO Invalid p->p_fd field XXX Obsolete?
1279 * EBADF One of the files in the bit vector is
1280 * invalid.
1281 */
1c79356b 1282static int
b0d623f7 1283selscan(struct proc *p, struct _select *sel, int nfd, int32_t *retval,
2d21ac55 1284 int sel_pass, wait_queue_sub_t wqsub)
1c79356b 1285{
2d21ac55
A
1286 struct filedesc *fdp = p->p_fd;
1287 int msk, i, j, fd;
1288 u_int32_t bits;
91447636 1289 struct fileproc *fp;
6d2010ae
A
1290 int n = 0; /* count of bits */
1291 int nc = 0; /* bit vector offset (nc'th bit) */
1c79356b
A
1292 static int flag[3] = { FREAD, FWRITE, 0 };
1293 u_int32_t *iptr, *optr;
1294 u_int nw;
0b4e3aa0
A
1295 u_int32_t *ibits, *obits;
1296 char * wql;
0b4e3aa0 1297 char * wql_ptr;
6d2010ae 1298 int count;
2d21ac55 1299 struct vfs_context context = *vfs_context_current();
1c79356b
A
1300
1301 /*
1302 * Problems when reboot; due to MacOSX signal probs
1303 * in Beaker1C ; verify that the p->p_fd is valid
1304 */
1305 if (fdp == NULL) {
1306 *retval=0;
1307 return(EIO);
1308 }
0b4e3aa0
A
1309 ibits = sel->ibits;
1310 obits = sel->obits;
39236c6e 1311 wql = sel->data->wql;
0b4e3aa0 1312
1c79356b
A
1313 nw = howmany(nfd, NFDBITS);
1314
39236c6e 1315 count = sel->data->count;
2d21ac55
A
1316
1317 nc = 0;
6d2010ae 1318 if (count) {
2d21ac55
A
1319 proc_fdlock(p);
1320 for (msk = 0; msk < 3; msk++) {
1321 iptr = (u_int32_t *)&ibits[msk * nw];
1322 optr = (u_int32_t *)&obits[msk * nw];
1323
1324 for (i = 0; i < nfd; i += NFDBITS) {
1325 bits = iptr[i/NFDBITS];
1326
1327 while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
1328 bits &= ~(1 << j);
1329 fp = fdp->fd_ofiles[fd];
1330
6d2010ae
A
1331 if (fp == NULL || (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
1332 /*
1333 * If we abort because of a bad
1334 * fd, let the caller unwind...
1335 */
2d21ac55
A
1336 proc_fdunlock(p);
1337 return(EBADF);
1338 }
1339 if (sel_pass == SEL_SECONDPASS) {
1340 wql_ptr = (char *)0;
6d2010ae
A
1341 if ((fp->f_flags & FP_INSELECT) && (fp->f_waddr == (void *)wqsub)) {
1342 fp->f_flags &= ~FP_INSELECT;
1343 fp->f_waddr = (void *)0;
1344 }
2d21ac55
A
1345 } else {
1346 wql_ptr = (wql + nc * SIZEOF_WAITQUEUE_LINK);
6d2010ae
A
1347 if (fp->f_flags & FP_INSELECT) {
1348 /* someone is already in select on this fp */
1349 fp->f_flags |= FP_SELCONFLICT;
1350 wait_queue_link(&select_conflict_queue, (wait_queue_set_t)wqsub);
1351 } else {
1352 fp->f_flags |= FP_INSELECT;
1353 fp->f_waddr = (void *)wqsub;
1354 }
2d21ac55
A
1355 }
1356
1357 context.vc_ucred = fp->f_cred;
1358
6d2010ae 1359 /* The select; set the bit, if true */
39236c6e 1360 if (fp->f_ops && fp->f_type
6d2010ae 1361 && fo_select(fp, flag[msk], wql_ptr, &context)) {
2d21ac55
A
1362 optr[fd/NFDBITS] |= (1 << (fd % NFDBITS));
1363 n++;
1364 }
1365 nc++;
1366 }
1367 }
1368 }
1369 proc_fdunlock(p);
0b4e3aa0 1370 }
1c79356b
A
1371 *retval = n;
1372 return (0);
1373}
1374
b0d623f7 1375int poll_callback(struct kqueue *, struct kevent64_s *, void *);
91447636
A
1376
1377struct poll_continue_args {
1378 user_addr_t pca_fds;
1379 u_int pca_nfds;
1380 u_int pca_rfds;
1381};
1382
9bccf70c 1383int
b0d623f7 1384poll(struct proc *p, struct poll_args *uap, int32_t *retval)
2d21ac55
A
1385{
1386 __pthread_testcancel(1);
1387 return(poll_nocancel(p, (struct poll_nocancel_args *)uap, retval));
1388}
1389
1390
1391int
b0d623f7 1392poll_nocancel(struct proc *p, struct poll_nocancel_args *uap, int32_t *retval)
1c79356b 1393{
91447636
A
1394 struct poll_continue_args *cont;
1395 struct pollfd *fds;
1396 struct kqueue *kq;
1397 struct timeval atv;
1398 int ncoll, error = 0;
1399 u_int nfds = uap->nfds;
1400 u_int rfds = 0;
1401 u_int i;
1402 size_t ni;
1c79356b 1403
91447636
A
1404 /*
1405 * This is kinda bogus. We have fd limits, but that is not
1406 * really related to the size of the pollfd array. Make sure
1407 * we let the process use at least FD_SETSIZE entries and at
1408 * least enough for the current limits. We want to be reasonably
1409 * safe, but not overly restrictive.
1410 */
1411 if (nfds > OPEN_MAX ||
2d21ac55 1412 (nfds > p->p_rlimit[RLIMIT_NOFILE].rlim_cur && (proc_suser(p) || nfds > FD_SETSIZE)))
91447636 1413 return (EINVAL);
1c79356b 1414
91447636
A
1415 kq = kqueue_alloc(p);
1416 if (kq == NULL)
1417 return (EAGAIN);
1418
1419 ni = nfds * sizeof(struct pollfd) + sizeof(struct poll_continue_args);
1420 MALLOC(cont, struct poll_continue_args *, ni, M_TEMP, M_WAITOK);
1421 if (NULL == cont) {
1422 error = EAGAIN;
1423 goto out;
1424 }
1425
1426 fds = (struct pollfd *)&cont[1];
1427 error = copyin(uap->fds, fds, nfds * sizeof(struct pollfd));
1428 if (error)
1429 goto out;
1430
1431 if (uap->timeout != -1) {
1432 struct timeval rtv;
1433
1434 atv.tv_sec = uap->timeout / 1000;
1435 atv.tv_usec = (uap->timeout % 1000) * 1000;
1436 if (itimerfix(&atv)) {
1437 error = EINVAL;
1438 goto out;
1439 }
1440 getmicrouptime(&rtv);
1441 timevaladd(&atv, &rtv);
1442 } else {
1443 atv.tv_sec = 0;
1444 atv.tv_usec = 0;
1445 }
1446
1447 /* JMM - all this P_SELECT stuff is bogus */
1448 ncoll = nselcoll;
b0d623f7 1449 OSBitOrAtomic(P_SELECT, &p->p_flag);
91447636
A
1450 for (i = 0; i < nfds; i++) {
1451 short events = fds[i].events;
b0d623f7 1452 struct kevent64_s kev;
91447636
A
1453 int kerror = 0;
1454
1455 /* per spec, ignore fd values below zero */
1456 if (fds[i].fd < 0) {
1457 fds[i].revents = 0;
1458 continue;
1459 }
1460
1461 /* convert the poll event into a kqueue kevent */
1462 kev.ident = fds[i].fd;
1463 kev.flags = EV_ADD | EV_ONESHOT | EV_POLL;
91447636 1464 kev.udata = CAST_USER_ADDR_T(&fds[i]);
6d2010ae
A
1465 kev.fflags = 0;
1466 kev.data = 0;
b0d623f7
A
1467 kev.ext[0] = 0;
1468 kev.ext[1] = 0;
91447636
A
1469
1470 /* Handle input events */
2d21ac55 1471 if (events & ( POLLIN | POLLRDNORM | POLLPRI | POLLRDBAND | POLLHUP )) {
91447636
A
1472 kev.filter = EVFILT_READ;
1473 if (!(events & ( POLLIN | POLLRDNORM )))
1474 kev.flags |= EV_OOBAND;
1475 kerror = kevent_register(kq, &kev, p);
1476 }
1477
1478 /* Handle output events */
1479 if (kerror == 0 &&
1480 events & ( POLLOUT | POLLWRNORM | POLLWRBAND )) {
1481 kev.filter = EVFILT_WRITE;
1482 kerror = kevent_register(kq, &kev, p);
1483 }
1484
1485 /* Handle BSD extension vnode events */
1486 if (kerror == 0 &&
1487 events & ( POLLEXTEND | POLLATTRIB | POLLNLINK | POLLWRITE )) {
1488 kev.filter = EVFILT_VNODE;
1489 kev.fflags = 0;
1490 if (events & POLLEXTEND)
1491 kev.fflags |= NOTE_EXTEND;
1492 if (events & POLLATTRIB)
1493 kev.fflags |= NOTE_ATTRIB;
1494 if (events & POLLNLINK)
1495 kev.fflags |= NOTE_LINK;
1496 if (events & POLLWRITE)
1497 kev.fflags |= NOTE_WRITE;
1498 kerror = kevent_register(kq, &kev, p);
1499 }
1500
1501 if (kerror != 0) {
1502 fds[i].revents = POLLNVAL;
1503 rfds++;
1504 } else
1505 fds[i].revents = 0;
1506 }
1507
1508 /* Did we have any trouble registering? */
1509 if (rfds > 0)
1510 goto done;
1511
1512 /* scan for, and possibly wait for, the kevents to trigger */
1513 cont->pca_fds = uap->fds;
1514 cont->pca_nfds = nfds;
1515 cont->pca_rfds = rfds;
b0d623f7 1516 error = kqueue_scan(kq, poll_callback, NULL, cont, &atv, p);
91447636
A
1517 rfds = cont->pca_rfds;
1518
1519 done:
b0d623f7 1520 OSBitAndAtomic(~((uint32_t)P_SELECT), &p->p_flag);
91447636
A
1521 /* poll is not restarted after signals... */
1522 if (error == ERESTART)
1523 error = EINTR;
1524 if (error == EWOULDBLOCK)
1525 error = 0;
1526 if (error == 0) {
1527 error = copyout(fds, uap->fds, nfds * sizeof(struct pollfd));
1528 *retval = rfds;
1529 }
1530
1531 out:
1532 if (NULL != cont)
1533 FREE(cont, M_TEMP);
1534
2d21ac55 1535 kqueue_dealloc(kq);
91447636
A
1536 return (error);
1537}
1538
2d21ac55 1539int
b0d623f7 1540poll_callback(__unused struct kqueue *kq, struct kevent64_s *kevp, void *data)
91447636
A
1541{
1542 struct poll_continue_args *cont = (struct poll_continue_args *)data;
1543 struct pollfd *fds = CAST_DOWN(struct pollfd *, kevp->udata);
316670eb 1544 short prev_revents = fds->revents;
ff6e181a
A
1545 short mask;
1546
91447636
A
1547 /* convert the results back into revents */
1548 if (kevp->flags & EV_EOF)
1549 fds->revents |= POLLHUP;
1550 if (kevp->flags & EV_ERROR)
1551 fds->revents |= POLLERR;
91447636
A
1552
1553 switch (kevp->filter) {
1554 case EVFILT_READ:
ff6e181a
A
1555 if (fds->revents & POLLHUP)
1556 mask = (POLLIN | POLLRDNORM | POLLPRI | POLLRDBAND );
1557 else {
1558 mask = 0;
1559 if (kevp->data != 0)
1560 mask |= (POLLIN | POLLRDNORM );
1561 if (kevp->flags & EV_OOBAND)
1562 mask |= ( POLLPRI | POLLRDBAND );
1563 }
1564 fds->revents |= (fds->events & mask);
91447636
A
1565 break;
1566
1567 case EVFILT_WRITE:
1568 if (!(fds->revents & POLLHUP))
1569 fds->revents |= (fds->events & ( POLLOUT | POLLWRNORM | POLLWRBAND ));
1570 break;
1571
2d21ac55 1572 case EVFILT_VNODE:
91447636
A
1573 if (kevp->fflags & NOTE_EXTEND)
1574 fds->revents |= (fds->events & POLLEXTEND);
1575 if (kevp->fflags & NOTE_ATTRIB)
1576 fds->revents |= (fds->events & POLLATTRIB);
1577 if (kevp->fflags & NOTE_LINK)
1578 fds->revents |= (fds->events & POLLNLINK);
1579 if (kevp->fflags & NOTE_WRITE)
1580 fds->revents |= (fds->events & POLLWRITE);
1581 break;
1582 }
2d21ac55 1583
316670eb 1584 if (fds->revents != 0 && prev_revents == 0)
2d21ac55
A
1585 cont->pca_rfds++;
1586
91447636
A
1587 return 0;
1588}
1589
/*
 * seltrue
 *
 * Select routine that ignores all of its arguments and always returns 1.
 */
int
seltrue(__unused dev_t dev, __unused int flag, __unused struct proc *p)
{
	return (1);
}
1596
6d2010ae
A
1597/*
1598 * selcount
1599 *
1600 * Count the number of bits set in the input bit vector, and establish an
1601 * outstanding fp->f_iocount for each of the descriptors which will be in
1602 * use in the select operation.
1603 *
1604 * Parameters: p The process doing the select
1605 * ibits The input bit vector
1606 * nfd The number of fd's in the vector
1607 * countp Pointer to where to store the bit count
1608 *
1609 * Returns: 0 Success
1610 * EIO Bad per process open file table
1611 * EBADF One of the bits in the input bit vector
1612 * references an invalid fd
1613 *
1614 * Implicit: *countp (modified) Count of fd's
1615 *
1616 * Notes: This function is the first pass under the proc_fdlock() that
1617 * permits us to recognize invalid descriptors in the bit vector;
1618 * the may, however, not remain valid through the drop and
1619 * later reacquisition of the proc_fdlock().
1620 */
91447636 1621static int
6d2010ae 1622selcount(struct proc *p, u_int32_t *ibits, int nfd, int *countp)
91447636 1623{
2d21ac55
A
1624 struct filedesc *fdp = p->p_fd;
1625 int msk, i, j, fd;
1626 u_int32_t bits;
91447636 1627 struct fileproc *fp;
0b4e3aa0 1628 int n = 0;
91447636 1629 u_int32_t *iptr;
0b4e3aa0 1630 u_int nw;
91447636
A
1631 int error=0;
1632 int dropcount;
6d2010ae 1633 int need_wakeup = 0;
0b4e3aa0
A
1634
1635 /*
1636 * Problems when reboot; due to MacOSX signal probs
1637 * in Beaker1C ; verify that the p->p_fd is valid
1638 */
1639 if (fdp == NULL) {
2d21ac55 1640 *countp = 0;
0b4e3aa0
A
1641 return(EIO);
1642 }
0b4e3aa0
A
1643 nw = howmany(nfd, NFDBITS);
1644
91447636 1645 proc_fdlock(p);
0b4e3aa0
A
1646 for (msk = 0; msk < 3; msk++) {
1647 iptr = (u_int32_t *)&ibits[msk * nw];
1648 for (i = 0; i < nfd; i += NFDBITS) {
1649 bits = iptr[i/NFDBITS];
1650 while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
1651 bits &= ~(1 << j);
1652 fp = fdp->fd_ofiles[fd];
1653 if (fp == NULL ||
1654 (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
2d21ac55 1655 *countp = 0;
91447636
A
1656 error = EBADF;
1657 goto bad;
0b4e3aa0 1658 }
91447636 1659 fp->f_iocount++;
0b4e3aa0
A
1660 n++;
1661 }
1662 }
1663 }
91447636
A
1664 proc_fdunlock(p);
1665
2d21ac55 1666 *countp = n;
91447636 1667 return (0);
6d2010ae 1668
91447636
A
1669bad:
1670 dropcount = 0;
1671
1672 if (n== 0)
1673 goto out;
6d2010ae
A
1674 /* Ignore error return; it's already EBADF */
1675 (void)seldrop_locked(p, ibits, nfd, n, &need_wakeup, 1);
91447636 1676
91447636
A
1677out:
1678 proc_fdunlock(p);
6d2010ae
A
1679 if (need_wakeup) {
1680 wakeup(&p->p_fpdrainwait);
1681 }
91447636
A
1682 return(error);
1683}
1684
6d2010ae
A
1685
1686/*
1687 * seldrop_locked
1688 *
1689 * Drop outstanding wait queue references set up during selscan(); drop the
1690 * outstanding per fileproc f_iocount() picked up during the selcount().
1691 *
1692 * Parameters: p Process performing the select
1693 * ibits Input pit bector of fd's
1694 * nfd Number of fd's
1695 * lim Limit to number of vector entries to
1696 * consider, or -1 for "all"
1697 * inselect True if
1698 * need_wakeup Pointer to flag to set to do a wakeup
1699 * if f_iocont on any descriptor goes to 0
1700 *
1701 * Returns: 0 Success
1702 * EBADF One or more fds in the bit vector
1703 * were invalid, but the rest
1704 * were successfully dropped
1705 *
1706 * Notes: An fd make become bad while the proc_fdlock() is not held,
1707 * if a multithreaded application closes the fd out from under
1708 * the in progress select. In this case, we still have to
1709 * clean up after the set up on the remaining fds.
1710 */
91447636 1711static int
6d2010ae 1712seldrop_locked(struct proc *p, u_int32_t *ibits, int nfd, int lim, int *need_wakeup, int fromselcount)
91447636 1713{
2d21ac55
A
1714 struct filedesc *fdp = p->p_fd;
1715 int msk, i, j, fd;
1716 u_int32_t bits;
91447636 1717 struct fileproc *fp;
91447636
A
1718 u_int32_t *iptr;
1719 u_int nw;
6d2010ae
A
1720 int error = 0;
1721 int dropcount = 0;
1722 uthread_t uth = get_bsdthread_info(current_thread());
1723
1724 *need_wakeup = 0;
91447636
A
1725
1726 /*
1727 * Problems when reboot; due to MacOSX signal probs
1728 * in Beaker1C ; verify that the p->p_fd is valid
1729 */
1730 if (fdp == NULL) {
1731 return(EIO);
1732 }
1733
1734 nw = howmany(nfd, NFDBITS);
1735
91447636
A
1736 for (msk = 0; msk < 3; msk++) {
1737 iptr = (u_int32_t *)&ibits[msk * nw];
1738 for (i = 0; i < nfd; i += NFDBITS) {
1739 bits = iptr[i/NFDBITS];
1740 while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
1741 bits &= ~(1 << j);
1742 fp = fdp->fd_ofiles[fd];
6d2010ae
A
1743 /*
1744 * If we've already dropped as many as were
1745 * counted/scanned, then we are done.
1746 */
1747 if ((fromselcount != 0) && (++dropcount > lim))
1748 goto done;
1749
1750 if (fp == NULL) {
1751 /* skip (now) bad fds */
1752 error = EBADF;
1753 continue;
1754 }
1755 /*
1756 * Only clear the flag if we set it. We'll
1757 * only find that we set it if we had made
1758 * at least one [partial] pass through selscan().
1759 */
1760 if ((fp->f_flags & FP_INSELECT) && (fp->f_waddr == (void *)uth->uu_wqset)) {
1761 fp->f_flags &= ~FP_INSELECT;
1762 fp->f_waddr = (void *)0;
91447636 1763 }
91447636 1764
6d2010ae
A
1765 fp->f_iocount--;
1766 if (fp->f_iocount < 0)
1767 panic("f_iocount overdecrement!");
1768
1769 if (fp->f_iocount == 0) {
1770 /*
1771 * The last iocount is responsible for clearing
1772 * selconfict flag - even if we didn't set it -
1773 * and is also responsible for waking up anyone
1774 * waiting on iocounts to drain.
1775 */
1776 if (fp->f_flags & FP_SELCONFLICT)
1777 fp->f_flags &= ~FP_SELCONFLICT;
1778 if (p->p_fpdrainwait) {
1779 p->p_fpdrainwait = 0;
1780 *need_wakeup = 1;
1781 }
91447636
A
1782 }
1783 }
1784 }
1785 }
6d2010ae
A
1786done:
1787 return (error);
1788}
1789
1790
1791static int
1792seldrop(struct proc *p, u_int32_t *ibits, int nfd)
1793{
1794 int error;
1795 int need_wakeup = 0;
1796
1797 proc_fdlock(p);
1798 error = seldrop_locked(p, ibits, nfd, nfd, &need_wakeup, 0);
91447636 1799 proc_fdunlock(p);
6d2010ae
A
1800 if (need_wakeup) {
1801 wakeup(&p->p_fpdrainwait);
1802 }
1803 return (error);
0b4e3aa0
A
1804}
1805
1c79356b
A
1806/*
1807 * Record a select request.
1808 */
1809void
91447636 1810selrecord(__unused struct proc *selector, struct selinfo *sip, void * p_wql)
1c79356b 1811{
91447636 1812 thread_t cur_act = current_thread();
0b4e3aa0 1813 struct uthread * ut = get_bsdthread_info(cur_act);
1c79356b 1814
0b4e3aa0
A
1815 /* need to look at collisions */
1816
0b4e3aa0 1817 /*do not record if this is second pass of select */
6d2010ae 1818 if(p_wql == (void *)0) {
0b4e3aa0 1819 return;
1c79356b
A
1820 }
1821
0b4e3aa0 1822 if ((sip->si_flags & SI_INITED) == 0) {
55e303ae 1823 wait_queue_init(&sip->si_wait_queue, SYNC_POLICY_FIFO);
0b4e3aa0
A
1824 sip->si_flags |= SI_INITED;
1825 sip->si_flags &= ~SI_CLEAR;
1826 }
1827
1828 if (sip->si_flags & SI_RECORDED) {
1829 sip->si_flags |= SI_COLL;
1830 } else
1831 sip->si_flags &= ~SI_COLL;
1832
1833 sip->si_flags |= SI_RECORDED;
2d21ac55
A
1834 if (!wait_queue_member(&sip->si_wait_queue, ut->uu_wqset))
1835 wait_queue_link_noalloc(&sip->si_wait_queue, ut->uu_wqset,
91447636 1836 (wait_queue_link_t)p_wql);
0b4e3aa0 1837
1c79356b
A
1838 return;
1839}
1840
1841void
2d21ac55 1842selwakeup(struct selinfo *sip)
1c79356b 1843{
1c79356b 1844
0b4e3aa0 1845 if ((sip->si_flags & SI_INITED) == 0) {
1c79356b 1846 return;
0b4e3aa0 1847 }
1c79356b
A
1848
1849 if (sip->si_flags & SI_COLL) {
1850 nselcoll++;
1851 sip->si_flags &= ~SI_COLL;
0b4e3aa0
A
1852#if 0
1853 /* will not support */
1854 //wakeup((caddr_t)&selwait);
1855#endif
1c79356b 1856 }
1c79356b 1857
0b4e3aa0 1858 if (sip->si_flags & SI_RECORDED) {
b0d623f7 1859 wait_queue_wakeup_all(&sip->si_wait_queue, NULL, THREAD_AWAKENED);
0b4e3aa0 1860 sip->si_flags &= ~SI_RECORDED;
1c79356b 1861 }
1c79356b 1862
1c79356b
A
1863}
1864
1865void
2d21ac55 1866selthreadclear(struct selinfo *sip)
1c79356b 1867{
1c79356b 1868
0b4e3aa0
A
1869 if ((sip->si_flags & SI_INITED) == 0) {
1870 return;
1871 }
1872 if (sip->si_flags & SI_RECORDED) {
1873 selwakeup(sip);
1874 sip->si_flags &= ~(SI_RECORDED | SI_COLL);
1c79356b 1875 }
0b4e3aa0 1876 sip->si_flags |= SI_CLEAR;
b0d623f7 1877 wait_queue_unlink_all(&sip->si_wait_queue);
1c79356b
A
1878}
1879
1880
91447636
A
1881
1882
91447636
A
/* Sub-codes used with DBG_EVENT for the KERNEL_DEBUG trace points below. */
#define DBG_POST	0x10
#define DBG_WATCH	0x11
#define DBG_WAIT	0x12
#define DBG_MOD		0x13
#define DBG_EWAKEUP	0x14
#define DBG_ENQUEUE	0x15
#define DBG_DEQUEUE	0x16

#define DBG_MISC_POST		MISCDBG_CODE(DBG_EVENT,DBG_POST)
#define DBG_MISC_WATCH		MISCDBG_CODE(DBG_EVENT,DBG_WATCH)
#define DBG_MISC_WAIT		MISCDBG_CODE(DBG_EVENT,DBG_WAIT)
#define DBG_MISC_MOD		MISCDBG_CODE(DBG_EVENT,DBG_MOD)
#define DBG_MISC_EWAKEUP	MISCDBG_CODE(DBG_EVENT,DBG_EWAKEUP)
#define DBG_MISC_ENQUEUE	MISCDBG_CODE(DBG_EVENT,DBG_ENQUEUE)
#define DBG_MISC_DEQUEUE	MISCDBG_CODE(DBG_EVENT,DBG_DEQUEUE)


/*
 * EVPROCDEQUE
 *
 * With the proc lock held, remove 'evq' from p's event list if it is
 * currently queued there and clear its EV_QUEUED flag.
 *
 * The expansion deliberately has no trailing semicolon, so that
 * "EVPROCDEQUE(p, evq);" is exactly one statement and is safe in an
 * unbraced if/else.  Both arguments are evaluated more than once.
 */
#define EVPROCDEQUE(p, evq)	do {				\
	proc_lock(p);						\
	if ((evq)->ee_flags & EV_QUEUED) {			\
		TAILQ_REMOVE(&(p)->p_evlist, (evq), ee_plist);	\
		(evq)->ee_flags &= ~EV_QUEUED;			\
	}							\
	proc_unlock(p);						\
} while (0)
1908
1c79356b
A
1909
1910/*
1911 * called upon socket close. deque and free all events for
91447636 1912 * the socket... socket must be locked by caller.
1c79356b 1913 */
9bccf70c 1914void
1c79356b
A
1915evsofree(struct socket *sp)
1916{
91447636
A
1917 struct eventqelt *evq, *next;
1918 proc_t p;
1919
1920 if (sp == NULL)
1921 return;
1c79356b 1922
91447636
A
1923 for (evq = sp->so_evlist.tqh_first; evq != NULL; evq = next) {
1924 next = evq->ee_slist.tqe_next;
1925 p = evq->ee_proc;
1c79356b 1926
91447636
A
1927 if (evq->ee_flags & EV_QUEUED) {
1928 EVPROCDEQUE(p, evq);
1929 }
1930 TAILQ_REMOVE(&sp->so_evlist, evq, ee_slist); // remove from socket q
1931 FREE(evq, M_TEMP);
1932 }
1c79356b
A
1933}
1934
1935
91447636
A
1936/*
1937 * called upon pipe close. deque and free all events for
1938 * the pipe... pipe must be locked by caller
1939 */
1940void
1941evpipefree(struct pipe *cpipe)
1942{
1943 struct eventqelt *evq, *next;
1944 proc_t p;
1c79356b 1945
91447636
A
1946 for (evq = cpipe->pipe_evlist.tqh_first; evq != NULL; evq = next) {
1947 next = evq->ee_slist.tqe_next;
1948 p = evq->ee_proc;
1c79356b 1949
91447636
A
1950 EVPROCDEQUE(p, evq);
1951
1952 TAILQ_REMOVE(&cpipe->pipe_evlist, evq, ee_slist); // remove from pipe q
1953 FREE(evq, M_TEMP);
1954 }
1955}
1c79356b
A
1956
1957
1958/*
91447636
A
1959 * enqueue this event if it's not already queued. wakeup
1960 * the proc if we do queue this event to it...
1961 * entered with proc lock held... we drop it before
1962 * doing the wakeup and return in that state
1c79356b 1963 */
91447636
A
1964static void
1965evprocenque(struct eventqelt *evq)
1c79356b 1966{
91447636
A
1967 proc_t p;
1968
1969 assert(evq);
1970 p = evq->ee_proc;
1971
2d21ac55 1972 KERNEL_DEBUG(DBG_MISC_ENQUEUE|DBG_FUNC_START, (uint32_t)evq, evq->ee_flags, evq->ee_eventmask,0,0);
91447636
A
1973
1974 proc_lock(p);
1975
1976 if (evq->ee_flags & EV_QUEUED) {
1977 proc_unlock(p);
1978
1979 KERNEL_DEBUG(DBG_MISC_ENQUEUE|DBG_FUNC_END, 0,0,0,0,0);
1980 return;
1981 }
1982 evq->ee_flags |= EV_QUEUED;
1983
1984 TAILQ_INSERT_TAIL(&p->p_evlist, evq, ee_plist);
1985
1986 proc_unlock(p);
1987
1988 wakeup(&p->p_evlist);
1989
1990 KERNEL_DEBUG(DBG_MISC_ENQUEUE|DBG_FUNC_END, 0,0,0,0,0);
1c79356b
A
1991}
1992
91447636 1993
1c79356b 1994/*
91447636 1995 * pipe lock must be taken by the caller
1c79356b 1996 */
9bccf70c 1997void
91447636 1998postpipeevent(struct pipe *pipep, int event)
1c79356b 1999{
91447636
A
2000 int mask;
2001 struct eventqelt *evq;
2002
2003 if (pipep == NULL)
2004 return;
2005 KERNEL_DEBUG(DBG_MISC_POST|DBG_FUNC_START, event,0,0,1,0);
2006
2007 for (evq = pipep->pipe_evlist.tqh_first;
2008 evq != NULL; evq = evq->ee_slist.tqe_next) {
2009
2010 if (evq->ee_eventmask == 0)
2011 continue;
2012 mask = 0;
2013
2014 switch (event & (EV_RWBYTES | EV_RCLOSED | EV_WCLOSED)) {
2015
2016 case EV_RWBYTES:
2017 if ((evq->ee_eventmask & EV_RE) && pipep->pipe_buffer.cnt) {
2018 mask |= EV_RE;
2019 evq->ee_req.er_rcnt = pipep->pipe_buffer.cnt;
2020 }
2021 if ((evq->ee_eventmask & EV_WR) &&
316670eb 2022 (MAX(pipep->pipe_buffer.size,PIPE_SIZE) - pipep->pipe_buffer.cnt) >= PIPE_BUF) {
91447636
A
2023
2024 if (pipep->pipe_state & PIPE_EOF) {
2025 mask |= EV_WR|EV_RESET;
2026 break;
2027 }
2028 mask |= EV_WR;
316670eb 2029 evq->ee_req.er_wcnt = MAX(pipep->pipe_buffer.size, PIPE_SIZE) - pipep->pipe_buffer.cnt;
91447636
A
2030 }
2031 break;
2032
2033 case EV_WCLOSED:
2034 case EV_RCLOSED:
2035 if ((evq->ee_eventmask & EV_RE)) {
2036 mask |= EV_RE|EV_RCLOSED;
2037 }
2038 if ((evq->ee_eventmask & EV_WR)) {
2039 mask |= EV_WR|EV_WCLOSED;
2040 }
2041 break;
2042
2043 default:
2044 return;
2045 }
2046 if (mask) {
2047 /*
2048 * disarm... postevents are nops until this event is 'read' via
2049 * waitevent and then re-armed via modwatch
2050 */
2051 evq->ee_eventmask = 0;
2052
2053 /*
2054 * since events are disarmed until after the waitevent
2055 * the ee_req.er_xxxx fields can't change once we've
2056 * inserted this event into the proc queue...
2057 * therefore, the waitevent will see a 'consistent'
2058 * snapshot of the event, even though it won't hold
2059 * the pipe lock, and we're updating the event outside
2060 * of the proc lock, which it will hold
2061 */
2062 evq->ee_req.er_eventbits |= mask;
2063
2d21ac55 2064 KERNEL_DEBUG(DBG_MISC_POST, (uint32_t)evq, evq->ee_req.er_eventbits, mask, 1,0);
91447636
A
2065
2066 evprocenque(evq);
2067 }
2068 }
2069 KERNEL_DEBUG(DBG_MISC_POST|DBG_FUNC_END, 0,0,0,1,0);
1c79356b
A
2070}
2071
2d21ac55 2072#if SOCKETS
/*
 * Helper for postevent(): returns non-zero when 'sp' is an IPv4/IPv6 TCP
 * socket whose pending error is ECONNREFUSED or ECONNRESET and whose
 * protocol state is gone (no pcb, inpcb marked dead, no tcpcb, or tcpcb
 * in TCPS_CLOSED).  Such sockets are reported with EV_RESET.
 *
 * for AFP/OT purposes; may go away in future
 */
static int
postevent_conn_reset(struct socket *sp)
{
	struct tcpcb *tp;

	if (SOCK_DOM(sp) != PF_INET && SOCK_DOM(sp) != PF_INET6)
		return (0);
	if (SOCK_PROTO(sp) != IPPROTO_TCP)
		return (0);
	if (sp->so_error != ECONNREFUSED && sp->so_error != ECONNRESET)
		return (0);

	if (sp->so_pcb == NULL)
		return (1);
	if (sotoinpcb(sp)->inp_state == INPCB_STATE_DEAD)
		return (1);

	tp = sototcpcb(sp);
	return (tp == NULL || tp->t_state == TCPS_CLOSED);
}

/*
 * postevent
 *
 * Given either a sockbuf or a socket, walk the socket's event list and
 * queue every armed event that 'event' makes ready.
 * The socket must be locked by the caller.
 *
 * Parameters:	sp	socket to scan (ignored when 'sb' is non-NULL)
 *		sb	sockbuf whose owning socket should be scanned
 *		event	EV_* condition being posted; only the bits in
 *			EV_DMASK select the case handled below
 *
 * An unrecognized event aborts the scan immediately.
 */
void
postevent(struct socket *sp, struct sockbuf *sb, int event)
{
	struct eventqelt *evq;
	int what;
	int mask;

	if (sb)
		sp = sb->sb_so;
	if (sp == NULL)
		return;

	KERNEL_DEBUG(DBG_MISC_POST|DBG_FUNC_START, (int)sp, event, 0, 0, 0);

	what = event & EV_DMASK;

	TAILQ_FOREACH(evq, &sp->so_evlist, ee_slist) {

		/* a zero mask means this watch is currently disarmed */
		if (evq->ee_eventmask == 0)
			continue;
		mask = 0;

		/*
		 * ready for reading:
		 *   - byte cnt >= receive low water mark
		 *   - read-half of conn closed
		 *   - conn pending for listening sock
		 *   - socket error pending
		 *
		 * ready for writing:
		 *   - byte cnt avail >= send low water mark
		 *   - write half of conn closed
		 *   - socket error pending
		 *   - non-blocking conn completed successfully
		 *
		 * exception pending:
		 *   - out of band data
		 *   - sock at out of band mark
		 */
		switch (what) {

		case EV_OOB:
		case EV_RWBYTES|EV_OOB:
			if ((evq->ee_eventmask & EV_EX) &&
			    (sp->so_oobmark || (sp->so_state & SS_RCVATMARK)))
				mask |= EV_EX|EV_OOB;

			/* a pure OOB post stops here */
			if (what == EV_OOB)
				break;
			/* FALLTHROUGH: the combined post also checks byte counts */

		case EV_RWBYTES:
			if ((evq->ee_eventmask & EV_RE) && soreadable(sp)) {
				if (postevent_conn_reset(sp)) {
					mask |= EV_RE|EV_RESET;
					break;
				}
				mask |= EV_RE;
				evq->ee_req.er_rcnt = sp->so_rcv.sb_cc;

				if (sp->so_state & SS_CANTRCVMORE) {
					mask |= EV_FIN;
					break;
				}
			}
			if ((evq->ee_eventmask & EV_WR) && sowriteable(sp)) {
				if (postevent_conn_reset(sp)) {
					mask |= EV_WR|EV_RESET;
					break;
				}
				mask |= EV_WR;
				evq->ee_req.er_wcnt = sbspace(&sp->so_snd);
			}
			break;

		case EV_RCONN:
			if (evq->ee_eventmask & EV_RE) {
				mask |= EV_RE|EV_RCONN;
				/* count includes the connection being posted */
				evq->ee_req.er_rcnt = sp->so_qlen + 1;
			}
			break;

		case EV_WCONN:
			if (evq->ee_eventmask & EV_WR)
				mask |= EV_WR|EV_WCONN;
			break;

		case EV_RCLOSED:
			if (evq->ee_eventmask & EV_RE)
				mask |= EV_RE|EV_RCLOSED;
			break;

		case EV_WCLOSED:
			if (evq->ee_eventmask & EV_WR)
				mask |= EV_WR|EV_WCLOSED;
			break;

		case EV_FIN:
			if (evq->ee_eventmask & EV_RE)
				mask |= EV_RE|EV_FIN;
			break;

		case EV_RESET:
		case EV_TIMEOUT:
			if (evq->ee_eventmask & EV_RE)
				mask |= EV_RE | event;
			if (evq->ee_eventmask & EV_WR)
				mask |= EV_WR | event;
			break;

		default:
			KERNEL_DEBUG(DBG_MISC_POST|DBG_FUNC_END, (int)sp, -1, 0, 0, 0);
			return;
		} /* switch */

		KERNEL_DEBUG(DBG_MISC_POST, (int)evq, evq->ee_eventmask, evq->ee_req.er_eventbits, mask, 0);

		if (mask == 0)
			continue;

		/*
		 * disarm... postevents are nops until this event is 'read' via
		 * waitevent and then re-armed via modwatch
		 */
		evq->ee_eventmask = 0;

		/*
		 * since events are disarmed until after the waitevent
		 * the ee_req.er_xxxx fields can't change once we've
		 * inserted this event into the proc queue...
		 * since waitevent can't see this event until we
		 * enqueue it, waitevent will see a 'consistent'
		 * snapshot of the event, even though it won't hold
		 * the socket lock, and we're updating the event outside
		 * of the proc lock, which it will hold
		 */
		evq->ee_req.er_eventbits |= mask;

		evprocenque(evq);
	}
	KERNEL_DEBUG(DBG_MISC_POST|DBG_FUNC_END, (int)sp, 0, 0, 0, 0);
}
2d21ac55 2251#endif /* SOCKETS */
1c79356b 2252
1c79356b
A
2253
2254/*
2255 * watchevent system call. user passes us an event to watch
2256 * for. we malloc an event object, initialize it, and queue
2257 * it to the open socket. when the event occurs, postevent()
2258 * will enque it back to our proc where we can retrieve it
2259 * via waitevent().
2260 *
2261 * should this prevent duplicate events on same socket?
2d21ac55
A
2262 *
2263 * Returns:
2264 * ENOMEM No memory for operation
2265 * copyin:EFAULT
1c79356b
A
2266 */
2267int
91447636 2268watchevent(proc_t p, struct watchevent_args *uap, __unused int *retval)
1c79356b 2269{
91447636
A
2270 struct eventqelt *evq = (struct eventqelt *)0;
2271 struct eventqelt *np = NULL;
2d21ac55 2272 struct eventreq64 *erp;
91447636
A
2273 struct fileproc *fp = NULL;
2274 int error;
2275
2276 KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_START, 0,0,0,0,0);
2277
2278 // get a qelt and fill with users req
2279 MALLOC(evq, struct eventqelt *, sizeof(struct eventqelt), M_TEMP, M_WAITOK);
2280
2281 if (evq == NULL)
2d21ac55 2282 return (ENOMEM);
91447636
A
2283 erp = &evq->ee_req;
2284
2285 // get users request pkt
91447636 2286
2d21ac55
A
2287 if (IS_64BIT_PROCESS(p)) {
2288 error = copyin(uap->u_req, (caddr_t)erp, sizeof(struct eventreq64));
2289 } else {
2290 struct eventreq32 er32;
2291
2292 error = copyin(uap->u_req, (caddr_t)&er32, sizeof(struct eventreq32));
2293 if (error == 0) {
2294 /*
2295 * the user only passes in the
2296 * er_type, er_handle and er_data...
2297 * the other fields are initialized
2298 * below, so don't bother to copy
2299 */
2300 erp->er_type = er32.er_type;
2301 erp->er_handle = er32.er_handle;
2302 erp->er_data = (user_addr_t)er32.er_data;
2303 }
2304 }
2305 if (error) {
2306 FREE(evq, M_TEMP);
91447636 2307 KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_END, error,0,0,0,0);
2d21ac55
A
2308
2309 return(error);
91447636 2310 }
2d21ac55 2311 KERNEL_DEBUG(DBG_MISC_WATCH, erp->er_handle,uap->u_eventmask,(uint32_t)evq,0,0);
91447636
A
2312
2313 // validate, freeing qelt if errors
2314 error = 0;
2315 proc_fdlock(p);
2316
2317 if (erp->er_type != EV_FD) {
2318 error = EINVAL;
2319 } else if ((error = fp_lookup(p, erp->er_handle, &fp, 1)) != 0) {
2320 error = EBADF;
2d21ac55 2321#if SOCKETS
91447636
A
2322 } else if (fp->f_type == DTYPE_SOCKET) {
2323 socket_lock((struct socket *)fp->f_data, 1);
2324 np = ((struct socket *)fp->f_data)->so_evlist.tqh_first;
2d21ac55 2325#endif /* SOCKETS */
91447636
A
2326 } else if (fp->f_type == DTYPE_PIPE) {
2327 PIPE_LOCK((struct pipe *)fp->f_data);
2328 np = ((struct pipe *)fp->f_data)->pipe_evlist.tqh_first;
2329 } else {
2330 fp_drop(p, erp->er_handle, fp, 1);
2331 error = EINVAL;
2332 }
2333 proc_fdunlock(p);
2334
2335 if (error) {
2336 FREE(evq, M_TEMP);
2337
2338 KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_END, error,0,0,0,0);
2339 return(error);
2340 }
2341
2342 /*
2343 * only allow one watch per file per proc
2344 */
2345 for ( ; np != NULL; np = np->ee_slist.tqe_next) {
2346 if (np->ee_proc == p) {
2d21ac55 2347#if SOCKETS
91447636
A
2348 if (fp->f_type == DTYPE_SOCKET)
2349 socket_unlock((struct socket *)fp->f_data, 1);
2350 else
2d21ac55 2351#endif /* SOCKETS */
91447636
A
2352 PIPE_UNLOCK((struct pipe *)fp->f_data);
2353 fp_drop(p, erp->er_handle, fp, 0);
2354 FREE(evq, M_TEMP);
2355
2356 KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_END, EINVAL,0,0,0,0);
2357 return(EINVAL);
2358 }
2359 }
2360 erp->er_ecnt = erp->er_rcnt = erp->er_wcnt = erp->er_eventbits = 0;
2361 evq->ee_proc = p;
2362 evq->ee_eventmask = uap->u_eventmask & EV_MASK;
2363 evq->ee_flags = 0;
2364
2d21ac55 2365#if SOCKETS
91447636
A
2366 if (fp->f_type == DTYPE_SOCKET) {
2367 TAILQ_INSERT_TAIL(&((struct socket *)fp->f_data)->so_evlist, evq, ee_slist);
2368 postevent((struct socket *)fp->f_data, 0, EV_RWBYTES); // catch existing events
2369
2370 socket_unlock((struct socket *)fp->f_data, 1);
2d21ac55
A
2371 } else
2372#endif /* SOCKETS */
2373 {
91447636
A
2374 TAILQ_INSERT_TAIL(&((struct pipe *)fp->f_data)->pipe_evlist, evq, ee_slist);
2375 postpipeevent((struct pipe *)fp->f_data, EV_RWBYTES);
2376
2377 PIPE_UNLOCK((struct pipe *)fp->f_data);
2378 }
2379 fp_drop_event(p, erp->er_handle, fp);
2380
2381 KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_END, 0,0,0,0,0);
2382 return(0);
1c79356b
A
2383}
2384
91447636 2385
1c79356b
A
2386
2387/*
2388 * waitevent system call.
2389 * grabs the next waiting event for this proc and returns
2390 * it. if no events, user can request to sleep with timeout
2d21ac55
A
2391 * or without or poll mode
2392 * ((tv != NULL && interval == 0) || tv == -1)
1c79356b
A
2393 */
2394int
91447636 2395waitevent(proc_t p, struct waitevent_args *uap, int *retval)
1c79356b 2396{
91447636
A
2397 int error = 0;
2398 struct eventqelt *evq;
2d21ac55 2399 struct eventreq64 *erp;
9bccf70c 2400 uint64_t abstime, interval;
2d21ac55
A
2401 boolean_t fast_poll = FALSE;
2402 union {
2403 struct eventreq64 er64;
2404 struct eventreq32 er32;
2405 } uer;
2406
2407 interval = 0;
1c79356b
A
2408
2409 if (uap->tv) {
9bccf70c 2410 struct timeval atv;
2d21ac55
A
2411 /*
2412 * check for fast poll method
2413 */
2414 if (IS_64BIT_PROCESS(p)) {
2415 if (uap->tv == (user_addr_t)-1)
2416 fast_poll = TRUE;
2417 } else if (uap->tv == (user_addr_t)((uint32_t)-1))
2418 fast_poll = TRUE;
2419
2420 if (fast_poll == TRUE) {
2421 if (p->p_evlist.tqh_first == NULL) {
2422 KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_NONE, -1,0,0,0,0);
2423 /*
2424 * poll failed
2425 */
2426 *retval = 1;
2427 return (0);
2428 }
2429 proc_lock(p);
2430 goto retry;
2431 }
b0d623f7
A
2432 if (IS_64BIT_PROCESS(p)) {
2433 struct user64_timeval atv64;
2434 error = copyin(uap->tv, (caddr_t)&atv64, sizeof(atv64));
2435 /* Loses resolution - assume timeout < 68 years */
2436 atv.tv_sec = atv64.tv_sec;
2437 atv.tv_usec = atv64.tv_usec;
2438 } else {
2439 struct user32_timeval atv32;
2440 error = copyin(uap->tv, (caddr_t)&atv32, sizeof(atv32));
2441 atv.tv_sec = atv32.tv_sec;
2442 atv.tv_usec = atv32.tv_usec;
2443 }
9bccf70c 2444
1c79356b 2445 if (error)
9bccf70c 2446 return(error);
1c79356b
A
2447 if (itimerfix(&atv)) {
2448 error = EINVAL;
2449 return(error);
2450 }
9bccf70c 2451 interval = tvtoabstime(&atv);
2d21ac55 2452 }
9bccf70c 2453 KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_START, 0,0,0,0,0);
1c79356b 2454
91447636 2455 proc_lock(p);
1c79356b 2456retry:
91447636
A
2457 if ((evq = p->p_evlist.tqh_first) != NULL) {
2458 /*
2459 * found one... make a local copy while it's still on the queue
2460 * to prevent it from changing while in the midst of copying
2461 * don't want to hold the proc lock across a copyout because
2462 * it might block on a page fault at the target in user space
2463 */
2d21ac55 2464 erp = &evq->ee_req;
91447636 2465
2d21ac55
A
2466 if (IS_64BIT_PROCESS(p))
2467 bcopy((caddr_t)erp, (caddr_t)&uer.er64, sizeof (struct eventreq64));
2468 else {
2469 uer.er32.er_type = erp->er_type;
2470 uer.er32.er_handle = erp->er_handle;
2471 uer.er32.er_data = (uint32_t)erp->er_data;
2472 uer.er32.er_ecnt = erp->er_ecnt;
2473 uer.er32.er_rcnt = erp->er_rcnt;
2474 uer.er32.er_wcnt = erp->er_wcnt;
2475 uer.er32.er_eventbits = erp->er_eventbits;
2476 }
91447636
A
2477 TAILQ_REMOVE(&p->p_evlist, evq, ee_plist);
2478
2479 evq->ee_flags &= ~EV_QUEUED;
1c79356b 2480
91447636
A
2481 proc_unlock(p);
2482
2d21ac55
A
2483 if (IS_64BIT_PROCESS(p))
2484 error = copyout((caddr_t)&uer.er64, uap->u_req, sizeof(struct eventreq64));
2485 else
2486 error = copyout((caddr_t)&uer.er32, uap->u_req, sizeof(struct eventreq32));
91447636
A
2487
2488 KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_END, error,
2d21ac55 2489 evq->ee_req.er_handle,evq->ee_req.er_eventbits,(uint32_t)evq,0);
9bccf70c
A
2490 return (error);
2491 }
2492 else {
2493 if (uap->tv && interval == 0) {
91447636 2494 proc_unlock(p);
9bccf70c 2495 *retval = 1; // poll failed
9bccf70c 2496
91447636 2497 KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_END, error,0,0,0,0);
9bccf70c
A
2498 return (error);
2499 }
9bccf70c 2500 if (interval != 0)
55e303ae 2501 clock_absolutetime_interval_to_deadline(interval, &abstime);
91447636
A
2502 else
2503 abstime = 0;
9bccf70c 2504
2d21ac55 2505 KERNEL_DEBUG(DBG_MISC_WAIT, 1,(uint32_t)&p->p_evlist,0,0,0);
91447636
A
2506
2507 error = msleep1(&p->p_evlist, &p->p_mlock, (PSOCK | PCATCH), "waitevent", abstime);
2508
2d21ac55 2509 KERNEL_DEBUG(DBG_MISC_WAIT, 2,(uint32_t)&p->p_evlist,0,0,0);
91447636 2510
9bccf70c
A
2511 if (error == 0)
2512 goto retry;
2513 if (error == ERESTART)
2514 error = EINTR;
2515 if (error == EWOULDBLOCK) {
2516 *retval = 1;
2517 error = 0;
2518 }
2519 }
91447636 2520 proc_unlock(p);
9bccf70c
A
2521
2522 KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_END, 0,0,0,0,0);
9bccf70c 2523 return (error);
1c79356b
A
2524}
2525
1c79356b
A
2526
2527/*
2528 * modwatch system call. user passes in event to modify.
2529 * if we find it we reset the event bits and que/deque event
2530 * it needed.
2531 */
2532int
91447636 2533modwatch(proc_t p, struct modwatch_args *uap, __unused int *retval)
1c79356b 2534{
2d21ac55
A
2535 struct eventreq64 er;
2536 struct eventreq64 *erp = &er;
2537 struct eventqelt *evq = NULL; /* protected by error return */
91447636
A
2538 int error;
2539 struct fileproc *fp;
2540 int flag;
2541
2542 KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_START, 0,0,0,0,0);
2543
2544 /*
2545 * get user's request pkt
2d21ac55
A
2546 * just need the er_type and er_handle which sit above the
2547 * problematic er_data (32/64 issue)... so only copy in
2548 * those 2 fields
91447636 2549 */
2d21ac55
A
2550 if ((error = copyin(uap->u_req, (caddr_t)erp, sizeof(er.er_type) + sizeof(er.er_handle)))) {
2551 KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_END, error,0,0,0,0);
91447636
A
2552 return(error);
2553 }
2554 proc_fdlock(p);
2555
2556 if (erp->er_type != EV_FD) {
2557 error = EINVAL;
2558 } else if ((error = fp_lookup(p, erp->er_handle, &fp, 1)) != 0) {
2559 error = EBADF;
2d21ac55 2560#if SOCKETS
91447636
A
2561 } else if (fp->f_type == DTYPE_SOCKET) {
2562 socket_lock((struct socket *)fp->f_data, 1);
2563 evq = ((struct socket *)fp->f_data)->so_evlist.tqh_first;
2d21ac55 2564#endif /* SOCKETS */
91447636
A
2565 } else if (fp->f_type == DTYPE_PIPE) {
2566 PIPE_LOCK((struct pipe *)fp->f_data);
2567 evq = ((struct pipe *)fp->f_data)->pipe_evlist.tqh_first;
2568 } else {
2569 fp_drop(p, erp->er_handle, fp, 1);
2570 error = EINVAL;
2571 }
2572
2573 if (error) {
2574 proc_fdunlock(p);
2575 KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_END, error,0,0,0,0);
2576 return(error);
2577 }
2578
2579 if ((uap->u_eventmask == EV_RM) && (fp->f_flags & FP_WAITEVENT)) {
2580 fp->f_flags &= ~FP_WAITEVENT;
2581 }
2582 proc_fdunlock(p);
2583
2584 // locate event if possible
2585 for ( ; evq != NULL; evq = evq->ee_slist.tqe_next) {
2586 if (evq->ee_proc == p)
2587 break;
2588 }
2589 if (evq == NULL) {
2d21ac55 2590#if SOCKETS
91447636
A
2591 if (fp->f_type == DTYPE_SOCKET)
2592 socket_unlock((struct socket *)fp->f_data, 1);
2d21ac55
A
2593 else
2594#endif /* SOCKETS */
91447636
A
2595 PIPE_UNLOCK((struct pipe *)fp->f_data);
2596 fp_drop(p, erp->er_handle, fp, 0);
2597 KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_END, EINVAL,0,0,0,0);
2598 return(EINVAL);
2599 }
2d21ac55 2600 KERNEL_DEBUG(DBG_MISC_MOD, erp->er_handle,uap->u_eventmask,(uint32_t)evq,0,0);
91447636
A
2601
2602 if (uap->u_eventmask == EV_RM) {
2603 EVPROCDEQUE(p, evq);
2604
2d21ac55 2605#if SOCKETS
91447636
A
2606 if (fp->f_type == DTYPE_SOCKET) {
2607 TAILQ_REMOVE(&((struct socket *)fp->f_data)->so_evlist, evq, ee_slist);
2608 socket_unlock((struct socket *)fp->f_data, 1);
2d21ac55
A
2609 } else
2610#endif /* SOCKETS */
2611 {
91447636
A
2612 TAILQ_REMOVE(&((struct pipe *)fp->f_data)->pipe_evlist, evq, ee_slist);
2613 PIPE_UNLOCK((struct pipe *)fp->f_data);
2614 }
2615 fp_drop(p, erp->er_handle, fp, 0);
2616 FREE(evq, M_TEMP);
2617 KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_END, 0,0,0,0,0);
2618 return(0);
2619 }
2620 switch (uap->u_eventmask & EV_MASK) {
1c79356b 2621
91447636
A
2622 case 0:
2623 flag = 0;
2624 break;
2625
2626 case EV_RE:
2627 case EV_WR:
2628 case EV_RE|EV_WR:
2629 flag = EV_RWBYTES;
2630 break;
2631
2632 case EV_EX:
2633 flag = EV_OOB;
2634 break;
2635
2636 case EV_EX|EV_RE:
2637 case EV_EX|EV_WR:
2638 case EV_EX|EV_RE|EV_WR:
2639 flag = EV_OOB|EV_RWBYTES;
2640 break;
2641
2642 default:
2d21ac55 2643#if SOCKETS
91447636
A
2644 if (fp->f_type == DTYPE_SOCKET)
2645 socket_unlock((struct socket *)fp->f_data, 1);
2646 else
2d21ac55 2647#endif /* SOCKETS */
91447636
A
2648 PIPE_UNLOCK((struct pipe *)fp->f_data);
2649 fp_drop(p, erp->er_handle, fp, 0);
2650 KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_END, EINVAL,0,0,0,0);
2651 return(EINVAL);
2652 }
2653 /*
2654 * since we're holding the socket/pipe lock, the event
2655 * cannot go from the unqueued state to the queued state
2656 * however, it can go from the queued state to the unqueued state
2657 * since that direction is protected by the proc_lock...
2658 * so do a quick check for EV_QUEUED w/o holding the proc lock
2659 * since by far the common case will be NOT EV_QUEUED, this saves
2660 * us taking the proc_lock the majority of the time
2661 */
2662 if (evq->ee_flags & EV_QUEUED) {
2663 /*
2664 * EVPROCDEQUE will recheck the state after it grabs the proc_lock
2665 */
2666 EVPROCDEQUE(p, evq);
2667 }
2668 /*
2669 * while the event is off the proc queue and
2670 * we're holding the socket/pipe lock
2671 * it's safe to update these fields...
2672 */
2673 evq->ee_req.er_eventbits = 0;
2674 evq->ee_eventmask = uap->u_eventmask & EV_MASK;
2675
2d21ac55 2676#if SOCKETS
91447636
A
2677 if (fp->f_type == DTYPE_SOCKET) {
2678 postevent((struct socket *)fp->f_data, 0, flag);
2679 socket_unlock((struct socket *)fp->f_data, 1);
2d21ac55
A
2680 } else
2681#endif /* SOCKETS */
2682 {
91447636
A
2683 postpipeevent((struct pipe *)fp->f_data, flag);
2684 PIPE_UNLOCK((struct pipe *)fp->f_data);
2685 }
2686 fp_drop(p, erp->er_handle, fp, 0);
2d21ac55 2687 KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_END, evq->ee_req.er_handle,evq->ee_eventmask,(uint32_t)fp->f_data,flag,0);
91447636 2688 return(0);
1c79356b 2689}
91447636
A
2690
2691/* this routine is called from the close of fd with proc_fdlock held */
2692int
2693waitevent_close(struct proc *p, struct fileproc *fp)
2694{
2695 struct eventqelt *evq;
2696
2697
2698 fp->f_flags &= ~FP_WAITEVENT;
2699
2d21ac55 2700#if SOCKETS
91447636
A
2701 if (fp->f_type == DTYPE_SOCKET) {
2702 socket_lock((struct socket *)fp->f_data, 1);
2703 evq = ((struct socket *)fp->f_data)->so_evlist.tqh_first;
2d21ac55
A
2704 } else
2705#endif /* SOCKETS */
2706 if (fp->f_type == DTYPE_PIPE) {
91447636
A
2707 PIPE_LOCK((struct pipe *)fp->f_data);
2708 evq = ((struct pipe *)fp->f_data)->pipe_evlist.tqh_first;
2709 }
2710 else {
2711 return(EINVAL);
2712 }
2713 proc_fdunlock(p);
2714
2715
2716 // locate event if possible
2717 for ( ; evq != NULL; evq = evq->ee_slist.tqe_next) {
2718 if (evq->ee_proc == p)
2719 break;
2720 }
2721 if (evq == NULL) {
2d21ac55 2722#if SOCKETS
91447636
A
2723 if (fp->f_type == DTYPE_SOCKET)
2724 socket_unlock((struct socket *)fp->f_data, 1);
2725 else
2d21ac55 2726#endif /* SOCKETS */
91447636
A
2727 PIPE_UNLOCK((struct pipe *)fp->f_data);
2728
2729 proc_fdlock(p);
2730
2731 return(EINVAL);
2732 }
2733 EVPROCDEQUE(p, evq);
2734
2d21ac55 2735#if SOCKETS
91447636
A
2736 if (fp->f_type == DTYPE_SOCKET) {
2737 TAILQ_REMOVE(&((struct socket *)fp->f_data)->so_evlist, evq, ee_slist);
2738 socket_unlock((struct socket *)fp->f_data, 1);
2d21ac55
A
2739 } else
2740#endif /* SOCKETS */
2741 {
91447636
A
2742 TAILQ_REMOVE(&((struct pipe *)fp->f_data)->pipe_evlist, evq, ee_slist);
2743 PIPE_UNLOCK((struct pipe *)fp->f_data);
2744 }
2745 FREE(evq, M_TEMP);
2746
2747 proc_fdlock(p);
2748
2749 return(0);
2750}
2751
2d21ac55
A
2752
2753/*
2754 * gethostuuid
2755 *
2756 * Description: Get the host UUID from IOKit and return it to user space.
2757 *
2758 * Parameters: uuid_buf Pointer to buffer to receive UUID
2759 * timeout Timespec for timout
39236c6e 2760 * spi SPI, skip sandbox check (temporary)
2d21ac55
A
2761 *
2762 * Returns: 0 Success
2763 * EWOULDBLOCK Timeout is too short
2764 * copyout:EFAULT Bad user buffer
2765 *
2766 * Notes: A timeout seems redundant, since if it's tolerable to not
2767 * have a system UUID in hand, then why ask for one?
2768 */
2769int
b0d623f7 2770gethostuuid(struct proc *p, struct gethostuuid_args *uap, __unused int32_t *retval)
2d21ac55
A
2771{
2772 kern_return_t kret;
2773 int error;
2774 mach_timespec_t mach_ts; /* for IOKit call */
2775 __darwin_uuid_t uuid_kern; /* for IOKit call */
2776
39236c6e
A
2777 if (!uap->spi) {
2778#if 13841988
2779 uint32_t flags;
2780 if (temp_debug_13841988 && (0 == proc_get_darwinbgstate(p->task, &flags)) && (flags & PROC_FLAG_IOS_APPLICATION)) {
2781 printf("Unauthorized access to gethostuuid() by %s(%d)\n", p->p_comm, proc_pid(p));
2782 return (EPERM);
2783 }
2784#else
2785 /* Perform sandbox check */
2786#endif
2787 }
2788
2d21ac55
A
2789 /* Convert the 32/64 bit timespec into a mach_timespec_t */
2790 if ( proc_is64bit(p) ) {
b0d623f7 2791 struct user64_timespec ts;
2d21ac55
A
2792 error = copyin(uap->timeoutp, &ts, sizeof(ts));
2793 if (error)
2794 return (error);
2795 mach_ts.tv_sec = ts.tv_sec;
2796 mach_ts.tv_nsec = ts.tv_nsec;
2797 } else {
b0d623f7 2798 struct user32_timespec ts;
2d21ac55
A
2799 error = copyin(uap->timeoutp, &ts, sizeof(ts) );
2800 if (error)
2801 return (error);
2802 mach_ts.tv_sec = ts.tv_sec;
2803 mach_ts.tv_nsec = ts.tv_nsec;
2804 }
2805
2806 /* Call IOKit with the stack buffer to get the UUID */
2807 kret = IOBSDGetPlatformUUID(uuid_kern, mach_ts);
2808
2809 /*
2810 * If we get it, copy out the data to the user buffer; note that a
2811 * uuid_t is an array of characters, so this is size invariant for
2812 * 32 vs. 64 bit.
2813 */
2814 if (kret == KERN_SUCCESS) {
2815 error = copyout(uuid_kern, uap->uuid_buf, sizeof(uuid_kern));
2816 } else {
2817 error = EWOULDBLOCK;
2818 }
2819
2820 return (error);
2821}
316670eb
A
2822
2823/*
2824 * ledger
2825 *
2826 * Description: Omnibus system call for ledger operations
2827 */
2828int
2829ledger(struct proc *p, struct ledger_args *args, __unused int32_t *retval)
2830{
39236c6e
A
2831#if !CONFIG_MACF
2832#pragma unused(p)
2833#endif
316670eb
A
2834 int rval, pid, len, error;
2835#ifdef LEDGER_DEBUG
2836 struct ledger_limit_args lla;
2837#endif
2838 task_t task;
2839 proc_t proc;
2840
2841 /* Finish copying in the necessary args before taking the proc lock */
2842 error = 0;
2843 len = 0;
2844 if (args->cmd == LEDGER_ENTRY_INFO)
2845 error = copyin(args->arg3, (char *)&len, sizeof (len));
2846 else if (args->cmd == LEDGER_TEMPLATE_INFO)
2847 error = copyin(args->arg2, (char *)&len, sizeof (len));
2848#ifdef LEDGER_DEBUG
2849 else if (args->cmd == LEDGER_LIMIT)
2850 error = copyin(args->arg2, (char *)&lla, sizeof (lla));
2851#endif
2852 if (error)
2853 return (error);
2854 if (len < 0)
2855 return (EINVAL);
2856
2857 rval = 0;
2858 if (args->cmd != LEDGER_TEMPLATE_INFO) {
2859 pid = args->arg1;
2860 proc = proc_find(pid);
2861 if (proc == NULL)
2862 return (ESRCH);
2863
2864#if CONFIG_MACF
2865 error = mac_proc_check_ledger(p, proc, args->cmd);
2866 if (error) {
2867 proc_rele(proc);
2868 return (error);
2869 }
2870#endif
2871
2872 task = proc->task;
2873 }
2874
2875 switch (args->cmd) {
2876#ifdef LEDGER_DEBUG
2877 case LEDGER_LIMIT: {
39236c6e 2878 if (!kauth_cred_issuser(kauth_cred_get()))
316670eb
A
2879 rval = EPERM;
2880 rval = ledger_limit(task, &lla);
2881 proc_rele(proc);
2882 break;
2883 }
2884#endif
2885 case LEDGER_INFO: {
2886 struct ledger_info info;
2887
2888 rval = ledger_info(task, &info);
2889 proc_rele(proc);
2890 if (rval == 0)
2891 rval = copyout(&info, args->arg2,
2892 sizeof (info));
2893 break;
2894 }
2895
2896 case LEDGER_ENTRY_INFO: {
2897 void *buf;
2898 int sz;
2899
39236c6e 2900 rval = ledger_get_task_entry_info_multiple(task, &buf, &len);
316670eb
A
2901 proc_rele(proc);
2902 if ((rval == 0) && (len > 0)) {
2903 sz = len * sizeof (struct ledger_entry_info);
2904 rval = copyout(buf, args->arg2, sz);
2905 kfree(buf, sz);
2906 }
2907 if (rval == 0)
2908 rval = copyout(&len, args->arg3, sizeof (len));
2909 break;
2910 }
2911
2912 case LEDGER_TEMPLATE_INFO: {
2913 void *buf;
2914 int sz;
2915
2916 rval = ledger_template_info(&buf, &len);
2917 if ((rval == 0) && (len > 0)) {
2918 sz = len * sizeof (struct ledger_template_info);
2919 rval = copyout(buf, args->arg1, sz);
2920 kfree(buf, sz);
2921 }
2922 if (rval == 0)
2923 rval = copyout(&len, args->arg2, sizeof (len));
2924 break;
2925 }
2926
2927 default:
2928 rval = EINVAL;
2929 }
2930
2931 return (rval);
2932}
39236c6e
A
2933
2934#if CONFIG_TELEMETRY
2935int
2936telemetry(__unused struct proc *p, struct telemetry_args *args, __unused int32_t *retval)
2937{
2938 int error = 0;
2939
2940 switch (args->cmd) {
2941 case TELEMETRY_CMD_TIMER_EVENT:
2942 error = telemetry_timer_event(args->deadline, args->interval, args->leeway);
2943 break;
2944 default:
2945 error = EINVAL;
2946 break;
2947 }
2948
2949 return (error);
2950}
2951#endif /* CONFIG_TELEMETRY */