]> git.saurik.com Git - apple/xnu.git/blame - bsd/kern/sys_generic.c
xnu-1504.9.17.tar.gz
[apple/xnu.git] / bsd / kern / sys_generic.c
CommitLineData
1c79356b 1/*
2d21ac55 2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
5d5c5d0d 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
1c79356b 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
8f6c56a5 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
8f6c56a5 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b
A
27 */
28/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
29/*
30 * Copyright (c) 1982, 1986, 1989, 1993
31 * The Regents of the University of California. All rights reserved.
32 * (c) UNIX System Laboratories, Inc.
33 * All or some portions of this file are derived from material licensed
34 * to the University of California by American Telephone and Telegraph
35 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
36 * the permission of UNIX System Laboratories, Inc.
37 *
38 * Redistribution and use in source and binary forms, with or without
39 * modification, are permitted provided that the following conditions
40 * are met:
41 * 1. Redistributions of source code must retain the above copyright
42 * notice, this list of conditions and the following disclaimer.
43 * 2. Redistributions in binary form must reproduce the above copyright
44 * notice, this list of conditions and the following disclaimer in the
45 * documentation and/or other materials provided with the distribution.
46 * 3. All advertising materials mentioning features or use of this software
47 * must display the following acknowledgement:
48 * This product includes software developed by the University of
49 * California, Berkeley and its contributors.
50 * 4. Neither the name of the University nor the names of its contributors
51 * may be used to endorse or promote products derived from this software
52 * without specific prior written permission.
53 *
54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 * SUCH DAMAGE.
65 *
66 * @(#)sys_generic.c 8.9 (Berkeley) 2/14/95
67 */
2d21ac55
A
68/*
69 * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce
70 * support for mandatory and extensible security protections. This notice
71 * is included in support of clause 2.2 (b) of the Apple Public License,
72 * Version 2.0.
73 */
1c79356b
A
74
75#include <sys/param.h>
76#include <sys/systm.h>
77#include <sys/filedesc.h>
78#include <sys/ioctl.h>
91447636
A
79#include <sys/file_internal.h>
80#include <sys/proc_internal.h>
1c79356b 81#include <sys/socketvar.h>
91447636 82#include <sys/uio_internal.h>
1c79356b
A
83#include <sys/kernel.h>
84#include <sys/stat.h>
85#include <sys/malloc.h>
91447636 86#include <sys/sysproto.h>
1c79356b 87
91447636 88#include <sys/mount_internal.h>
1c79356b
A
89#include <sys/protosw.h>
90#include <sys/ev.h>
91#include <sys/user.h>
92#include <sys/kdebug.h>
91447636
A
93#include <sys/poll.h>
94#include <sys/event.h>
95#include <sys/eventvar.h>
96
97#include <mach/mach_types.h>
98#include <kern/kern_types.h>
1c79356b 99#include <kern/assert.h>
91447636
A
100#include <kern/kalloc.h>
101#include <kern/thread.h>
102#include <kern/clock.h>
1c79356b
A
103
104#include <sys/mbuf.h>
105#include <sys/socket.h>
106#include <sys/socketvar.h>
107#include <sys/errno.h>
55e303ae 108#include <sys/syscall.h>
91447636 109#include <sys/pipe.h>
1c79356b 110
b0d623f7 111#include <security/audit/audit.h>
e5568f75 112
1c79356b
A
113#include <net/if.h>
114#include <net/route.h>
115
116#include <netinet/in.h>
117#include <netinet/in_systm.h>
118#include <netinet/ip.h>
119#include <netinet/in_pcb.h>
120#include <netinet/ip_var.h>
121#include <netinet/ip6.h>
122#include <netinet/tcp.h>
123#include <netinet/tcp_fsm.h>
124#include <netinet/tcp_seq.h>
125#include <netinet/tcp_timer.h>
126#include <netinet/tcp_var.h>
127#include <netinet/tcpip.h>
128#include <netinet/tcp_debug.h>
0b4e3aa0
A
129/* for wait queue based select */
130#include <kern/wait_queue.h>
91447636 131#include <kern/kalloc.h>
91447636
A
132#include <sys/vnode_internal.h>
133
2d21ac55
A
134/* XXX should be in a header file somewhere */
135void evsofree(struct socket *);
136void evpipefree(struct pipe *);
137void postpipeevent(struct pipe *, int);
138void postevent(struct socket *, struct sockbuf *, int);
139extern kern_return_t IOBSDGetPlatformUUID(__darwin_uuid_t uuid, mach_timespec_t timeoutp);
140
91447636
A
141int rd_uio(struct proc *p, int fdes, uio_t uio, user_ssize_t *retval);
142int wr_uio(struct proc *p, int fdes, uio_t uio, user_ssize_t *retval);
143extern void *get_bsduthreadarg(thread_t);
144extern int *get_bsduthreadrval(thread_t);
145
2d21ac55 146__private_extern__ int dofileread(vfs_context_t ctx, struct fileproc *fp,
91447636
A
147 user_addr_t bufp, user_size_t nbyte,
148 off_t offset, int flags, user_ssize_t *retval);
2d21ac55 149__private_extern__ int dofilewrite(vfs_context_t ctx, struct fileproc *fp,
91447636
A
150 user_addr_t bufp, user_size_t nbyte,
151 off_t offset, int flags, user_ssize_t *retval);
152__private_extern__ int preparefileread(struct proc *p, struct fileproc **fp_ret, int fd, int check_for_vnode);
153__private_extern__ void donefileread(struct proc *p, struct fileproc *fp_ret, int fd);
9bccf70c 154
91447636
A
155#if NETAT
156extern int appletalk_inited;
157#endif /* NETAT */
1c79356b 158
91447636
A
159#define f_flag f_fglob->fg_flag
160#define f_type f_fglob->fg_type
161#define f_msgcount f_fglob->fg_msgcount
162#define f_cred f_fglob->fg_cred
163#define f_ops f_fglob->fg_ops
164#define f_offset f_fglob->fg_offset
165#define f_data f_fglob->fg_data
2d21ac55 166
1c79356b
A
167/*
168 * Read system call.
2d21ac55
A
169 *
170 * Returns: 0 Success
171 * preparefileread:EBADF
172 * preparefileread:ESPIPE
173 * preparefileread:ENXIO
174 * preparefileread:EBADF
175 * dofileread:???
1c79356b 176 */
9bccf70c 177int
2d21ac55
A
178read(struct proc *p, struct read_args *uap, user_ssize_t *retval)
179{
180 __pthread_testcancel(1);
181 return(read_nocancel(p, (struct read_nocancel_args *)uap, retval));
182}
183
184int
185read_nocancel(struct proc *p, struct read_nocancel_args *uap, user_ssize_t *retval)
9bccf70c 186{
91447636 187 struct fileproc *fp;
9bccf70c 188 int error;
91447636 189 int fd = uap->fd;
b0d623f7 190 struct vfs_context context;
91447636
A
191
192 if ( (error = preparefileread(p, &fp, fd, 0)) )
193 return (error);
9bccf70c 194
b0d623f7
A
195 context = *(vfs_context_current());
196 context.vc_ucred = fp->f_fglob->fg_cred;
197
198 error = dofileread(&context, fp, uap->cbuf, uap->nbyte,
91447636
A
199 (off_t)-1, 0, retval);
200
201 donefileread(p, fp, fd);
202
203 return (error);
9bccf70c
A
204}
205
206/*
207 * Pread system call
2d21ac55
A
208 *
209 * Returns: 0 Success
210 * preparefileread:EBADF
211 * preparefileread:ESPIPE
212 * preparefileread:ENXIO
213 * preparefileread:EBADF
214 * dofileread:???
9bccf70c 215 */
9bccf70c 216int
2d21ac55 217pread(struct proc *p, struct pread_args *uap, user_ssize_t *retval)
9bccf70c 218{
2d21ac55
A
219 __pthread_testcancel(1);
220 return(pread_nocancel(p, (struct pread_nocancel_args *)uap, retval));
221}
222
223int
224pread_nocancel(struct proc *p, struct pread_nocancel_args *uap, user_ssize_t *retval)
225{
226 struct fileproc *fp = NULL; /* fp set by preparefileread() */
91447636 227 int fd = uap->fd;
9bccf70c 228 int error;
b0d623f7 229 struct vfs_context context;
9bccf70c 230
91447636 231 if ( (error = preparefileread(p, &fp, fd, 1)) )
4a3eedf9 232 goto out;
91447636 233
b0d623f7
A
234 context = *(vfs_context_current());
235 context.vc_ucred = fp->f_fglob->fg_cred;
236
237 error = dofileread(&context, fp, uap->buf, uap->nbyte,
91447636 238 uap->offset, FOF_OFFSET, retval);
55e303ae 239
91447636
A
240 donefileread(p, fp, fd);
241
b7266188 242 KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_pread) | DBG_FUNC_NONE),
55e303ae 243 uap->fd, uap->nbyte, (unsigned int)((uap->offset >> 32)), (unsigned int)(uap->offset), 0);
4a3eedf9
A
244
245out:
91447636 246 return (error);
9bccf70c
A
247}
248
249/*
250 * Code common for read and pread
251 */
91447636
A
252
253void
254donefileread(struct proc *p, struct fileproc *fp, int fd)
255{
2d21ac55 256 proc_fdlock_spin(p);
91447636
A
257
258 fp->f_flags &= ~FP_INCHRREAD;
259
260 fp_drop(p, fd, fp, 1);
261 proc_fdunlock(p);
262}
263
2d21ac55
A
264/*
265 * Returns: 0 Success
266 * EBADF
267 * ESPIPE
268 * ENXIO
269 * fp_lookup:EBADF
270 * fo_read:???
271 */
91447636
A
272int
273preparefileread(struct proc *p, struct fileproc **fp_ret, int fd, int check_for_pread)
274{
275 vnode_t vp;
276 int error;
277 struct fileproc *fp;
278
b0d623f7
A
279 AUDIT_ARG(fd, fd);
280
2d21ac55 281 proc_fdlock_spin(p);
91447636
A
282
283 error = fp_lookup(p, fd, &fp, 1);
284
285 if (error) {
286 proc_fdunlock(p);
287 return (error);
288 }
289 if ((fp->f_flag & FREAD) == 0) {
290 error = EBADF;
291 goto out;
292 }
293 if (check_for_pread && (fp->f_type != DTYPE_VNODE)) {
294 error = ESPIPE;
295 goto out;
296 }
297 if (fp->f_type == DTYPE_VNODE) {
298 vp = (struct vnode *)fp->f_fglob->fg_data;
299
2d21ac55
A
300 if (check_for_pread && (vnode_isfifo(vp))) {
301 error = ESPIPE;
302 goto out;
303 }
304 if (check_for_pread && (vp->v_flag & VISTTY)) {
305 error = ENXIO;
306 goto out;
307 }
91447636
A
308 if (vp->v_type == VCHR)
309 fp->f_flags |= FP_INCHRREAD;
310 }
311
312 *fp_ret = fp;
313
314 proc_fdunlock(p);
315 return (0);
316
317out:
318 fp_drop(p, fd, fp, 1);
319 proc_fdunlock(p);
320 return (error);
321}
322
323
2d21ac55
A
324/*
325 * Returns: 0 Success
326 * EINVAL
327 * fo_read:???
328 */
55e303ae 329__private_extern__ int
2d21ac55
A
330dofileread(vfs_context_t ctx, struct fileproc *fp,
331 user_addr_t bufp, user_size_t nbyte, off_t offset, int flags,
332 user_ssize_t *retval)
1c79356b 333{
91447636
A
334 uio_t auio;
335 user_ssize_t bytecnt;
336 long error = 0;
337 char uio_buf[ UIO_SIZEOF(1) ];
1c79356b 338
9bccf70c
A
339 if (nbyte > INT_MAX)
340 return (EINVAL);
91447636 341
2d21ac55 342 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
91447636
A
343 auio = uio_createwithbuffer(1, offset, UIO_USERSPACE64, UIO_READ,
344 &uio_buf[0], sizeof(uio_buf));
345 } else {
346 auio = uio_createwithbuffer(1, offset, UIO_USERSPACE32, UIO_READ,
347 &uio_buf[0], sizeof(uio_buf));
348 }
349 uio_addiov(auio, bufp, nbyte);
350
91447636 351 bytecnt = nbyte;
9bccf70c 352
2d21ac55 353 if ((error = fo_read(fp, auio, flags, ctx))) {
91447636 354 if (uio_resid(auio) != bytecnt && (error == ERESTART ||
9bccf70c
A
355 error == EINTR || error == EWOULDBLOCK))
356 error = 0;
357 }
91447636 358 bytecnt -= uio_resid(auio);
91447636
A
359
360 *retval = bytecnt;
361
9bccf70c 362 return (error);
1c79356b
A
363}
364
9bccf70c
A
365/*
366 * Scatter read system call.
2d21ac55
A
367 *
368 * Returns: 0 Success
369 * EINVAL
370 * ENOMEM
371 * copyin:EFAULT
372 * rd_uio:???
9bccf70c 373 */
9bccf70c 374int
2d21ac55
A
375readv(struct proc *p, struct readv_args *uap, user_ssize_t *retval)
376{
377 __pthread_testcancel(1);
378 return(readv_nocancel(p, (struct readv_nocancel_args *)uap, retval));
379}
380
381int
382readv_nocancel(struct proc *p, struct readv_nocancel_args *uap, user_ssize_t *retval)
1c79356b 383{
91447636 384 uio_t auio = NULL;
1c79356b 385 int error;
91447636
A
386 struct user_iovec *iovp;
387
388 /* Verify range bedfore calling uio_create() */
389 if (uap->iovcnt <= 0 || uap->iovcnt > UIO_MAXIOV)
390 return (EINVAL);
391
392 /* allocate a uio large enough to hold the number of iovecs passed */
393 auio = uio_create(uap->iovcnt, 0,
394 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
395 UIO_READ);
396
397 /* get location of iovecs within the uio. then copyin the iovecs from
398 * user space.
399 */
400 iovp = uio_iovsaddr(auio);
401 if (iovp == NULL) {
402 error = ENOMEM;
403 goto ExitThisRoutine;
404 }
b0d623f7
A
405 error = copyin_user_iovec_array(uap->iovp,
406 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
407 uap->iovcnt, iovp);
91447636
A
408 if (error) {
409 goto ExitThisRoutine;
410 }
411
412 /* finalize uio_t for use and do the IO
413 */
414 uio_calculateresid(auio);
415 error = rd_uio(p, uap->fd, auio, retval);
416
417ExitThisRoutine:
418 if (auio != NULL) {
419 uio_free(auio);
420 }
1c79356b
A
421 return (error);
422}
423
424/*
425 * Write system call
2d21ac55
A
426 *
427 * Returns: 0 Success
428 * EBADF
429 * fp_lookup:EBADF
430 * dofilewrite:???
1c79356b 431 */
9bccf70c 432int
2d21ac55
A
433write(struct proc *p, struct write_args *uap, user_ssize_t *retval)
434{
435 __pthread_testcancel(1);
436 return(write_nocancel(p, (struct write_nocancel_args *)uap, retval));
437
438}
439
440int
441write_nocancel(struct proc *p, struct write_nocancel_args *uap, user_ssize_t *retval)
1c79356b 442{
91447636 443 struct fileproc *fp;
9bccf70c 444 int error;
91447636 445 int fd = uap->fd;
9bccf70c 446
b0d623f7
A
447 AUDIT_ARG(fd, fd);
448
91447636
A
449 error = fp_lookup(p,fd,&fp,0);
450 if (error)
451 return(error);
452 if ((fp->f_flag & FWRITE) == 0) {
453 error = EBADF;
454 } else {
2d21ac55
A
455 struct vfs_context context = *(vfs_context_current());
456 context.vc_ucred = fp->f_fglob->fg_cred;
457
458 error = dofilewrite(&context, fp, uap->cbuf, uap->nbyte,
9bccf70c 459 (off_t)-1, 0, retval);
91447636
A
460 }
461 if (error == 0)
462 fp_drop_written(p, fd, fp);
463 else
464 fp_drop(p, fd, fp, 0);
9bccf70c
A
465 return(error);
466}
467
468/*
91447636 469 * pwrite system call
2d21ac55
A
470 *
471 * Returns: 0 Success
472 * EBADF
473 * ESPIPE
474 * ENXIO
475 * EINVAL
476 * fp_lookup:EBADF
477 * dofilewrite:???
9bccf70c 478 */
9bccf70c 479int
2d21ac55
A
480pwrite(struct proc *p, struct pwrite_args *uap, user_ssize_t *retval)
481{
482 __pthread_testcancel(1);
483 return(pwrite_nocancel(p, (struct pwrite_nocancel_args *)uap, retval));
484}
485
486int
487pwrite_nocancel(struct proc *p, struct pwrite_nocancel_args *uap, user_ssize_t *retval)
9bccf70c 488{
91447636 489 struct fileproc *fp;
9bccf70c 490 int error;
91447636 491 int fd = uap->fd;
2d21ac55 492 vnode_t vp = (vnode_t)0;
91447636 493
b0d623f7
A
494 AUDIT_ARG(fd, fd);
495
91447636
A
496 error = fp_lookup(p,fd,&fp,0);
497 if (error)
498 return(error);
9bccf70c 499
91447636
A
500 if ((fp->f_flag & FWRITE) == 0) {
501 error = EBADF;
502 } else {
2d21ac55
A
503 struct vfs_context context = *vfs_context_current();
504 context.vc_ucred = fp->f_fglob->fg_cred;
505
91447636
A
506 if (fp->f_type != DTYPE_VNODE) {
507 error = ESPIPE;
2d21ac55
A
508 goto errout;
509 }
510 vp = (vnode_t)fp->f_fglob->fg_data;
511 if (vnode_isfifo(vp)) {
512 error = ESPIPE;
513 goto errout;
514 }
515 if ((vp->v_flag & VISTTY)) {
516 error = ENXIO;
517 goto errout;
91447636 518 }
2d21ac55
A
519 if (uap->offset == (off_t)-1) {
520 error = EINVAL;
521 goto errout;
522 }
523
524 error = dofilewrite(&context, fp, uap->buf, uap->nbyte,
525 uap->offset, FOF_OFFSET, retval);
9bccf70c 526 }
2d21ac55 527errout:
91447636
A
528 if (error == 0)
529 fp_drop_written(p, fd, fp);
530 else
531 fp_drop(p, fd, fp, 0);
55e303ae 532
b7266188 533 KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_pwrite) | DBG_FUNC_NONE),
55e303ae
A
534 uap->fd, uap->nbyte, (unsigned int)((uap->offset >> 32)), (unsigned int)(uap->offset), 0);
535
9bccf70c
A
536 return(error);
537}
538
2d21ac55
A
539/*
540 * Returns: 0 Success
541 * EINVAL
542 * <fo_write>:EPIPE
543 * <fo_write>:??? [indirect through struct fileops]
544 */
55e303ae 545__private_extern__ int
2d21ac55
A
546dofilewrite(vfs_context_t ctx, struct fileproc *fp,
547 user_addr_t bufp, user_size_t nbyte, off_t offset, int flags,
548 user_ssize_t *retval)
9bccf70c 549{
91447636
A
550 uio_t auio;
551 long error = 0;
552 user_ssize_t bytecnt;
553 char uio_buf[ UIO_SIZEOF(1) ];
91447636 554
9bccf70c
A
555 if (nbyte > INT_MAX)
556 return (EINVAL);
91447636 557
2d21ac55 558 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
91447636
A
559 auio = uio_createwithbuffer(1, offset, UIO_USERSPACE64, UIO_WRITE,
560 &uio_buf[0], sizeof(uio_buf));
561 } else {
562 auio = uio_createwithbuffer(1, offset, UIO_USERSPACE32, UIO_WRITE,
563 &uio_buf[0], sizeof(uio_buf));
564 }
565 uio_addiov(auio, bufp, nbyte);
566
91447636 567 bytecnt = nbyte;
2d21ac55 568 if ((error = fo_write(fp, auio, flags, ctx))) {
91447636 569 if (uio_resid(auio) != bytecnt && (error == ERESTART ||
9bccf70c
A
570 error == EINTR || error == EWOULDBLOCK))
571 error = 0;
55e303ae 572 /* The socket layer handles SIGPIPE */
2d21ac55
A
573 if (error == EPIPE && fp->f_type != DTYPE_SOCKET) {
574 /* XXX Raise the signal on the thread? */
575 psignal(vfs_context_proc(ctx), SIGPIPE);
576 }
9bccf70c 577 }
91447636 578 bytecnt -= uio_resid(auio);
91447636
A
579 *retval = bytecnt;
580
9bccf70c 581 return (error);
1c79356b 582}
9bccf70c
A
583
584/*
585 * Gather write system call
586 */
9bccf70c 587int
2d21ac55
A
588writev(struct proc *p, struct writev_args *uap, user_ssize_t *retval)
589{
590 __pthread_testcancel(1);
591 return(writev_nocancel(p, (struct writev_nocancel_args *)uap, retval));
592}
593
594int
595writev_nocancel(struct proc *p, struct writev_nocancel_args *uap, user_ssize_t *retval)
1c79356b 596{
91447636 597 uio_t auio = NULL;
1c79356b 598 int error;
91447636
A
599 struct user_iovec *iovp;
600
b0d623f7
A
601 AUDIT_ARG(fd, uap->fd);
602
91447636
A
603 /* Verify range bedfore calling uio_create() */
604 if (uap->iovcnt <= 0 || uap->iovcnt > UIO_MAXIOV)
605 return (EINVAL);
606
607 /* allocate a uio large enough to hold the number of iovecs passed */
608 auio = uio_create(uap->iovcnt, 0,
609 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
610 UIO_WRITE);
611
612 /* get location of iovecs within the uio. then copyin the iovecs from
613 * user space.
614 */
615 iovp = uio_iovsaddr(auio);
616 if (iovp == NULL) {
617 error = ENOMEM;
618 goto ExitThisRoutine;
619 }
b0d623f7
A
620 error = copyin_user_iovec_array(uap->iovp,
621 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
622 uap->iovcnt, iovp);
91447636
A
623 if (error) {
624 goto ExitThisRoutine;
625 }
626
627 /* finalize uio_t for use and do the IO
628 */
629 uio_calculateresid(auio);
630 error = wr_uio(p, uap->fd, auio, retval);
631
632ExitThisRoutine:
633 if (auio != NULL) {
634 uio_free(auio);
635 }
1c79356b
A
636 return (error);
637}
638
91447636 639
9bccf70c 640int
2d21ac55 641wr_uio(struct proc *p, int fdes, uio_t uio, user_ssize_t *retval)
1c79356b 642{
91447636
A
643 struct fileproc *fp;
644 int error;
645 user_ssize_t count;
2d21ac55 646 struct vfs_context context = *vfs_context_current();
1c79356b 647
91447636
A
648 error = fp_lookup(p,fdes,&fp,0);
649 if (error)
650 return(error);
1c79356b 651
91447636
A
652 if ((fp->f_flag & FWRITE) == 0) {
653 error = EBADF;
654 goto out;
1c79356b 655 }
91447636 656 count = uio_resid(uio);
2d21ac55
A
657
658 context.vc_ucred = fp->f_cred;
659 error = fo_write(fp, uio, 0, &context);
91447636
A
660 if (error) {
661 if (uio_resid(uio) != count && (error == ERESTART ||
662 error == EINTR || error == EWOULDBLOCK))
663 error = 0;
664 /* The socket layer handles SIGPIPE */
665 if (error == EPIPE && fp->f_type != DTYPE_SOCKET)
666 psignal(p, SIGPIPE);
667 }
668 *retval = count - uio_resid(uio);
669
91447636
A
670out:
671 if ( (error == 0) )
672 fp_drop_written(p, fdes, fp);
673 else
674 fp_drop(p, fdes, fp, 0);
675 return(error);
676}
677
678
679int
2d21ac55 680rd_uio(struct proc *p, int fdes, uio_t uio, user_ssize_t *retval)
91447636
A
681{
682 struct fileproc *fp;
683 int error;
684 user_ssize_t count;
2d21ac55 685 struct vfs_context context = *vfs_context_current();
91447636
A
686
687 if ( (error = preparefileread(p, &fp, fdes, 0)) )
688 return (error);
689
690 count = uio_resid(uio);
2d21ac55
A
691
692 context.vc_ucred = fp->f_cred;
693
694 error = fo_read(fp, uio, 0, &context);
9bccf70c 695
91447636
A
696 if (error) {
697 if (uio_resid(uio) != count && (error == ERESTART ||
698 error == EINTR || error == EWOULDBLOCK))
699 error = 0;
1c79356b 700 }
91447636 701 *retval = count - uio_resid(uio);
9bccf70c 702
91447636 703 donefileread(p, fp, fdes);
9bccf70c 704
91447636 705 return (error);
1c79356b
A
706}
707
708/*
709 * Ioctl system call
91447636 710 *
2d21ac55
A
711 * Returns: 0 Success
712 * EBADF
713 * ENOTTY
714 * ENOMEM
715 * ESRCH
716 * copyin:EFAULT
717 * copyoutEFAULT
718 * fp_lookup:EBADF Bad file descriptor
719 * fo_ioctl:???
1c79356b 720 */
9bccf70c 721int
b0d623f7 722ioctl(struct proc *p, struct ioctl_args *uap, __unused int32_t *retval)
1c79356b 723{
91447636 724 struct fileproc *fp;
2d21ac55 725 u_long com;
91447636 726 int error = 0;
2d21ac55 727 u_int size;
91447636
A
728 caddr_t datap, memp;
729 boolean_t is64bit;
1c79356b
A
730 int tmp;
731#define STK_PARAMS 128
732 char stkbuf[STK_PARAMS];
91447636 733 int fd = uap->fd;
2d21ac55 734 struct vfs_context context = *vfs_context_current();
1c79356b 735
e5568f75 736 AUDIT_ARG(fd, uap->fd);
e5568f75 737 AUDIT_ARG(addr, uap->data);
91447636
A
738
739 is64bit = proc_is64bit(p);
b0d623f7
A
740#if CONFIG_AUDIT
741 if (is64bit)
742 AUDIT_ARG(value64, uap->com);
743 else
744 AUDIT_ARG(cmd, CAST_DOWN_EXPLICIT(int, uap->com));
745#endif /* CONFIG_AUDIT */
91447636
A
746
747 proc_fdlock(p);
748 error = fp_lookup(p,fd,&fp,1);
749 if (error) {
750 proc_fdunlock(p);
751 return(error);
752 }
1c79356b 753
e5568f75 754 AUDIT_ARG(file, p, fp);
91447636
A
755
756 if ((fp->f_flag & (FREAD | FWRITE)) == 0) {
757 error = EBADF;
758 goto out;
759 }
2d21ac55
A
760
761 context.vc_ucred = fp->f_fglob->fg_cred;
762
763#if CONFIG_MACF
764 error = mac_file_check_ioctl(context.vc_ucred, fp->f_fglob, uap->com);
765 if (error)
766 goto out;
767#endif
1c79356b 768
9bccf70c
A
769#if NETAT
770 /*
771 * ### LD 6/11/97 Hack Alert: this is to get AppleTalk to work
1c79356b
A
772 * while implementing an ATioctl system call
773 */
1c79356b 774 {
1c79356b 775 if (appletalk_inited && ((uap->com & 0x0000FFFF) == 0xff99)) {
91447636 776 u_long fixed_command;
2d21ac55 777
1c79356b
A
778#ifdef APPLETALK_DEBUG
779 kprintf("ioctl: special AppleTalk \n");
780#endif
91447636
A
781 datap = &stkbuf[0];
782 *(user_addr_t *)datap = uap->data;
783 fixed_command = _IOW(0, 0xff99, uap->data);
2d21ac55 784 error = fo_ioctl(fp, fixed_command, datap, &context);
91447636 785 goto out;
1c79356b
A
786 }
787 }
788
789#endif /* NETAT */
790
791
792 switch (com = uap->com) {
793 case FIONCLEX:
794 *fdflags(p, uap->fd) &= ~UF_EXCLOSE;
91447636
A
795 error =0;
796 goto out;
1c79356b
A
797 case FIOCLEX:
798 *fdflags(p, uap->fd) |= UF_EXCLOSE;
91447636
A
799 error =0;
800 goto out;
1c79356b
A
801 }
802
803 /*
804 * Interpret high order word to find amount of data to be
805 * copied to/from the user's address space.
806 */
807 size = IOCPARM_LEN(com);
91447636
A
808 if (size > IOCPARM_MAX) {
809 error = ENOTTY;
810 goto out;
811 }
1c79356b
A
812 memp = NULL;
813 if (size > sizeof (stkbuf)) {
91447636
A
814 proc_fdunlock(p);
815 if ((memp = (caddr_t)kalloc(size)) == 0) {
816 proc_fdlock(p);
817 error = ENOMEM;
818 goto out;
819 }
820 proc_fdlock(p);
821 datap = memp;
1c79356b 822 } else
91447636 823 datap = &stkbuf[0];
1c79356b
A
824 if (com&IOC_IN) {
825 if (size) {
91447636
A
826 proc_fdunlock(p);
827 error = copyin(uap->data, datap, size);
1c79356b
A
828 if (error) {
829 if (memp)
830 kfree(memp, size);
91447636
A
831 proc_fdlock(p);
832 goto out;
1c79356b 833 }
91447636
A
834 proc_fdlock(p);
835 } else {
836 /* XXX - IOC_IN and no size? we should proably return an error here!! */
837 if (is64bit) {
838 *(user_addr_t *)datap = uap->data;
839 }
840 else {
841 *(uint32_t *)datap = (uint32_t)uap->data;
842 }
843 }
1c79356b
A
844 } else if ((com&IOC_OUT) && size)
845 /*
846 * Zero the buffer so the user always
847 * gets back something deterministic.
848 */
91447636
A
849 bzero(datap, size);
850 else if (com&IOC_VOID) {
851 /* XXX - this is odd since IOC_VOID means no parameters */
852 if (is64bit) {
853 *(user_addr_t *)datap = uap->data;
854 }
855 else {
856 *(uint32_t *)datap = (uint32_t)uap->data;
857 }
858 }
1c79356b
A
859
860 switch (com) {
861
862 case FIONBIO:
91447636 863 if ( (tmp = *(int *)datap) )
1c79356b
A
864 fp->f_flag |= FNONBLOCK;
865 else
866 fp->f_flag &= ~FNONBLOCK;
2d21ac55 867 error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, &context);
1c79356b
A
868 break;
869
870 case FIOASYNC:
91447636 871 if ( (tmp = *(int *)datap) )
1c79356b
A
872 fp->f_flag |= FASYNC;
873 else
874 fp->f_flag &= ~FASYNC;
2d21ac55 875 error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, &context);
1c79356b
A
876 break;
877
878 case FIOSETOWN:
91447636 879 tmp = *(int *)datap;
1c79356b
A
880 if (fp->f_type == DTYPE_SOCKET) {
881 ((struct socket *)fp->f_data)->so_pgid = tmp;
882 error = 0;
883 break;
884 }
91447636 885 if (fp->f_type == DTYPE_PIPE) {
2d21ac55 886 error = fo_ioctl(fp, (int)TIOCSPGRP, (caddr_t)&tmp, &context);
91447636
A
887 break;
888 }
1c79356b
A
889 if (tmp <= 0) {
890 tmp = -tmp;
891 } else {
2d21ac55 892 struct proc *p1 = proc_find(tmp);
1c79356b
A
893 if (p1 == 0) {
894 error = ESRCH;
895 break;
896 }
2d21ac55
A
897 tmp = p1->p_pgrpid;
898 proc_rele(p1);
1c79356b 899 }
2d21ac55 900 error = fo_ioctl(fp, (int)TIOCSPGRP, (caddr_t)&tmp, &context);
1c79356b
A
901 break;
902
903 case FIOGETOWN:
904 if (fp->f_type == DTYPE_SOCKET) {
905 error = 0;
91447636 906 *(int *)datap = ((struct socket *)fp->f_data)->so_pgid;
1c79356b
A
907 break;
908 }
2d21ac55 909 error = fo_ioctl(fp, TIOCGPGRP, datap, &context);
91447636 910 *(int *)datap = -*(int *)datap;
1c79356b
A
911 break;
912
913 default:
2d21ac55 914 error = fo_ioctl(fp, com, datap, &context);
1c79356b
A
915 /*
916 * Copy any data to user, size was
917 * already set and checked above.
918 */
919 if (error == 0 && (com&IOC_OUT) && size)
91447636 920 error = copyout(datap, uap->data, (u_int)size);
1c79356b
A
921 break;
922 }
91447636 923 proc_fdunlock(p);
1c79356b
A
924 if (memp)
925 kfree(memp, size);
91447636
A
926 proc_fdlock(p);
927out:
928 fp_drop(p, fd, fp, 1);
929 proc_fdunlock(p);
930 return(error);
1c79356b
A
931}
932
1c79356b 933int selwait, nselcoll;
0b4e3aa0
A
934#define SEL_FIRSTPASS 1
935#define SEL_SECONDPASS 2
9bccf70c
A
936extern int selcontinue(int error);
937extern int selprocess(int error, int sel_pass);
938static int selscan(struct proc *p, struct _select * sel,
b0d623f7 939 int nfd, int32_t *retval, int sel_pass, wait_queue_sub_t wqsub);
9bccf70c 940static int selcount(struct proc *p, u_int32_t *ibits, u_int32_t *obits,
2d21ac55 941 int nfd, int * count, int *kfcount);
91447636 942static int seldrop(struct proc *p, u_int32_t *ibits, int nfd);
1c79356b
A
943
944/*
945 * Select system call.
2d21ac55
A
946 *
947 * Returns: 0 Success
948 * EINVAL Invalid argument
949 * EAGAIN Nonconformant error if allocation fails
950 * selprocess:???
1c79356b 951 */
9bccf70c 952int
b0d623f7 953select(struct proc *p, struct select_args *uap, int32_t *retval)
2d21ac55
A
954{
955 __pthread_testcancel(1);
956 return(select_nocancel(p, (struct select_nocancel_args *)uap, retval));
957}
958
959int
b0d623f7 960select_nocancel(struct proc *p, struct select_nocancel_args *uap, int32_t *retval)
1c79356b 961{
9bccf70c 962 int error = 0;
0b4e3aa0 963 u_int ni, nw, size;
91447636 964 thread_t th_act;
1c79356b
A
965 struct uthread *uth;
966 struct _select *sel;
967 int needzerofill = 1;
0b4e3aa0 968 int count = 0;
2d21ac55 969 int kfcount = 0;
1c79356b 970
91447636 971 th_act = current_thread();
1c79356b 972 uth = get_bsdthread_info(th_act);
91447636 973 sel = &uth->uu_select;
1c79356b
A
974 retval = (int *)get_bsduthreadrval(th_act);
975 *retval = 0;
976
0b4e3aa0 977 if (uap->nd < 0) {
1c79356b 978 return (EINVAL);
0b4e3aa0 979 }
1c79356b 980
2d21ac55
A
981 /* select on thread of process that already called proc_exit() */
982 if (p->p_fd == NULL) {
983 return (EBADF);
984 }
985
1c79356b
A
986 if (uap->nd > p->p_fd->fd_nfiles)
987 uap->nd = p->p_fd->fd_nfiles; /* forgiving; slightly wrong */
988
989 nw = howmany(uap->nd, NFDBITS);
990 ni = nw * sizeof(fd_mask);
991
992 /*
2d21ac55
A
993 * if the previously allocated space for the bits is smaller than
994 * what is requested or no space has yet been allocated for this
995 * thread, allocate enough space now.
996 *
997 * Note: If this process fails, select() will return EAGAIN; this
998 * is the same thing pool() returns in a no-memory situation, but
999 * it is not a POSIX compliant error code for select().
1c79356b
A
1000 */
1001 if (sel->nbytes < (3 * ni)) {
2d21ac55
A
1002 int nbytes = 3 * ni;
1003
1004 /* Free previous allocation, if any */
1005 if (sel->ibits != NULL)
1006 FREE(sel->ibits, M_TEMP);
1007 if (sel->obits != NULL) {
1008 FREE(sel->obits, M_TEMP);
1009 /* NULL out; subsequent ibits allocation may fail */
1010 sel->obits = NULL;
1011 }
1012
1013 MALLOC(sel->ibits, u_int32_t *, nbytes, M_TEMP, M_WAITOK | M_ZERO);
1014 if (sel->ibits == NULL)
1015 return (EAGAIN);
1016 MALLOC(sel->obits, u_int32_t *, nbytes, M_TEMP, M_WAITOK | M_ZERO);
1017 if (sel->obits == NULL) {
1018 FREE(sel->ibits, M_TEMP);
1019 sel->ibits = NULL;
1020 return (EAGAIN);
1021 }
1022 sel->nbytes = nbytes;
1c79356b 1023 needzerofill = 0;
2d21ac55 1024 }
1c79356b
A
1025
1026 if (needzerofill) {
1027 bzero((caddr_t)sel->ibits, sel->nbytes);
1028 bzero((caddr_t)sel->obits, sel->nbytes);
1029 }
1030
1031 /*
1032 * get the bits from the user address space
1033 */
1034#define getbits(name, x) \
1035 do { \
91447636 1036 if (uap->name && (error = copyin(uap->name, \
1c79356b
A
1037 (caddr_t)&sel->ibits[(x) * nw], ni))) \
1038 goto continuation; \
1039 } while (0)
1040
1041 getbits(in, 0);
1042 getbits(ou, 1);
1043 getbits(ex, 2);
1044#undef getbits
1045
1046 if (uap->tv) {
9bccf70c 1047 struct timeval atv;
91447636 1048 if (IS_64BIT_PROCESS(p)) {
b0d623f7 1049 struct user64_timeval atv64;
91447636
A
1050 error = copyin(uap->tv, (caddr_t)&atv64, sizeof(atv64));
1051 /* Loses resolution - assume timeout < 68 years */
1052 atv.tv_sec = atv64.tv_sec;
1053 atv.tv_usec = atv64.tv_usec;
1054 } else {
b0d623f7
A
1055 struct user32_timeval atv32;
1056 error = copyin(uap->tv, (caddr_t)&atv32, sizeof(atv32));
1057 atv.tv_sec = atv32.tv_sec;
1058 atv.tv_usec = atv32.tv_usec;
91447636 1059 }
1c79356b
A
1060 if (error)
1061 goto continuation;
9bccf70c 1062 if (itimerfix(&atv)) {
1c79356b
A
1063 error = EINVAL;
1064 goto continuation;
1065 }
0b4e3aa0 1066
9bccf70c
A
1067 clock_absolutetime_interval_to_deadline(
1068 tvtoabstime(&atv), &sel->abstime);
1069 }
1070 else
1071 sel->abstime = 0;
1072
2d21ac55
A
1073 sel->kfcount = 0;
1074 if ( (error = selcount(p, sel->ibits, sel->obits, uap->nd, &count, &kfcount)) ) {
0b4e3aa0
A
1075 goto continuation;
1076 }
b0d623f7 1077
0b4e3aa0 1078 sel->count = count;
2d21ac55 1079 sel->kfcount = kfcount;
91447636 1080 size = SIZEOF_WAITQUEUE_SET + (count * SIZEOF_WAITQUEUE_LINK);
2d21ac55
A
1081 if (uth->uu_allocsize) {
1082 if (uth->uu_wqset == 0)
0b4e3aa0
A
1083 panic("select: wql memory smashed");
1084 /* needed for the select now */
2d21ac55
A
1085 if (size > uth->uu_allocsize) {
1086 kfree(uth->uu_wqset, uth->uu_allocsize);
1087 uth->uu_allocsize = size;
1088 uth->uu_wqset = (wait_queue_set_t)kalloc(size);
1089 if (uth->uu_wqset == (wait_queue_set_t)NULL)
0b4e3aa0 1090 panic("failed to allocate memory for waitqueue\n");
0b4e3aa0
A
1091 }
1092 } else {
1093 sel->count = count;
2d21ac55
A
1094 uth->uu_allocsize = size;
1095 uth->uu_wqset = (wait_queue_set_t)kalloc(uth->uu_allocsize);
1096 if (uth->uu_wqset == (wait_queue_set_t)NULL)
0b4e3aa0 1097 panic("failed to allocate memory for waitqueue\n");
0b4e3aa0 1098 }
2d21ac55
A
1099 bzero(uth->uu_wqset, size);
1100 sel->wql = (char *)uth->uu_wqset + SIZEOF_WAITQUEUE_SET;
1101 wait_queue_set_init(uth->uu_wqset, (SYNC_POLICY_FIFO | SYNC_POLICY_PREPOST));
0b4e3aa0 1102
1c79356b 1103continuation:
9bccf70c 1104 return selprocess(error, SEL_FIRSTPASS);
0b4e3aa0
A
1105}
1106
1107int
1108selcontinue(int error)
1109{
9bccf70c 1110 return selprocess(error, SEL_SECONDPASS);
1c79356b
A
1111}
1112
1113int
91447636 1114selprocess(int error, int sel_pass)
1c79356b 1115{
9bccf70c 1116 int ncoll;
1c79356b 1117 u_int ni, nw;
91447636 1118 thread_t th_act;
1c79356b
A
1119 struct uthread *uth;
1120 struct proc *p;
1121 struct select_args *uap;
1122 int *retval;
1123 struct _select *sel;
0b4e3aa0 1124 int unwind = 1;
9bccf70c 1125 int prepost = 0;
0b4e3aa0
A
1126 int somewakeup = 0;
1127 int doretry = 0;
9bccf70c 1128 wait_result_t wait_result;
1c79356b
A
1129
1130 p = current_proc();
91447636 1131 th_act = current_thread();
1c79356b
A
1132 uap = (struct select_args *)get_bsduthreadarg(th_act);
1133 retval = (int *)get_bsduthreadrval(th_act);
1134 uth = get_bsdthread_info(th_act);
91447636 1135 sel = &uth->uu_select;
1c79356b 1136
0b4e3aa0
A
1137 /* if it is first pass wait queue is not setup yet */
1138 if ((error != 0) && (sel_pass == SEL_FIRSTPASS))
1139 unwind = 0;
1140 if (sel->count == 0)
1141 unwind = 0;
1c79356b 1142retry:
0b4e3aa0 1143 if (error != 0) {
1c79356b 1144 goto done;
0b4e3aa0
A
1145 }
1146
1c79356b 1147 ncoll = nselcoll;
b0d623f7 1148 OSBitOrAtomic(P_SELECT, &p->p_flag);
0b4e3aa0
A
1149 /* skip scans if the select is just for timeouts */
1150 if (sel->count) {
1151 if (sel_pass == SEL_FIRSTPASS)
2d21ac55 1152 wait_queue_sub_clearrefs(uth->uu_wqset);
0b4e3aa0 1153
2d21ac55 1154 error = selscan(p, sel, uap->nd, retval, sel_pass, (wait_queue_sub_t)uth->uu_wqset);
0b4e3aa0
A
1155 if (error || *retval) {
1156 goto done;
1157 }
1158 if (prepost) {
1159 /* if the select of log, then we canwakeup and discover some one
1160 * else already read the data; go toselct again if time permits
1161 */
1162 prepost = 0;
1163 doretry = 1;
1164 }
1165 if (somewakeup) {
1166 somewakeup = 0;
1167 doretry = 1;
1168 }
1169 }
1170
9bccf70c
A
1171 if (uap->tv) {
1172 uint64_t now;
1173
1174 clock_get_uptime(&now);
1175 if (now >= sel->abstime)
1176 goto done;
1c79356b 1177 }
0b4e3aa0
A
1178
1179 if (doretry) {
1180 /* cleanup obits and try again */
1181 doretry = 0;
1182 sel_pass = SEL_FIRSTPASS;
1183 goto retry;
1184 }
1185
1c79356b
A
1186 /*
1187 * To effect a poll, the timeout argument should be
1188 * non-nil, pointing to a zero-valued timeval structure.
1189 */
9bccf70c 1190 if (uap->tv && sel->abstime == 0) {
1c79356b
A
1191 goto done;
1192 }
0b4e3aa0
A
1193
1194 /* No spurious wakeups due to colls,no need to check for them */
1195 if ((sel_pass == SEL_SECONDPASS) || ((p->p_flag & P_SELECT) == 0)) {
1196 sel_pass = SEL_FIRSTPASS;
1c79356b
A
1197 goto retry;
1198 }
0b4e3aa0 1199
b0d623f7 1200 OSBitAndAtomic(~((uint32_t)P_SELECT), &p->p_flag);
1c79356b 1201
0b4e3aa0
A
1202 /* if the select is just for timeout skip check */
1203 if (sel->count &&(sel_pass == SEL_SECONDPASS))
1204 panic("selprocess: 2nd pass assertwaiting");
1205
1206 /* Wait Queue Subordinate has waitqueue as first element */
2d21ac55 1207 wait_result = wait_queue_assert_wait((wait_queue_t)uth->uu_wqset,
b0d623f7 1208 NULL, THREAD_ABORTSAFE, sel->abstime);
9bccf70c
A
1209 if (wait_result != THREAD_AWAKENED) {
1210 /* there are no preposted events */
91447636
A
1211 error = tsleep1(NULL, PSOCK | PCATCH,
1212 "select", 0, selcontinue);
0b4e3aa0
A
1213 } else {
1214 prepost = 1;
1215 error = 0;
1216 }
1217
1218 sel_pass = SEL_SECONDPASS;
1219 if (error == 0) {
1220 if (!prepost)
1221 somewakeup =1;
1c79356b 1222 goto retry;
0b4e3aa0 1223 }
1c79356b 1224done:
91447636 1225 if (unwind) {
2d21ac55 1226 wait_subqueue_unlink_all(uth->uu_wqset);
91447636
A
1227 seldrop(p, sel->ibits, uap->nd);
1228 }
b0d623f7 1229 OSBitAndAtomic(~((uint32_t)P_SELECT), &p->p_flag);
1c79356b
A
1230 /* select is not restarted after signals... */
1231 if (error == ERESTART)
1232 error = EINTR;
1233 if (error == EWOULDBLOCK)
1234 error = 0;
1c79356b
A
1235 nw = howmany(uap->nd, NFDBITS);
1236 ni = nw * sizeof(fd_mask);
1237
1238#define putbits(name, x) \
1239 do { \
91447636
A
1240 if (uap->name && (error2 = \
1241 copyout((caddr_t)&sel->obits[(x) * nw], uap->name, ni))) \
1c79356b
A
1242 error = error2; \
1243 } while (0)
1244
1245 if (error == 0) {
1246 int error2;
1247
1248 putbits(in, 0);
1249 putbits(ou, 1);
1250 putbits(ex, 2);
1251#undef putbits
1252 }
1c79356b 1253 return(error);
1c79356b
A
1254}
1255
1256static int
b0d623f7 1257selscan(struct proc *p, struct _select *sel, int nfd, int32_t *retval,
2d21ac55 1258 int sel_pass, wait_queue_sub_t wqsub)
1c79356b 1259{
2d21ac55
A
1260 struct filedesc *fdp = p->p_fd;
1261 int msk, i, j, fd;
1262 u_int32_t bits;
91447636 1263 struct fileproc *fp;
1c79356b 1264 int n = 0;
0b4e3aa0 1265 int nc = 0;
1c79356b
A
1266 static int flag[3] = { FREAD, FWRITE, 0 };
1267 u_int32_t *iptr, *optr;
1268 u_int nw;
0b4e3aa0
A
1269 u_int32_t *ibits, *obits;
1270 char * wql;
0b4e3aa0 1271 char * wql_ptr;
2d21ac55 1272 int count, kfcount;
2d21ac55
A
1273 vnode_t vp;
1274 struct vfs_context context = *vfs_context_current();
1c79356b
A
1275
1276 /*
1277 * Problems when reboot; due to MacOSX signal probs
1278 * in Beaker1C ; verify that the p->p_fd is valid
1279 */
1280 if (fdp == NULL) {
1281 *retval=0;
1282 return(EIO);
1283 }
0b4e3aa0
A
1284 ibits = sel->ibits;
1285 obits = sel->obits;
1286 wql = sel->wql;
1287
1c79356b
A
1288 nw = howmany(nfd, NFDBITS);
1289
2d21ac55
A
1290 count = sel->count;
1291 kfcount = sel->kfcount;
0b4e3aa0 1292
2d21ac55
A
1293 if (kfcount > count)
1294 panic("selscan: count < kfcount");
1295
1296 if (kfcount != 0) {
2d21ac55 1297 proc_fdlock(p);
0b4e3aa0
A
1298 for (msk = 0; msk < 3; msk++) {
1299 iptr = (u_int32_t *)&ibits[msk * nw];
1300 optr = (u_int32_t *)&obits[msk * nw];
91447636 1301
0b4e3aa0
A
1302 for (i = 0; i < nfd; i += NFDBITS) {
1303 bits = iptr[i/NFDBITS];
91447636 1304
0b4e3aa0
A
1305 while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
1306 bits &= ~(1 << j);
1307 fp = fdp->fd_ofiles[fd];
91447636 1308
0b4e3aa0
A
1309 if (fp == NULL ||
1310 (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
91447636 1311 proc_fdunlock(p);
0b4e3aa0
A
1312 return(EBADF);
1313 }
91447636 1314 if (sel_pass == SEL_SECONDPASS) {
0b4e3aa0 1315 wql_ptr = (char *)0;
91447636
A
1316 fp->f_flags &= ~FP_INSELECT;
1317 fp->f_waddr = (void *)0;
1318 } else {
1319 wql_ptr = (wql + nc * SIZEOF_WAITQUEUE_LINK);
1320 fp->f_flags |= FP_INSELECT;
1321 fp->f_waddr = (void *)wqsub;
1322 }
2d21ac55
A
1323
1324 context.vc_ucred = fp->f_cred;
1325
1326 if (fp->f_ops && (fp->f_type == DTYPE_VNODE)
1327 && ((vp = (struct vnode *)fp->f_data) != NULLVP)
1328 && (vp->v_type == VCHR)
1329 && fo_select(fp, flag[msk], wql_ptr, &context)) {
0b4e3aa0
A
1330 optr[fd/NFDBITS] |= (1 << (fd % NFDBITS));
1331 n++;
1332 }
1333 nc++;
1334 }
1335 }
1336 }
2d21ac55 1337 proc_fdunlock(p);
2d21ac55
A
1338 }
1339
1340 nc = 0;
1341 if (kfcount != count) {
1342 proc_fdlock(p);
1343 for (msk = 0; msk < 3; msk++) {
1344 iptr = (u_int32_t *)&ibits[msk * nw];
1345 optr = (u_int32_t *)&obits[msk * nw];
1346
1347 for (i = 0; i < nfd; i += NFDBITS) {
1348 bits = iptr[i/NFDBITS];
1349
1350 while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
1351 bits &= ~(1 << j);
1352 fp = fdp->fd_ofiles[fd];
1353
1354 if (fp == NULL ||
1355 (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
1356 proc_fdunlock(p);
1357 return(EBADF);
1358 }
1359 if (sel_pass == SEL_SECONDPASS) {
1360 wql_ptr = (char *)0;
1361 fp->f_flags &= ~FP_INSELECT;
1362 fp->f_waddr = (void *)0;
1363 } else {
1364 wql_ptr = (wql + nc * SIZEOF_WAITQUEUE_LINK);
1365 fp->f_flags |= FP_INSELECT;
1366 fp->f_waddr = (void *)wqsub;
1367 }
1368
1369 context.vc_ucred = fp->f_cred;
1370
1371 if ((fp->f_ops &&
1372 ((fp->f_type != DTYPE_VNODE)
1373 || (((vp = (struct vnode *)fp->f_data) != NULLVP)
1374 && (vp->v_type != VCHR))
1375 )
1376 && fo_select(fp, flag[msk], wql_ptr, &context))) {
1377 optr[fd/NFDBITS] |= (1 << (fd % NFDBITS));
1378 n++;
1379 }
1380 nc++;
1381 }
1382 }
1383 }
1384 proc_fdunlock(p);
0b4e3aa0 1385 }
1c79356b
A
1386 *retval = n;
1387 return (0);
1388}
1389
b0d623f7 1390int poll_callback(struct kqueue *, struct kevent64_s *, void *);
91447636
A
1391
1392struct poll_continue_args {
1393 user_addr_t pca_fds;
1394 u_int pca_nfds;
1395 u_int pca_rfds;
1396};
1397
9bccf70c 1398int
b0d623f7 1399poll(struct proc *p, struct poll_args *uap, int32_t *retval)
2d21ac55
A
1400{
1401 __pthread_testcancel(1);
1402 return(poll_nocancel(p, (struct poll_nocancel_args *)uap, retval));
1403}
1404
1405
1406int
b0d623f7 1407poll_nocancel(struct proc *p, struct poll_nocancel_args *uap, int32_t *retval)
1c79356b 1408{
91447636
A
1409 struct poll_continue_args *cont;
1410 struct pollfd *fds;
1411 struct kqueue *kq;
1412 struct timeval atv;
1413 int ncoll, error = 0;
1414 u_int nfds = uap->nfds;
1415 u_int rfds = 0;
1416 u_int i;
1417 size_t ni;
1c79356b 1418
91447636
A
1419 /*
1420 * This is kinda bogus. We have fd limits, but that is not
1421 * really related to the size of the pollfd array. Make sure
1422 * we let the process use at least FD_SETSIZE entries and at
1423 * least enough for the current limits. We want to be reasonably
1424 * safe, but not overly restrictive.
1425 */
1426 if (nfds > OPEN_MAX ||
2d21ac55 1427 (nfds > p->p_rlimit[RLIMIT_NOFILE].rlim_cur && (proc_suser(p) || nfds > FD_SETSIZE)))
91447636 1428 return (EINVAL);
1c79356b 1429
91447636
A
1430 kq = kqueue_alloc(p);
1431 if (kq == NULL)
1432 return (EAGAIN);
1433
1434 ni = nfds * sizeof(struct pollfd) + sizeof(struct poll_continue_args);
1435 MALLOC(cont, struct poll_continue_args *, ni, M_TEMP, M_WAITOK);
1436 if (NULL == cont) {
1437 error = EAGAIN;
1438 goto out;
1439 }
1440
1441 fds = (struct pollfd *)&cont[1];
1442 error = copyin(uap->fds, fds, nfds * sizeof(struct pollfd));
1443 if (error)
1444 goto out;
1445
1446 if (uap->timeout != -1) {
1447 struct timeval rtv;
1448
1449 atv.tv_sec = uap->timeout / 1000;
1450 atv.tv_usec = (uap->timeout % 1000) * 1000;
1451 if (itimerfix(&atv)) {
1452 error = EINVAL;
1453 goto out;
1454 }
1455 getmicrouptime(&rtv);
1456 timevaladd(&atv, &rtv);
1457 } else {
1458 atv.tv_sec = 0;
1459 atv.tv_usec = 0;
1460 }
1461
1462 /* JMM - all this P_SELECT stuff is bogus */
1463 ncoll = nselcoll;
b0d623f7 1464 OSBitOrAtomic(P_SELECT, &p->p_flag);
91447636
A
1465 for (i = 0; i < nfds; i++) {
1466 short events = fds[i].events;
b0d623f7 1467 struct kevent64_s kev;
91447636
A
1468 int kerror = 0;
1469
1470 /* per spec, ignore fd values below zero */
1471 if (fds[i].fd < 0) {
1472 fds[i].revents = 0;
1473 continue;
1474 }
1475
1476 /* convert the poll event into a kqueue kevent */
1477 kev.ident = fds[i].fd;
1478 kev.flags = EV_ADD | EV_ONESHOT | EV_POLL;
1479 kev.fflags = NOTE_LOWAT;
1480 kev.data = 1; /* efficiency be damned: any data should trigger */
1481 kev.udata = CAST_USER_ADDR_T(&fds[i]);
b0d623f7
A
1482 kev.ext[0] = 0;
1483 kev.ext[1] = 0;
91447636
A
1484
1485 /* Handle input events */
2d21ac55 1486 if (events & ( POLLIN | POLLRDNORM | POLLPRI | POLLRDBAND | POLLHUP )) {
91447636
A
1487 kev.filter = EVFILT_READ;
1488 if (!(events & ( POLLIN | POLLRDNORM )))
1489 kev.flags |= EV_OOBAND;
1490 kerror = kevent_register(kq, &kev, p);
1491 }
1492
1493 /* Handle output events */
1494 if (kerror == 0 &&
1495 events & ( POLLOUT | POLLWRNORM | POLLWRBAND )) {
1496 kev.filter = EVFILT_WRITE;
1497 kerror = kevent_register(kq, &kev, p);
1498 }
1499
1500 /* Handle BSD extension vnode events */
1501 if (kerror == 0 &&
1502 events & ( POLLEXTEND | POLLATTRIB | POLLNLINK | POLLWRITE )) {
1503 kev.filter = EVFILT_VNODE;
1504 kev.fflags = 0;
1505 if (events & POLLEXTEND)
1506 kev.fflags |= NOTE_EXTEND;
1507 if (events & POLLATTRIB)
1508 kev.fflags |= NOTE_ATTRIB;
1509 if (events & POLLNLINK)
1510 kev.fflags |= NOTE_LINK;
1511 if (events & POLLWRITE)
1512 kev.fflags |= NOTE_WRITE;
1513 kerror = kevent_register(kq, &kev, p);
1514 }
1515
1516 if (kerror != 0) {
1517 fds[i].revents = POLLNVAL;
1518 rfds++;
1519 } else
1520 fds[i].revents = 0;
1521 }
1522
1523 /* Did we have any trouble registering? */
1524 if (rfds > 0)
1525 goto done;
1526
1527 /* scan for, and possibly wait for, the kevents to trigger */
1528 cont->pca_fds = uap->fds;
1529 cont->pca_nfds = nfds;
1530 cont->pca_rfds = rfds;
b0d623f7 1531 error = kqueue_scan(kq, poll_callback, NULL, cont, &atv, p);
91447636
A
1532 rfds = cont->pca_rfds;
1533
1534 done:
b0d623f7 1535 OSBitAndAtomic(~((uint32_t)P_SELECT), &p->p_flag);
91447636
A
1536 /* poll is not restarted after signals... */
1537 if (error == ERESTART)
1538 error = EINTR;
1539 if (error == EWOULDBLOCK)
1540 error = 0;
1541 if (error == 0) {
1542 error = copyout(fds, uap->fds, nfds * sizeof(struct pollfd));
1543 *retval = rfds;
1544 }
1545
1546 out:
1547 if (NULL != cont)
1548 FREE(cont, M_TEMP);
1549
2d21ac55 1550 kqueue_dealloc(kq);
91447636
A
1551 return (error);
1552}
1553
2d21ac55 1554int
b0d623f7 1555poll_callback(__unused struct kqueue *kq, struct kevent64_s *kevp, void *data)
91447636
A
1556{
1557 struct poll_continue_args *cont = (struct poll_continue_args *)data;
1558 struct pollfd *fds = CAST_DOWN(struct pollfd *, kevp->udata);
ff6e181a
A
1559 short mask;
1560
91447636
A
1561 /* convert the results back into revents */
1562 if (kevp->flags & EV_EOF)
1563 fds->revents |= POLLHUP;
1564 if (kevp->flags & EV_ERROR)
1565 fds->revents |= POLLERR;
91447636
A
1566
1567 switch (kevp->filter) {
1568 case EVFILT_READ:
ff6e181a
A
1569 if (fds->revents & POLLHUP)
1570 mask = (POLLIN | POLLRDNORM | POLLPRI | POLLRDBAND );
1571 else {
1572 mask = 0;
1573 if (kevp->data != 0)
1574 mask |= (POLLIN | POLLRDNORM );
1575 if (kevp->flags & EV_OOBAND)
1576 mask |= ( POLLPRI | POLLRDBAND );
1577 }
1578 fds->revents |= (fds->events & mask);
91447636
A
1579 break;
1580
1581 case EVFILT_WRITE:
1582 if (!(fds->revents & POLLHUP))
1583 fds->revents |= (fds->events & ( POLLOUT | POLLWRNORM | POLLWRBAND ));
1584 break;
1585
2d21ac55 1586 case EVFILT_VNODE:
91447636
A
1587 if (kevp->fflags & NOTE_EXTEND)
1588 fds->revents |= (fds->events & POLLEXTEND);
1589 if (kevp->fflags & NOTE_ATTRIB)
1590 fds->revents |= (fds->events & POLLATTRIB);
1591 if (kevp->fflags & NOTE_LINK)
1592 fds->revents |= (fds->events & POLLNLINK);
1593 if (kevp->fflags & NOTE_WRITE)
1594 fds->revents |= (fds->events & POLLWRITE);
1595 break;
1596 }
2d21ac55
A
1597
1598 if (fds->revents)
1599 cont->pca_rfds++;
1600
91447636
A
1601 return 0;
1602}
1603
/*
 * seltrue: select routine that unconditionally reports "ready".
 * All arguments are ignored.
 */
int
seltrue(__unused dev_t dev, __unused int flag, __unused struct proc *p)
{
	return 1;
}
1610
1611static int
1612selcount(struct proc *p, u_int32_t *ibits, __unused u_int32_t *obits,
2d21ac55 1613 int nfd, int *countp, int * kfcountp)
91447636 1614{
2d21ac55
A
1615 struct filedesc *fdp = p->p_fd;
1616 int msk, i, j, fd;
1617 u_int32_t bits;
91447636 1618 struct fileproc *fp;
0b4e3aa0 1619 int n = 0;
91447636 1620 u_int32_t *iptr;
0b4e3aa0 1621 u_int nw;
91447636 1622 int error=0;
2d21ac55 1623 int kfc = 0;
91447636 1624 int dropcount;
2d21ac55 1625 vnode_t vp;
0b4e3aa0
A
1626
1627 /*
1628 * Problems when reboot; due to MacOSX signal probs
1629 * in Beaker1C ; verify that the p->p_fd is valid
1630 */
1631 if (fdp == NULL) {
2d21ac55
A
1632 *countp = 0;
1633 *kfcountp = 0;
0b4e3aa0
A
1634 return(EIO);
1635 }
0b4e3aa0
A
1636 nw = howmany(nfd, NFDBITS);
1637
91447636 1638 proc_fdlock(p);
0b4e3aa0
A
1639 for (msk = 0; msk < 3; msk++) {
1640 iptr = (u_int32_t *)&ibits[msk * nw];
1641 for (i = 0; i < nfd; i += NFDBITS) {
1642 bits = iptr[i/NFDBITS];
1643 while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
1644 bits &= ~(1 << j);
1645 fp = fdp->fd_ofiles[fd];
1646 if (fp == NULL ||
1647 (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
2d21ac55
A
1648 *countp = 0;
1649 *kfcountp = 0;
91447636
A
1650 error = EBADF;
1651 goto bad;
0b4e3aa0 1652 }
91447636 1653 fp->f_iocount++;
2d21ac55
A
1654 if ((fp->f_type == DTYPE_VNODE)
1655 && ((vp = (struct vnode *)fp->f_data) != NULLVP)
1656 && (vp->v_type == VCHR) )
1657 kfc++;
1658
0b4e3aa0
A
1659 n++;
1660 }
1661 }
1662 }
91447636
A
1663 proc_fdunlock(p);
1664
2d21ac55
A
1665 *countp = n;
1666 *kfcountp = kfc;
91447636
A
1667 return (0);
1668bad:
1669 dropcount = 0;
1670
1671 if (n== 0)
1672 goto out;
1673 /* undo the iocounts */
1674 for (msk = 0; msk < 3; msk++) {
1675 iptr = (u_int32_t *)&ibits[msk * nw];
1676 for (i = 0; i < nfd; i += NFDBITS) {
1677 bits = iptr[i/NFDBITS];
1678 while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
1679 bits &= ~(1 << j);
1680 fp = fdp->fd_ofiles[fd];
1681 if (dropcount >= n)
1682 goto out;
1683 fp->f_iocount--;
1684
1685 if (p->p_fpdrainwait && fp->f_iocount == 0) {
1686 p->p_fpdrainwait = 0;
1687 wakeup(&p->p_fpdrainwait);
1688 }
1689 dropcount++;
1690 }
1691 }
1692 }
1693out:
1694 proc_fdunlock(p);
1695 return(error);
1696}
1697
1698static int
2d21ac55 1699seldrop(struct proc *p, u_int32_t *ibits, int nfd)
91447636 1700{
2d21ac55
A
1701 struct filedesc *fdp = p->p_fd;
1702 int msk, i, j, fd;
1703 u_int32_t bits;
91447636
A
1704 struct fileproc *fp;
1705 int n = 0;
1706 u_int32_t *iptr;
1707 u_int nw;
1708
1709 /*
1710 * Problems when reboot; due to MacOSX signal probs
1711 * in Beaker1C ; verify that the p->p_fd is valid
1712 */
1713 if (fdp == NULL) {
1714 return(EIO);
1715 }
1716
1717 nw = howmany(nfd, NFDBITS);
1718
1719
1720 proc_fdlock(p);
1721 for (msk = 0; msk < 3; msk++) {
1722 iptr = (u_int32_t *)&ibits[msk * nw];
1723 for (i = 0; i < nfd; i += NFDBITS) {
1724 bits = iptr[i/NFDBITS];
1725 while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
1726 bits &= ~(1 << j);
1727 fp = fdp->fd_ofiles[fd];
1728 if (fp == NULL
1729#if 0
1730 /* if you are here then it is being closed */
1731 || (fdp->fd_ofileflags[fd] & UF_RESERVED)
1732#endif
1733 ) {
1734 proc_fdunlock(p);
1735 return(EBADF);
1736 }
1737 n++;
1738 fp->f_iocount--;
1739 fp->f_flags &= ~FP_INSELECT;
1740
1741 if (p->p_fpdrainwait && fp->f_iocount == 0) {
1742 p->p_fpdrainwait = 0;
1743 wakeup(&p->p_fpdrainwait);
1744 }
1745 }
1746 }
1747 }
1748 proc_fdunlock(p);
0b4e3aa0
A
1749 return (0);
1750}
1751
1c79356b
A
1752/*
1753 * Record a select request.
1754 */
1755void
91447636 1756selrecord(__unused struct proc *selector, struct selinfo *sip, void * p_wql)
1c79356b 1757{
91447636 1758 thread_t cur_act = current_thread();
0b4e3aa0 1759 struct uthread * ut = get_bsdthread_info(cur_act);
1c79356b 1760
0b4e3aa0
A
1761 /* need to look at collisions */
1762
1763 if ((p_wql == (void *)0) && ((sip->si_flags & SI_INITED) == 0)) {
1c79356b
A
1764 return;
1765 }
0b4e3aa0
A
1766
1767 /*do not record if this is second pass of select */
1768 if((p_wql == (void *)0)) {
1769 return;
1c79356b
A
1770 }
1771
0b4e3aa0 1772 if ((sip->si_flags & SI_INITED) == 0) {
55e303ae 1773 wait_queue_init(&sip->si_wait_queue, SYNC_POLICY_FIFO);
0b4e3aa0
A
1774 sip->si_flags |= SI_INITED;
1775 sip->si_flags &= ~SI_CLEAR;
1776 }
1777
1778 if (sip->si_flags & SI_RECORDED) {
1779 sip->si_flags |= SI_COLL;
1780 } else
1781 sip->si_flags &= ~SI_COLL;
1782
1783 sip->si_flags |= SI_RECORDED;
2d21ac55
A
1784 if (!wait_queue_member(&sip->si_wait_queue, ut->uu_wqset))
1785 wait_queue_link_noalloc(&sip->si_wait_queue, ut->uu_wqset,
91447636 1786 (wait_queue_link_t)p_wql);
0b4e3aa0 1787
1c79356b
A
1788 return;
1789}
1790
1791void
2d21ac55 1792selwakeup(struct selinfo *sip)
1c79356b 1793{
1c79356b 1794
0b4e3aa0 1795 if ((sip->si_flags & SI_INITED) == 0) {
1c79356b 1796 return;
0b4e3aa0 1797 }
1c79356b
A
1798
1799 if (sip->si_flags & SI_COLL) {
1800 nselcoll++;
1801 sip->si_flags &= ~SI_COLL;
0b4e3aa0
A
1802#if 0
1803 /* will not support */
1804 //wakeup((caddr_t)&selwait);
1805#endif
1c79356b 1806 }
1c79356b 1807
0b4e3aa0 1808 if (sip->si_flags & SI_RECORDED) {
b0d623f7 1809 wait_queue_wakeup_all(&sip->si_wait_queue, NULL, THREAD_AWAKENED);
0b4e3aa0 1810 sip->si_flags &= ~SI_RECORDED;
1c79356b 1811 }
1c79356b 1812
1c79356b
A
1813}
1814
1815void
2d21ac55 1816selthreadclear(struct selinfo *sip)
1c79356b 1817{
1c79356b 1818
0b4e3aa0
A
1819 if ((sip->si_flags & SI_INITED) == 0) {
1820 return;
1821 }
1822 if (sip->si_flags & SI_RECORDED) {
1823 selwakeup(sip);
1824 sip->si_flags &= ~(SI_RECORDED | SI_COLL);
1c79356b 1825 }
0b4e3aa0 1826 sip->si_flags |= SI_CLEAR;
b0d623f7 1827 wait_queue_unlink_all(&sip->si_wait_queue);
1c79356b
A
1828}
1829
1830
91447636
A
1831
1832
91447636
A
/*
 * Trace sub-codes for the event (watchevent/postevent) code in this file,
 * followed by the full debug codes built from them for KERNEL_DEBUG().
 */
#define DBG_POST		0x10
#define DBG_WATCH		0x11
#define DBG_WAIT		0x12
#define DBG_MOD			0x13
#define DBG_EWAKEUP		0x14
#define DBG_ENQUEUE		0x15
#define DBG_DEQUEUE		0x16

#define DBG_MISC_POST		MISCDBG_CODE(DBG_EVENT,DBG_POST)
#define DBG_MISC_WATCH		MISCDBG_CODE(DBG_EVENT,DBG_WATCH)
#define DBG_MISC_WAIT		MISCDBG_CODE(DBG_EVENT,DBG_WAIT)
#define DBG_MISC_MOD		MISCDBG_CODE(DBG_EVENT,DBG_MOD)
#define DBG_MISC_EWAKEUP	MISCDBG_CODE(DBG_EVENT,DBG_EWAKEUP)
#define DBG_MISC_ENQUEUE	MISCDBG_CODE(DBG_EVENT,DBG_ENQUEUE)
#define DBG_MISC_DEQUEUE	MISCDBG_CODE(DBG_EVENT,DBG_DEQUEUE)
1848
1849
/*
 * EVPROCDEQUE(p, evq): under the proc lock of p, remove evq from p's event
 * list if it is currently queued there and clear its EV_QUEUED flag.
 *
 * The expansion is a single statement with no trailing semicolon, so it can
 * be used as "EVPROCDEQUE(p, evq);" anywhere a statement is allowed,
 * including an unbraced if/else arm.  Both arguments are evaluated more than
 * once; do not pass expressions with side effects.
 */
#define EVPROCDEQUE(p, evq)	do {				\
	proc_lock(p);						\
	if ((evq)->ee_flags & EV_QUEUED) {			\
		TAILQ_REMOVE(&(p)->p_evlist, (evq), ee_plist);	\
		(evq)->ee_flags &= ~EV_QUEUED;			\
	}							\
	proc_unlock(p);						\
} while (0)
1858
1c79356b
A
1859
1860/*
1861 * called upon socket close. deque and free all events for
91447636 1862 * the socket... socket must be locked by caller.
1c79356b 1863 */
9bccf70c 1864void
1c79356b
A
1865evsofree(struct socket *sp)
1866{
91447636
A
1867 struct eventqelt *evq, *next;
1868 proc_t p;
1869
1870 if (sp == NULL)
1871 return;
1c79356b 1872
91447636
A
1873 for (evq = sp->so_evlist.tqh_first; evq != NULL; evq = next) {
1874 next = evq->ee_slist.tqe_next;
1875 p = evq->ee_proc;
1c79356b 1876
91447636
A
1877 if (evq->ee_flags & EV_QUEUED) {
1878 EVPROCDEQUE(p, evq);
1879 }
1880 TAILQ_REMOVE(&sp->so_evlist, evq, ee_slist); // remove from socket q
1881 FREE(evq, M_TEMP);
1882 }
1c79356b
A
1883}
1884
1885
91447636
A
1886/*
1887 * called upon pipe close. deque and free all events for
1888 * the pipe... pipe must be locked by caller
1889 */
1890void
1891evpipefree(struct pipe *cpipe)
1892{
1893 struct eventqelt *evq, *next;
1894 proc_t p;
1c79356b 1895
91447636
A
1896 for (evq = cpipe->pipe_evlist.tqh_first; evq != NULL; evq = next) {
1897 next = evq->ee_slist.tqe_next;
1898 p = evq->ee_proc;
1c79356b 1899
91447636
A
1900 EVPROCDEQUE(p, evq);
1901
1902 TAILQ_REMOVE(&cpipe->pipe_evlist, evq, ee_slist); // remove from pipe q
1903 FREE(evq, M_TEMP);
1904 }
1905}
1c79356b
A
1906
1907
1908/*
91447636
A
1909 * enqueue this event if it's not already queued. wakeup
1910 * the proc if we do queue this event to it...
1911 * entered with proc lock held... we drop it before
1912 * doing the wakeup and return in that state
1c79356b 1913 */
91447636
A
1914static void
1915evprocenque(struct eventqelt *evq)
1c79356b 1916{
91447636
A
1917 proc_t p;
1918
1919 assert(evq);
1920 p = evq->ee_proc;
1921
2d21ac55 1922 KERNEL_DEBUG(DBG_MISC_ENQUEUE|DBG_FUNC_START, (uint32_t)evq, evq->ee_flags, evq->ee_eventmask,0,0);
91447636
A
1923
1924 proc_lock(p);
1925
1926 if (evq->ee_flags & EV_QUEUED) {
1927 proc_unlock(p);
1928
1929 KERNEL_DEBUG(DBG_MISC_ENQUEUE|DBG_FUNC_END, 0,0,0,0,0);
1930 return;
1931 }
1932 evq->ee_flags |= EV_QUEUED;
1933
1934 TAILQ_INSERT_TAIL(&p->p_evlist, evq, ee_plist);
1935
1936 proc_unlock(p);
1937
1938 wakeup(&p->p_evlist);
1939
1940 KERNEL_DEBUG(DBG_MISC_ENQUEUE|DBG_FUNC_END, 0,0,0,0,0);
1c79356b
A
1941}
1942
91447636 1943
1c79356b 1944/*
91447636 1945 * pipe lock must be taken by the caller
1c79356b 1946 */
9bccf70c 1947void
91447636 1948postpipeevent(struct pipe *pipep, int event)
1c79356b 1949{
91447636
A
1950 int mask;
1951 struct eventqelt *evq;
1952
1953 if (pipep == NULL)
1954 return;
1955 KERNEL_DEBUG(DBG_MISC_POST|DBG_FUNC_START, event,0,0,1,0);
1956
1957 for (evq = pipep->pipe_evlist.tqh_first;
1958 evq != NULL; evq = evq->ee_slist.tqe_next) {
1959
1960 if (evq->ee_eventmask == 0)
1961 continue;
1962 mask = 0;
1963
1964 switch (event & (EV_RWBYTES | EV_RCLOSED | EV_WCLOSED)) {
1965
1966 case EV_RWBYTES:
1967 if ((evq->ee_eventmask & EV_RE) && pipep->pipe_buffer.cnt) {
1968 mask |= EV_RE;
1969 evq->ee_req.er_rcnt = pipep->pipe_buffer.cnt;
1970 }
1971 if ((evq->ee_eventmask & EV_WR) &&
1972 (pipep->pipe_buffer.size - pipep->pipe_buffer.cnt) >= PIPE_BUF) {
1973
1974 if (pipep->pipe_state & PIPE_EOF) {
1975 mask |= EV_WR|EV_RESET;
1976 break;
1977 }
1978 mask |= EV_WR;
1979 evq->ee_req.er_wcnt = pipep->pipe_buffer.size - pipep->pipe_buffer.cnt;
1980 }
1981 break;
1982
1983 case EV_WCLOSED:
1984 case EV_RCLOSED:
1985 if ((evq->ee_eventmask & EV_RE)) {
1986 mask |= EV_RE|EV_RCLOSED;
1987 }
1988 if ((evq->ee_eventmask & EV_WR)) {
1989 mask |= EV_WR|EV_WCLOSED;
1990 }
1991 break;
1992
1993 default:
1994 return;
1995 }
1996 if (mask) {
1997 /*
1998 * disarm... postevents are nops until this event is 'read' via
1999 * waitevent and then re-armed via modwatch
2000 */
2001 evq->ee_eventmask = 0;
2002
2003 /*
2004 * since events are disarmed until after the waitevent
2005 * the ee_req.er_xxxx fields can't change once we've
2006 * inserted this event into the proc queue...
2007 * therefore, the waitevent will see a 'consistent'
2008 * snapshot of the event, even though it won't hold
2009 * the pipe lock, and we're updating the event outside
2010 * of the proc lock, which it will hold
2011 */
2012 evq->ee_req.er_eventbits |= mask;
2013
2d21ac55 2014 KERNEL_DEBUG(DBG_MISC_POST, (uint32_t)evq, evq->ee_req.er_eventbits, mask, 1,0);
91447636
A
2015
2016 evprocenque(evq);
2017 }
2018 }
2019 KERNEL_DEBUG(DBG_MISC_POST|DBG_FUNC_END, 0,0,0,1,0);
1c79356b
A
2020}
2021
#if SOCKETS
/*
 * postevent_sock_reset: true when the socket has a pending ECONNREFUSED or
 * ECONNRESET error on a SOCK_STREAM socket and its protocol state is gone:
 * no pcb, inpcb marked INPCB_STATE_DEAD, no tcpcb, or tcpcb in TCPS_CLOSED.
 */
static int
postevent_sock_reset(struct socket *sp)
{
	struct tcpcb *tp;

	if (sp->so_error == 0)
		return (0);
	if (sp->so_type != SOCK_STREAM)
		return (0);
	if (sp->so_error != ECONNREFUSED && sp->so_error != ECONNRESET)
		return (0);

	if (sp->so_pcb == 0)
		return (1);
	if (((struct inpcb *)sp->so_pcb)->inp_state == INPCB_STATE_DEAD)
		return (1);

	tp = sototcpcb(sp);
	return (tp == NULL || tp->t_state == TCPS_CLOSED);
}

/*
 * postevent_oob_pending: true when the socket has an out-of-band mark or is
 * positioned at the mark.
 */
static int
postevent_oob_pending(struct socket *sp)
{
	return (sp->so_oobmark || (sp->so_state & SS_RCVATMARK));
}

/*
 * postevent: given either a sockbuf or a socket, run down the socket's
 * event list and queue, on the owning process, every armed event that the
 * given event satisfies.
 *
 * sp		the socket; overridden by sb->sb_so when sb is non-null
 * sb		optional sockbuf identifying the socket
 * event	event bits; the low bits selected by EV_DMASK pick the case
 *
 * An element that fires is disarmed (ee_eventmask = 0) until it is read via
 * waitevent and re-armed via modwatch.  The socket must be locked by the
 * caller.
 */
void
postevent(struct socket *sp, struct sockbuf *sb, int event)
{
	int	mask;
	struct eventqelt *evq;

	if (sb)
		sp = sb->sb_so;
	if (sp == NULL)
		return;

	KERNEL_DEBUG(DBG_MISC_POST|DBG_FUNC_START, (int)sp, event, 0, 0, 0);

	for (evq = sp->so_evlist.tqh_first;
	     evq != NULL; evq = evq->ee_slist.tqe_next) {

		if (evq->ee_eventmask == 0)
			continue;
		mask = 0;

		/*
		 * ready for reading:
		 *   - byte cnt >= receive low water mark
		 *   - read-half of conn closed
		 *   - conn pending for listening sock
		 *   - socket error pending
		 *
		 * ready for writing:
		 *   - byte cnt avail >= send low water mark
		 *   - write half of conn closed
		 *   - socket error pending
		 *   - non-blocking conn completed successfully
		 *
		 * exception pending:
		 *   - out of band data
		 *   - sock at out of band mark
		 */
		switch (event & EV_DMASK) {

		case EV_OOB:
			if ((evq->ee_eventmask & EV_EX) && postevent_oob_pending(sp))
				mask |= EV_EX|EV_OOB;
			break;

		case EV_RWBYTES|EV_OOB:
			if ((evq->ee_eventmask & EV_EX) && postevent_oob_pending(sp))
				mask |= EV_EX|EV_OOB;
			/*
			 * fall into the next case
			 */
		case EV_RWBYTES:
			if ((evq->ee_eventmask & EV_RE) && soreadable(sp)) {
				if (postevent_sock_reset(sp)) {
					mask |= EV_RE|EV_RESET;
					break;
				}
				mask |= EV_RE;
				evq->ee_req.er_rcnt = sp->so_rcv.sb_cc;

				if (sp->so_state & SS_CANTRCVMORE) {
					mask |= EV_FIN;
					break;
				}
			}
			if ((evq->ee_eventmask & EV_WR) && sowriteable(sp)) {
				if (postevent_sock_reset(sp)) {
					mask |= EV_WR|EV_RESET;
					break;
				}
				mask |= EV_WR;
				evq->ee_req.er_wcnt = sbspace(&sp->so_snd);
			}
			break;

		case EV_RCONN:
			if (evq->ee_eventmask & EV_RE) {
				mask |= EV_RE|EV_RCONN;
				evq->ee_req.er_rcnt = sp->so_qlen + 1;	/* incl this one */
			}
			break;

		case EV_WCONN:
			if (evq->ee_eventmask & EV_WR)
				mask |= EV_WR|EV_WCONN;
			break;

		case EV_RCLOSED:
			if (evq->ee_eventmask & EV_RE)
				mask |= EV_RE|EV_RCLOSED;
			break;

		case EV_WCLOSED:
			if (evq->ee_eventmask & EV_WR)
				mask |= EV_WR|EV_WCLOSED;
			break;

		case EV_FIN:
			if (evq->ee_eventmask & EV_RE)
				mask |= EV_RE|EV_FIN;
			break;

		case EV_RESET:
		case EV_TIMEOUT:
			if (evq->ee_eventmask & EV_RE)
				mask |= EV_RE | event;
			if (evq->ee_eventmask & EV_WR)
				mask |= EV_WR | event;
			break;

		default:
			KERNEL_DEBUG(DBG_MISC_POST|DBG_FUNC_END, (int)sp, -1, 0, 0, 0);
			return;
		} /* switch */

		KERNEL_DEBUG(DBG_MISC_POST, (int)evq, evq->ee_eventmask, evq->ee_req.er_eventbits, mask, 0);

		if (mask == 0)
			continue;

		/*
		 * disarm... postevents are nops until this event is 'read' via
		 * waitevent and then re-armed via modwatch
		 */
		evq->ee_eventmask = 0;

		/*
		 * since events are disarmed until after the waitevent
		 * the ee_req.er_xxxx fields can't change once we've
		 * inserted this event into the proc queue...
		 * since waitevent can't see this event until we
		 * enqueue it, waitevent will see a 'consistent'
		 * snapshot of the event, even though it won't hold
		 * the socket lock, and we're updating the event outside
		 * of the proc lock, which it will hold
		 */
		evq->ee_req.er_eventbits |= mask;

		evprocenque(evq);
	}
	KERNEL_DEBUG(DBG_MISC_POST|DBG_FUNC_END, (int)sp, 0, 0, 0, 0);
}
#endif /* SOCKETS */
1c79356b 2190
1c79356b
A
2191
2192/*
2193 * watchevent system call. user passes us an event to watch
2194 * for. we malloc an event object, initialize it, and queue
2195 * it to the open socket. when the event occurs, postevent()
2196 * will enque it back to our proc where we can retrieve it
2197 * via waitevent().
2198 *
2199 * should this prevent duplicate events on same socket?
2d21ac55
A
2200 *
2201 * Returns:
2202 * ENOMEM No memory for operation
2203 * copyin:EFAULT
1c79356b
A
2204 */
2205int
91447636 2206watchevent(proc_t p, struct watchevent_args *uap, __unused int *retval)
1c79356b 2207{
91447636
A
2208 struct eventqelt *evq = (struct eventqelt *)0;
2209 struct eventqelt *np = NULL;
2d21ac55 2210 struct eventreq64 *erp;
91447636
A
2211 struct fileproc *fp = NULL;
2212 int error;
2213
2214 KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_START, 0,0,0,0,0);
2215
2216 // get a qelt and fill with users req
2217 MALLOC(evq, struct eventqelt *, sizeof(struct eventqelt), M_TEMP, M_WAITOK);
2218
2219 if (evq == NULL)
2d21ac55 2220 return (ENOMEM);
91447636
A
2221 erp = &evq->ee_req;
2222
2223 // get users request pkt
91447636 2224
2d21ac55
A
2225 if (IS_64BIT_PROCESS(p)) {
2226 error = copyin(uap->u_req, (caddr_t)erp, sizeof(struct eventreq64));
2227 } else {
2228 struct eventreq32 er32;
2229
2230 error = copyin(uap->u_req, (caddr_t)&er32, sizeof(struct eventreq32));
2231 if (error == 0) {
2232 /*
2233 * the user only passes in the
2234 * er_type, er_handle and er_data...
2235 * the other fields are initialized
2236 * below, so don't bother to copy
2237 */
2238 erp->er_type = er32.er_type;
2239 erp->er_handle = er32.er_handle;
2240 erp->er_data = (user_addr_t)er32.er_data;
2241 }
2242 }
2243 if (error) {
2244 FREE(evq, M_TEMP);
91447636 2245 KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_END, error,0,0,0,0);
2d21ac55
A
2246
2247 return(error);
91447636 2248 }
2d21ac55 2249 KERNEL_DEBUG(DBG_MISC_WATCH, erp->er_handle,uap->u_eventmask,(uint32_t)evq,0,0);
91447636
A
2250
2251 // validate, freeing qelt if errors
2252 error = 0;
2253 proc_fdlock(p);
2254
2255 if (erp->er_type != EV_FD) {
2256 error = EINVAL;
2257 } else if ((error = fp_lookup(p, erp->er_handle, &fp, 1)) != 0) {
2258 error = EBADF;
2d21ac55 2259#if SOCKETS
91447636
A
2260 } else if (fp->f_type == DTYPE_SOCKET) {
2261 socket_lock((struct socket *)fp->f_data, 1);
2262 np = ((struct socket *)fp->f_data)->so_evlist.tqh_first;
2d21ac55 2263#endif /* SOCKETS */
91447636
A
2264 } else if (fp->f_type == DTYPE_PIPE) {
2265 PIPE_LOCK((struct pipe *)fp->f_data);
2266 np = ((struct pipe *)fp->f_data)->pipe_evlist.tqh_first;
2267 } else {
2268 fp_drop(p, erp->er_handle, fp, 1);
2269 error = EINVAL;
2270 }
2271 proc_fdunlock(p);
2272
2273 if (error) {
2274 FREE(evq, M_TEMP);
2275
2276 KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_END, error,0,0,0,0);
2277 return(error);
2278 }
2279
2280 /*
2281 * only allow one watch per file per proc
2282 */
2283 for ( ; np != NULL; np = np->ee_slist.tqe_next) {
2284 if (np->ee_proc == p) {
2d21ac55 2285#if SOCKETS
91447636
A
2286 if (fp->f_type == DTYPE_SOCKET)
2287 socket_unlock((struct socket *)fp->f_data, 1);
2288 else
2d21ac55 2289#endif /* SOCKETS */
91447636
A
2290 PIPE_UNLOCK((struct pipe *)fp->f_data);
2291 fp_drop(p, erp->er_handle, fp, 0);
2292 FREE(evq, M_TEMP);
2293
2294 KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_END, EINVAL,0,0,0,0);
2295 return(EINVAL);
2296 }
2297 }
2298 erp->er_ecnt = erp->er_rcnt = erp->er_wcnt = erp->er_eventbits = 0;
2299 evq->ee_proc = p;
2300 evq->ee_eventmask = uap->u_eventmask & EV_MASK;
2301 evq->ee_flags = 0;
2302
2d21ac55 2303#if SOCKETS
91447636
A
2304 if (fp->f_type == DTYPE_SOCKET) {
2305 TAILQ_INSERT_TAIL(&((struct socket *)fp->f_data)->so_evlist, evq, ee_slist);
2306 postevent((struct socket *)fp->f_data, 0, EV_RWBYTES); // catch existing events
2307
2308 socket_unlock((struct socket *)fp->f_data, 1);
2d21ac55
A
2309 } else
2310#endif /* SOCKETS */
2311 {
91447636
A
2312 TAILQ_INSERT_TAIL(&((struct pipe *)fp->f_data)->pipe_evlist, evq, ee_slist);
2313 postpipeevent((struct pipe *)fp->f_data, EV_RWBYTES);
2314
2315 PIPE_UNLOCK((struct pipe *)fp->f_data);
2316 }
2317 fp_drop_event(p, erp->er_handle, fp);
2318
2319 KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_END, 0,0,0,0,0);
2320 return(0);
1c79356b
A
2321}
2322
91447636 2323
1c79356b
A
2324
2325/*
2326 * waitevent system call.
2327 * grabs the next waiting event for this proc and returns
2328 * it. if no events, user can request to sleep with timeout
2d21ac55
A
2329 * or without or poll mode
2330 * ((tv != NULL && interval == 0) || tv == -1)
1c79356b
A
2331 */
2332int
91447636 2333waitevent(proc_t p, struct waitevent_args *uap, int *retval)
1c79356b 2334{
91447636
A
2335 int error = 0;
2336 struct eventqelt *evq;
2d21ac55 2337 struct eventreq64 *erp;
9bccf70c 2338 uint64_t abstime, interval;
2d21ac55
A
2339 boolean_t fast_poll = FALSE;
2340 union {
2341 struct eventreq64 er64;
2342 struct eventreq32 er32;
2343 } uer;
2344
2345 interval = 0;
1c79356b
A
2346
2347 if (uap->tv) {
9bccf70c 2348 struct timeval atv;
2d21ac55
A
2349 /*
2350 * check for fast poll method
2351 */
2352 if (IS_64BIT_PROCESS(p)) {
2353 if (uap->tv == (user_addr_t)-1)
2354 fast_poll = TRUE;
2355 } else if (uap->tv == (user_addr_t)((uint32_t)-1))
2356 fast_poll = TRUE;
2357
2358 if (fast_poll == TRUE) {
2359 if (p->p_evlist.tqh_first == NULL) {
2360 KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_NONE, -1,0,0,0,0);
2361 /*
2362 * poll failed
2363 */
2364 *retval = 1;
2365 return (0);
2366 }
2367 proc_lock(p);
2368 goto retry;
2369 }
b0d623f7
A
2370 if (IS_64BIT_PROCESS(p)) {
2371 struct user64_timeval atv64;
2372 error = copyin(uap->tv, (caddr_t)&atv64, sizeof(atv64));
2373 /* Loses resolution - assume timeout < 68 years */
2374 atv.tv_sec = atv64.tv_sec;
2375 atv.tv_usec = atv64.tv_usec;
2376 } else {
2377 struct user32_timeval atv32;
2378 error = copyin(uap->tv, (caddr_t)&atv32, sizeof(atv32));
2379 atv.tv_sec = atv32.tv_sec;
2380 atv.tv_usec = atv32.tv_usec;
2381 }
9bccf70c 2382
1c79356b 2383 if (error)
9bccf70c 2384 return(error);
1c79356b
A
2385 if (itimerfix(&atv)) {
2386 error = EINVAL;
2387 return(error);
2388 }
9bccf70c 2389 interval = tvtoabstime(&atv);
2d21ac55 2390 }
9bccf70c 2391 KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_START, 0,0,0,0,0);
1c79356b 2392
91447636 2393 proc_lock(p);
1c79356b 2394retry:
91447636
A
2395 if ((evq = p->p_evlist.tqh_first) != NULL) {
2396 /*
2397 * found one... make a local copy while it's still on the queue
2398 * to prevent it from changing while in the midst of copying
2399 * don't want to hold the proc lock across a copyout because
2400 * it might block on a page fault at the target in user space
2401 */
2d21ac55 2402 erp = &evq->ee_req;
91447636 2403
2d21ac55
A
2404 if (IS_64BIT_PROCESS(p))
2405 bcopy((caddr_t)erp, (caddr_t)&uer.er64, sizeof (struct eventreq64));
2406 else {
2407 uer.er32.er_type = erp->er_type;
2408 uer.er32.er_handle = erp->er_handle;
2409 uer.er32.er_data = (uint32_t)erp->er_data;
2410 uer.er32.er_ecnt = erp->er_ecnt;
2411 uer.er32.er_rcnt = erp->er_rcnt;
2412 uer.er32.er_wcnt = erp->er_wcnt;
2413 uer.er32.er_eventbits = erp->er_eventbits;
2414 }
91447636
A
2415 TAILQ_REMOVE(&p->p_evlist, evq, ee_plist);
2416
2417 evq->ee_flags &= ~EV_QUEUED;
1c79356b 2418
91447636
A
2419 proc_unlock(p);
2420
2d21ac55
A
2421 if (IS_64BIT_PROCESS(p))
2422 error = copyout((caddr_t)&uer.er64, uap->u_req, sizeof(struct eventreq64));
2423 else
2424 error = copyout((caddr_t)&uer.er32, uap->u_req, sizeof(struct eventreq32));
91447636
A
2425
2426 KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_END, error,
2d21ac55 2427 evq->ee_req.er_handle,evq->ee_req.er_eventbits,(uint32_t)evq,0);
9bccf70c
A
2428 return (error);
2429 }
2430 else {
2431 if (uap->tv && interval == 0) {
91447636 2432 proc_unlock(p);
9bccf70c 2433 *retval = 1; // poll failed
9bccf70c 2434
91447636 2435 KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_END, error,0,0,0,0);
9bccf70c
A
2436 return (error);
2437 }
9bccf70c 2438 if (interval != 0)
55e303ae 2439 clock_absolutetime_interval_to_deadline(interval, &abstime);
91447636
A
2440 else
2441 abstime = 0;
9bccf70c 2442
2d21ac55 2443 KERNEL_DEBUG(DBG_MISC_WAIT, 1,(uint32_t)&p->p_evlist,0,0,0);
91447636
A
2444
2445 error = msleep1(&p->p_evlist, &p->p_mlock, (PSOCK | PCATCH), "waitevent", abstime);
2446
2d21ac55 2447 KERNEL_DEBUG(DBG_MISC_WAIT, 2,(uint32_t)&p->p_evlist,0,0,0);
91447636 2448
9bccf70c
A
2449 if (error == 0)
2450 goto retry;
2451 if (error == ERESTART)
2452 error = EINTR;
2453 if (error == EWOULDBLOCK) {
2454 *retval = 1;
2455 error = 0;
2456 }
2457 }
91447636 2458 proc_unlock(p);
9bccf70c
A
2459
2460 KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_END, 0,0,0,0,0);
9bccf70c 2461 return (error);
1c79356b
A
2462}
2463
1c79356b
A
2464
2465/*
2466 * modwatch system call. user passes in event to modify.
2467 * if we find it we reset the event bits and que/deque event
2468 * it needed.
2469 */
2470int
91447636 2471modwatch(proc_t p, struct modwatch_args *uap, __unused int *retval)
1c79356b 2472{
2d21ac55
A
2473 struct eventreq64 er;
2474 struct eventreq64 *erp = &er;
2475 struct eventqelt *evq = NULL; /* protected by error return */
91447636
A
2476 int error;
2477 struct fileproc *fp;
2478 int flag;
2479
2480 KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_START, 0,0,0,0,0);
2481
2482 /*
2483 * get user's request pkt
2d21ac55
A
2484 * just need the er_type and er_handle which sit above the
2485 * problematic er_data (32/64 issue)... so only copy in
2486 * those 2 fields
91447636 2487 */
2d21ac55
A
2488 if ((error = copyin(uap->u_req, (caddr_t)erp, sizeof(er.er_type) + sizeof(er.er_handle)))) {
2489 KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_END, error,0,0,0,0);
91447636
A
2490 return(error);
2491 }
2492 proc_fdlock(p);
2493
2494 if (erp->er_type != EV_FD) {
2495 error = EINVAL;
2496 } else if ((error = fp_lookup(p, erp->er_handle, &fp, 1)) != 0) {
2497 error = EBADF;
2d21ac55 2498#if SOCKETS
91447636
A
2499 } else if (fp->f_type == DTYPE_SOCKET) {
2500 socket_lock((struct socket *)fp->f_data, 1);
2501 evq = ((struct socket *)fp->f_data)->so_evlist.tqh_first;
2d21ac55 2502#endif /* SOCKETS */
91447636
A
2503 } else if (fp->f_type == DTYPE_PIPE) {
2504 PIPE_LOCK((struct pipe *)fp->f_data);
2505 evq = ((struct pipe *)fp->f_data)->pipe_evlist.tqh_first;
2506 } else {
2507 fp_drop(p, erp->er_handle, fp, 1);
2508 error = EINVAL;
2509 }
2510
2511 if (error) {
2512 proc_fdunlock(p);
2513 KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_END, error,0,0,0,0);
2514 return(error);
2515 }
2516
2517 if ((uap->u_eventmask == EV_RM) && (fp->f_flags & FP_WAITEVENT)) {
2518 fp->f_flags &= ~FP_WAITEVENT;
2519 }
2520 proc_fdunlock(p);
2521
2522 // locate event if possible
2523 for ( ; evq != NULL; evq = evq->ee_slist.tqe_next) {
2524 if (evq->ee_proc == p)
2525 break;
2526 }
2527 if (evq == NULL) {
2d21ac55 2528#if SOCKETS
91447636
A
2529 if (fp->f_type == DTYPE_SOCKET)
2530 socket_unlock((struct socket *)fp->f_data, 1);
2d21ac55
A
2531 else
2532#endif /* SOCKETS */
91447636
A
2533 PIPE_UNLOCK((struct pipe *)fp->f_data);
2534 fp_drop(p, erp->er_handle, fp, 0);
2535 KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_END, EINVAL,0,0,0,0);
2536 return(EINVAL);
2537 }
2d21ac55 2538 KERNEL_DEBUG(DBG_MISC_MOD, erp->er_handle,uap->u_eventmask,(uint32_t)evq,0,0);
91447636
A
2539
2540 if (uap->u_eventmask == EV_RM) {
2541 EVPROCDEQUE(p, evq);
2542
2d21ac55 2543#if SOCKETS
91447636
A
2544 if (fp->f_type == DTYPE_SOCKET) {
2545 TAILQ_REMOVE(&((struct socket *)fp->f_data)->so_evlist, evq, ee_slist);
2546 socket_unlock((struct socket *)fp->f_data, 1);
2d21ac55
A
2547 } else
2548#endif /* SOCKETS */
2549 {
91447636
A
2550 TAILQ_REMOVE(&((struct pipe *)fp->f_data)->pipe_evlist, evq, ee_slist);
2551 PIPE_UNLOCK((struct pipe *)fp->f_data);
2552 }
2553 fp_drop(p, erp->er_handle, fp, 0);
2554 FREE(evq, M_TEMP);
2555 KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_END, 0,0,0,0,0);
2556 return(0);
2557 }
2558 switch (uap->u_eventmask & EV_MASK) {
1c79356b 2559
91447636
A
2560 case 0:
2561 flag = 0;
2562 break;
2563
2564 case EV_RE:
2565 case EV_WR:
2566 case EV_RE|EV_WR:
2567 flag = EV_RWBYTES;
2568 break;
2569
2570 case EV_EX:
2571 flag = EV_OOB;
2572 break;
2573
2574 case EV_EX|EV_RE:
2575 case EV_EX|EV_WR:
2576 case EV_EX|EV_RE|EV_WR:
2577 flag = EV_OOB|EV_RWBYTES;
2578 break;
2579
2580 default:
2d21ac55 2581#if SOCKETS
91447636
A
2582 if (fp->f_type == DTYPE_SOCKET)
2583 socket_unlock((struct socket *)fp->f_data, 1);
2584 else
2d21ac55 2585#endif /* SOCKETS */
91447636
A
2586 PIPE_UNLOCK((struct pipe *)fp->f_data);
2587 fp_drop(p, erp->er_handle, fp, 0);
2588 KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_END, EINVAL,0,0,0,0);
2589 return(EINVAL);
2590 }
2591 /*
2592 * since we're holding the socket/pipe lock, the event
2593 * cannot go from the unqueued state to the queued state
2594 * however, it can go from the queued state to the unqueued state
2595 * since that direction is protected by the proc_lock...
2596 * so do a quick check for EV_QUEUED w/o holding the proc lock
2597 * since by far the common case will be NOT EV_QUEUED, this saves
2598 * us taking the proc_lock the majority of the time
2599 */
2600 if (evq->ee_flags & EV_QUEUED) {
2601 /*
2602 * EVPROCDEQUE will recheck the state after it grabs the proc_lock
2603 */
2604 EVPROCDEQUE(p, evq);
2605 }
2606 /*
2607 * while the event is off the proc queue and
2608 * we're holding the socket/pipe lock
2609 * it's safe to update these fields...
2610 */
2611 evq->ee_req.er_eventbits = 0;
2612 evq->ee_eventmask = uap->u_eventmask & EV_MASK;
2613
2d21ac55 2614#if SOCKETS
91447636
A
2615 if (fp->f_type == DTYPE_SOCKET) {
2616 postevent((struct socket *)fp->f_data, 0, flag);
2617 socket_unlock((struct socket *)fp->f_data, 1);
2d21ac55
A
2618 } else
2619#endif /* SOCKETS */
2620 {
91447636
A
2621 postpipeevent((struct pipe *)fp->f_data, flag);
2622 PIPE_UNLOCK((struct pipe *)fp->f_data);
2623 }
2624 fp_drop(p, erp->er_handle, fp, 0);
2d21ac55 2625 KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_END, evq->ee_req.er_handle,evq->ee_eventmask,(uint32_t)fp->f_data,flag,0);
91447636 2626 return(0);
1c79356b 2627}
91447636
A
2628
2629/* this routine is called from the close of fd with proc_fdlock held */
2630int
2631waitevent_close(struct proc *p, struct fileproc *fp)
2632{
2633 struct eventqelt *evq;
2634
2635
2636 fp->f_flags &= ~FP_WAITEVENT;
2637
2d21ac55 2638#if SOCKETS
91447636
A
2639 if (fp->f_type == DTYPE_SOCKET) {
2640 socket_lock((struct socket *)fp->f_data, 1);
2641 evq = ((struct socket *)fp->f_data)->so_evlist.tqh_first;
2d21ac55
A
2642 } else
2643#endif /* SOCKETS */
2644 if (fp->f_type == DTYPE_PIPE) {
91447636
A
2645 PIPE_LOCK((struct pipe *)fp->f_data);
2646 evq = ((struct pipe *)fp->f_data)->pipe_evlist.tqh_first;
2647 }
2648 else {
2649 return(EINVAL);
2650 }
2651 proc_fdunlock(p);
2652
2653
2654 // locate event if possible
2655 for ( ; evq != NULL; evq = evq->ee_slist.tqe_next) {
2656 if (evq->ee_proc == p)
2657 break;
2658 }
2659 if (evq == NULL) {
2d21ac55 2660#if SOCKETS
91447636
A
2661 if (fp->f_type == DTYPE_SOCKET)
2662 socket_unlock((struct socket *)fp->f_data, 1);
2663 else
2d21ac55 2664#endif /* SOCKETS */
91447636
A
2665 PIPE_UNLOCK((struct pipe *)fp->f_data);
2666
2667 proc_fdlock(p);
2668
2669 return(EINVAL);
2670 }
2671 EVPROCDEQUE(p, evq);
2672
2d21ac55 2673#if SOCKETS
91447636
A
2674 if (fp->f_type == DTYPE_SOCKET) {
2675 TAILQ_REMOVE(&((struct socket *)fp->f_data)->so_evlist, evq, ee_slist);
2676 socket_unlock((struct socket *)fp->f_data, 1);
2d21ac55
A
2677 } else
2678#endif /* SOCKETS */
2679 {
91447636
A
2680 TAILQ_REMOVE(&((struct pipe *)fp->f_data)->pipe_evlist, evq, ee_slist);
2681 PIPE_UNLOCK((struct pipe *)fp->f_data);
2682 }
2683 FREE(evq, M_TEMP);
2684
2685 proc_fdlock(p);
2686
2687 return(0);
2688}
2689
2d21ac55
A
2690
2691/*
2692 * gethostuuid
2693 *
2694 * Description: Get the host UUID from IOKit and return it to user space.
2695 *
2696 * Parameters: uuid_buf Pointer to buffer to receive UUID
2697 * timeout Timespec for timout
2698 *
2699 * Returns: 0 Success
2700 * EWOULDBLOCK Timeout is too short
2701 * copyout:EFAULT Bad user buffer
2702 *
2703 * Notes: A timeout seems redundant, since if it's tolerable to not
2704 * have a system UUID in hand, then why ask for one?
2705 */
2706int
b0d623f7 2707gethostuuid(struct proc *p, struct gethostuuid_args *uap, __unused int32_t *retval)
2d21ac55
A
2708{
2709 kern_return_t kret;
2710 int error;
2711 mach_timespec_t mach_ts; /* for IOKit call */
2712 __darwin_uuid_t uuid_kern; /* for IOKit call */
2713
2714 /* Convert the 32/64 bit timespec into a mach_timespec_t */
2715 if ( proc_is64bit(p) ) {
b0d623f7 2716 struct user64_timespec ts;
2d21ac55
A
2717 error = copyin(uap->timeoutp, &ts, sizeof(ts));
2718 if (error)
2719 return (error);
2720 mach_ts.tv_sec = ts.tv_sec;
2721 mach_ts.tv_nsec = ts.tv_nsec;
2722 } else {
b0d623f7 2723 struct user32_timespec ts;
2d21ac55
A
2724 error = copyin(uap->timeoutp, &ts, sizeof(ts) );
2725 if (error)
2726 return (error);
2727 mach_ts.tv_sec = ts.tv_sec;
2728 mach_ts.tv_nsec = ts.tv_nsec;
2729 }
2730
2731 /* Call IOKit with the stack buffer to get the UUID */
2732 kret = IOBSDGetPlatformUUID(uuid_kern, mach_ts);
2733
2734 /*
2735 * If we get it, copy out the data to the user buffer; note that a
2736 * uuid_t is an array of characters, so this is size invariant for
2737 * 32 vs. 64 bit.
2738 */
2739 if (kret == KERN_SUCCESS) {
2740 error = copyout(uuid_kern, uap->uuid_buf, sizeof(uuid_kern));
2741 } else {
2742 error = EWOULDBLOCK;
2743 }
2744
2745 return (error);
2746}