1 /*
2 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
29 /*
30 * Copyright (c) 1982, 1986, 1989, 1993
31 * The Regents of the University of California. All rights reserved.
32 * (c) UNIX System Laboratories, Inc.
33 * All or some portions of this file are derived from material licensed
34 * to the University of California by American Telephone and Telegraph
35 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
36 * the permission of UNIX System Laboratories, Inc.
37 *
38 * Redistribution and use in source and binary forms, with or without
39 * modification, are permitted provided that the following conditions
40 * are met:
41 * 1. Redistributions of source code must retain the above copyright
42 * notice, this list of conditions and the following disclaimer.
43 * 2. Redistributions in binary form must reproduce the above copyright
44 * notice, this list of conditions and the following disclaimer in the
45 * documentation and/or other materials provided with the distribution.
46 * 3. All advertising materials mentioning features or use of this software
47 * must display the following acknowledgement:
48 * This product includes software developed by the University of
49 * California, Berkeley and its contributors.
50 * 4. Neither the name of the University nor the names of its contributors
51 * may be used to endorse or promote products derived from this software
52 * without specific prior written permission.
53 *
54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 * SUCH DAMAGE.
65 *
66 * @(#)sys_generic.c 8.9 (Berkeley) 2/14/95
67 */
68
69 #include <sys/param.h>
70 #include <sys/systm.h>
71 #include <sys/filedesc.h>
72 #include <sys/ioctl.h>
73 #include <sys/file_internal.h>
74 #include <sys/proc_internal.h>
75 #include <sys/socketvar.h>
76 #if KTRACE
77 #include <sys/uio_internal.h>
78 #else
79 #include <sys/uio.h>
80 #endif
81 #include <sys/kernel.h>
82 #include <sys/stat.h>
83 #include <sys/malloc.h>
84 #include <sys/sysproto.h>
85
86 #include <sys/mount_internal.h>
87 #include <sys/protosw.h>
88 #include <sys/ev.h>
89 #include <sys/user.h>
90 #include <sys/kdebug.h>
91 #include <sys/poll.h>
92 #include <sys/event.h>
93 #include <sys/eventvar.h>
94
95 #include <mach/mach_types.h>
96 #include <kern/kern_types.h>
97 #include <kern/assert.h>
98 #include <kern/kalloc.h>
99 #include <kern/thread.h>
100 #include <kern/clock.h>
101
102 #include <sys/mbuf.h>
103 #include <sys/socket.h>
104 #include <sys/socketvar.h>
105 #include <sys/errno.h>
106 #include <sys/syscall.h>
107 #include <sys/pipe.h>
108
109 #include <bsm/audit_kernel.h>
110
111 #include <net/if.h>
112 #include <net/route.h>
113
114 #include <netinet/in.h>
115 #include <netinet/in_systm.h>
116 #include <netinet/ip.h>
117 #include <netinet/in_pcb.h>
118 #include <netinet/ip_var.h>
119 #include <netinet/ip6.h>
120 #include <netinet/tcp.h>
121 #include <netinet/tcp_fsm.h>
122 #include <netinet/tcp_seq.h>
123 #include <netinet/tcp_timer.h>
124 #include <netinet/tcp_var.h>
125 #include <netinet/tcpip.h>
126 #include <netinet/tcp_debug.h>
127 /* for wait queue based select */
128 #include <kern/wait_queue.h>
129 #include <kern/kalloc.h>
130 #if KTRACE
131 #include <sys/ktrace.h>
132 #endif
133 #include <sys/vnode_internal.h>
134
135 int rd_uio(struct proc *p, int fdes, uio_t uio, user_ssize_t *retval);
136 int wr_uio(struct proc *p, int fdes, uio_t uio, user_ssize_t *retval);
137 extern void *get_bsduthreadarg(thread_t);
138 extern int *get_bsduthreadrval(thread_t);
139
140 __private_extern__ int dofileread(struct proc *p, struct fileproc *fp, int fd,
141 user_addr_t bufp, user_size_t nbyte,
142 off_t offset, int flags, user_ssize_t *retval);
143 __private_extern__ int dofilewrite(struct proc *p, struct fileproc *fp, int fd,
144 user_addr_t bufp, user_size_t nbyte,
145 off_t offset, int flags, user_ssize_t *retval);
146 __private_extern__ int preparefileread(struct proc *p, struct fileproc **fp_ret, int fd, int check_for_vnode);
147 __private_extern__ void donefileread(struct proc *p, struct fileproc *fp_ret, int fd);
148
149 #if NETAT
150 extern int appletalk_inited;
151 #endif /* NETAT */
152
153 #define f_flag f_fglob->fg_flag
154 #define f_type f_fglob->fg_type
155 #define f_msgcount f_fglob->fg_msgcount
156 #define f_cred f_fglob->fg_cred
157 #define f_ops f_fglob->fg_ops
158 #define f_offset f_fglob->fg_offset
159 #define f_data f_fglob->fg_data
160 /*
161 * Read system call.
162 */
163 int
164 read(p, uap, retval)
165 struct proc *p;
166 register struct read_args *uap;
167 user_ssize_t *retval;
168 {
169 struct fileproc *fp;
170 int error;
171 int fd = uap->fd;
172
173 if ( (error = preparefileread(p, &fp, fd, 0)) )
174 return (error);
175
176 error = dofileread(p, fp, uap->fd, uap->cbuf, uap->nbyte,
177 (off_t)-1, 0, retval);
178
179 donefileread(p, fp, fd);
180
181 return (error);
182 }
183
184 /*
185 * Pread system call
186 */
187 int
188 pread(p, uap, retval)
189 struct proc *p;
190 register struct pread_args *uap;
191 user_ssize_t *retval;
192 {
193 struct fileproc *fp;
194 int fd = uap->fd;
195 int error;
196
197 if ( (error = preparefileread(p, &fp, fd, 1)) )
198 return (error);
199
200 error = dofileread(p, fp, uap->fd, uap->buf, uap->nbyte,
201 uap->offset, FOF_OFFSET, retval);
202
203 donefileread(p, fp, fd);
204
205 if (!error)
206 KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_pread) | DBG_FUNC_NONE),
207 uap->fd, uap->nbyte, (unsigned int)((uap->offset >> 32)), (unsigned int)(uap->offset), 0);
208
209 return (error);
210 }
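/*
 * Illustrative user-space sketch (not part of this file): how read()
 * and pread() differ at the syscall boundary. The path and sizes are
 * hypothetical; pread() supplies an explicit offset (FOF_OFFSET above)
 * and is rejected with ESPIPE for non-vnode descriptors.
 */
#if 0
#include <fcntl.h>
#include <unistd.h>

static void
example_read_pread(void)
{
	char buf[128];
	ssize_t n;
	int fd = open("/tmp/example", O_RDONLY);	/* hypothetical path */

	if (fd < 0)
		return;
	n = read(fd, buf, sizeof(buf));		/* consumes the file offset */
	n = pread(fd, buf, sizeof(buf), 64);	/* explicit offset; fd offset untouched */
	(void)n;
	close(fd);
}
#endif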
211
212 /*
213 * Code common for read and pread
214 */
215
216 void
217 donefileread(struct proc *p, struct fileproc *fp, int fd)
218 {
219 proc_fdlock(p);
220
221 fp->f_flags &= ~FP_INCHRREAD;
222
223 fp_drop(p, fd, fp, 1);
224 proc_fdunlock(p);
225 }
226
227 int
228 preparefileread(struct proc *p, struct fileproc **fp_ret, int fd, int check_for_pread)
229 {
230 vnode_t vp;
231 int error;
232 struct fileproc *fp;
233
234 proc_fdlock(p);
235
236 error = fp_lookup(p, fd, &fp, 1);
237
238 if (error) {
239 proc_fdunlock(p);
240 return (error);
241 }
242 if ((fp->f_flag & FREAD) == 0) {
243 error = EBADF;
244 goto out;
245 }
246 if (check_for_pread && (fp->f_type != DTYPE_VNODE)) {
247 error = ESPIPE;
248 goto out;
249 }
250 if (fp->f_type == DTYPE_VNODE) {
251 vp = (struct vnode *)fp->f_fglob->fg_data;
252
253 if (vp->v_type == VCHR)
254 fp->f_flags |= FP_INCHRREAD;
255 }
256
257 *fp_ret = fp;
258
259 proc_fdunlock(p);
260 return (0);
261
262 out:
263 fp_drop(p, fd, fp, 1);
264 proc_fdunlock(p);
265 return (error);
266 }
267
268
269 __private_extern__ int
270 dofileread(p, fp, fd, bufp, nbyte, offset, flags, retval)
271 struct proc *p;
272 struct fileproc *fp;
273 int fd, flags;
274 user_addr_t bufp;
275 user_size_t nbyte;
276 off_t offset;
277 user_ssize_t *retval;
278 {
279 uio_t auio;
280 user_ssize_t bytecnt;
281 long error = 0;
282 char uio_buf[ UIO_SIZEOF(1) ];
283 #if KTRACE
284 uio_t ktruio = NULL;
285 char ktr_uio_buf[ UIO_SIZEOF(1) ];
286 int didktr = 0;
287 #endif
288
289 // LP64todo - do we want to raise this?
290 if (nbyte > INT_MAX)
291 return (EINVAL);
292
293 if (IS_64BIT_PROCESS(p)) {
294 auio = uio_createwithbuffer(1, offset, UIO_USERSPACE64, UIO_READ,
295 &uio_buf[0], sizeof(uio_buf));
296 } else {
297 auio = uio_createwithbuffer(1, offset, UIO_USERSPACE32, UIO_READ,
298 &uio_buf[0], sizeof(uio_buf));
299 }
300 uio_addiov(auio, bufp, nbyte);
301
302 #if KTRACE
303 /*
304 * if tracing, save a copy of iovec
305 */
306 if (KTRPOINT(p, KTR_GENIO)) {
307 didktr = 1;
308
309 if (IS_64BIT_PROCESS(p)) {
310 ktruio = uio_createwithbuffer(1, offset, UIO_USERSPACE64, UIO_READ,
311 &ktr_uio_buf[0], sizeof(ktr_uio_buf));
312 } else {
313 ktruio = uio_createwithbuffer(1, offset, UIO_USERSPACE32, UIO_READ,
314 &ktr_uio_buf[0], sizeof(ktr_uio_buf));
315 }
316 uio_addiov(ktruio, bufp, nbyte);
317 }
318 #endif
319 bytecnt = nbyte;
320
321 if ((error = fo_read(fp, auio, fp->f_cred, flags, p))) {
322 if (uio_resid(auio) != bytecnt && (error == ERESTART ||
323 error == EINTR || error == EWOULDBLOCK))
324 error = 0;
325 }
326 bytecnt -= uio_resid(auio);
327 #if KTRACE
328 if (didktr && error == 0) {
329 uio_setresid(ktruio, bytecnt);
330 ktrgenio(p->p_tracep, fd, UIO_READ, ktruio, error);
331 }
332 #endif
333
334 *retval = bytecnt;
335
336 return (error);
337 }
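/*
 * A minimal sketch of the stack-backed uio pattern used above, assuming
 * a single user iovec: UIO_SIZEOF(1) sizes an on-stack buffer so that
 * uio_createwithbuffer() needs no heap allocation on this hot path.
 */
#if 0
	char uio_buf[UIO_SIZEOF(1)];
	uio_t auio;

	auio = uio_createwithbuffer(1, offset,
	    IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
	    UIO_READ, &uio_buf[0], sizeof(uio_buf));
	uio_addiov(auio, bufp, nbyte);
	/* fo_read() then consumes the uio; uio_resid() reports what is left */
#endif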
338
339 /*
340 * Scatter read system call.
341 */
342 int
343 readv(p, uap, retval)
344 struct proc *p;
345 register struct readv_args *uap;
346 user_ssize_t *retval;
347 {
348 uio_t auio = NULL;
349 int error;
350 int size_of_iovec;
351 struct user_iovec *iovp;
352
353 /* Verify range before calling uio_create() */
354 if (uap->iovcnt <= 0 || uap->iovcnt > UIO_MAXIOV)
355 return (EINVAL);
356
357 /* allocate a uio large enough to hold the number of iovecs passed */
358 auio = uio_create(uap->iovcnt, 0,
359 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
360 UIO_READ);
361
362 /* get location of iovecs within the uio. then copyin the iovecs from
363 * user space.
364 */
365 iovp = uio_iovsaddr(auio);
366 if (iovp == NULL) {
367 error = ENOMEM;
368 goto ExitThisRoutine;
369 }
370 size_of_iovec = (IS_64BIT_PROCESS(p) ? sizeof(struct user_iovec) : sizeof(struct iovec));
371 error = copyin(uap->iovp, (caddr_t)iovp, (uap->iovcnt * size_of_iovec));
372 if (error) {
373 goto ExitThisRoutine;
374 }
375
376 /* finalize uio_t for use and do the IO
377 */
378 uio_calculateresid(auio);
379 error = rd_uio(p, uap->fd, auio, retval);
380
381 ExitThisRoutine:
382 if (auio != NULL) {
383 uio_free(auio);
384 }
385 return (error);
386 }
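/*
 * Illustrative user-space sketch of the scatter-read interface wrapped
 * above (buffer names hypothetical). The kernel copies the iovec array
 * in via copyin(), which is why iovcnt must be positive and at most
 * UIO_MAXIOV.
 */
#if 0
#include <sys/uio.h>
#include <unistd.h>

static ssize_t
example_readv(int fd)
{
	char hdr[16], body[4096];
	struct iovec iov[2] = {
		{ .iov_base = hdr,  .iov_len = sizeof(hdr)  },
		{ .iov_base = body, .iov_len = sizeof(body) },
	};

	/* fills hdr first, then body, in one system call */
	return readv(fd, iov, 2);
}
#endif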
387
388 /*
389 * Write system call
390 */
391 int
392 write(p, uap, retval)
393 struct proc *p;
394 register struct write_args *uap;
395 user_ssize_t *retval;
396 {
397 struct fileproc *fp;
398 int error;
399 int fd = uap->fd;
400
401 error = fp_lookup(p,fd,&fp,0);
402 if (error)
403 return(error);
404 if ((fp->f_flag & FWRITE) == 0) {
405 error = EBADF;
406 } else {
407 error = dofilewrite(p, fp, uap->fd, uap->cbuf, uap->nbyte,
408 (off_t)-1, 0, retval);
409 }
410 if (error == 0)
411 fp_drop_written(p, fd, fp);
412 else
413 fp_drop(p, fd, fp, 0);
414 return(error);
415 }
416
417 /*
418 * pwrite system call
419 */
420 int
421 pwrite(p, uap, retval)
422 struct proc *p;
423 register struct pwrite_args *uap;
424 user_ssize_t *retval;
425 {
426 struct fileproc *fp;
427 int error;
428 int fd = uap->fd;
429
430 error = fp_lookup(p,fd,&fp,0);
431 if (error)
432 return(error);
433
434 if ((fp->f_flag & FWRITE) == 0) {
435 error = EBADF;
436 } else {
437 if (fp->f_type != DTYPE_VNODE) {
438 error = ESPIPE;
439 } else {
440 error = dofilewrite(p, fp, uap->fd, uap->buf, uap->nbyte,
441 uap->offset, FOF_OFFSET, retval);
442 }
443 }
444 if (error == 0)
445 fp_drop_written(p, fd, fp);
446 else
447 fp_drop(p, fd, fp, 0);
448
449 if (!error)
450 KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_pwrite) | DBG_FUNC_NONE),
451 uap->fd, uap->nbyte, (unsigned int)((uap->offset >> 32)), (unsigned int)(uap->offset), 0);
452
453 return(error);
454 }
455
456 __private_extern__ int
457 dofilewrite(p, fp, fd, bufp, nbyte, offset, flags, retval)
458 struct proc *p;
459 struct fileproc *fp;
460 int fd, flags;
461 user_addr_t bufp;
462 user_size_t nbyte;
463 off_t offset;
464 user_ssize_t *retval;
465 {
466 uio_t auio;
467 long error = 0;
468 user_ssize_t bytecnt;
469 char uio_buf[ UIO_SIZEOF(1) ];
470 #if KTRACE
471 uio_t ktruio;
472 int didktr = 0;
473 char ktr_uio_buf[ UIO_SIZEOF(1) ];
474 #endif
475
476 // LP64todo - do we want to raise this?
477 if (nbyte > INT_MAX)
478 return (EINVAL);
479
480 if (IS_64BIT_PROCESS(p)) {
481 auio = uio_createwithbuffer(1, offset, UIO_USERSPACE64, UIO_WRITE,
482 &uio_buf[0], sizeof(uio_buf));
483 } else {
484 auio = uio_createwithbuffer(1, offset, UIO_USERSPACE32, UIO_WRITE,
485 &uio_buf[0], sizeof(uio_buf));
486 }
487 uio_addiov(auio, bufp, nbyte);
488
489 #if KTRACE
490 /*
491 * if tracing, save a copy of iovec and uio
492 */
493 if (KTRPOINT(p, KTR_GENIO)) {
494 didktr = 1;
495
496 if (IS_64BIT_PROCESS(p)) {
497 ktruio = uio_createwithbuffer(1, offset, UIO_USERSPACE64, UIO_WRITE,
498 &ktr_uio_buf[0], sizeof(ktr_uio_buf));
499 } else {
500 ktruio = uio_createwithbuffer(1, offset, UIO_USERSPACE32, UIO_WRITE,
501 &ktr_uio_buf[0], sizeof(ktr_uio_buf));
502 }
503 uio_addiov(ktruio, bufp, nbyte);
504 }
505 #endif
506 bytecnt = nbyte;
507 if ((error = fo_write(fp, auio, fp->f_cred, flags, p))) {
508 if (uio_resid(auio) != bytecnt && (error == ERESTART ||
509 error == EINTR || error == EWOULDBLOCK))
510 error = 0;
511 /* The socket layer handles SIGPIPE */
512 if (error == EPIPE && fp->f_type != DTYPE_SOCKET)
513 psignal(p, SIGPIPE);
514 }
515 bytecnt -= uio_resid(auio);
516 #if KTRACE
517 if (didktr && error == 0) {
518 uio_setresid(ktruio, bytecnt);
519 ktrgenio(p->p_tracep, fd, UIO_WRITE, ktruio, error);
520 }
521 #endif
522 *retval = bytecnt;
523
524 return (error);
525 }
526
527 /*
528 * Gather write system call
529 */
530 int
531 writev(p, uap, retval)
532 struct proc *p;
533 register struct writev_args *uap;
534 user_ssize_t *retval;
535 {
536 uio_t auio = NULL;
537 int error;
538 int size_of_iovec;
539 struct user_iovec *iovp;
540
541 /* Verify range before calling uio_create() */
542 if (uap->iovcnt <= 0 || uap->iovcnt > UIO_MAXIOV)
543 return (EINVAL);
544
545 /* allocate a uio large enough to hold the number of iovecs passed */
546 auio = uio_create(uap->iovcnt, 0,
547 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
548 UIO_WRITE);
549
550 /* get location of iovecs within the uio. then copyin the iovecs from
551 * user space.
552 */
553 iovp = uio_iovsaddr(auio);
554 if (iovp == NULL) {
555 error = ENOMEM;
556 goto ExitThisRoutine;
557 }
558 size_of_iovec = (IS_64BIT_PROCESS(p) ? sizeof(struct user_iovec) : sizeof(struct iovec));
559 error = copyin(uap->iovp, (caddr_t)iovp, (uap->iovcnt * size_of_iovec));
560 if (error) {
561 goto ExitThisRoutine;
562 }
563
564 /* finalize uio_t for use and do the IO
565 */
566 uio_calculateresid(auio);
567 error = wr_uio(p, uap->fd, auio, retval);
568
569 ExitThisRoutine:
570 if (auio != NULL) {
571 uio_free(auio);
572 }
573 return (error);
574 }
575
576
577 int
578 wr_uio(p, fdes, uio, retval)
579 struct proc *p;
580 int fdes;
581 register uio_t uio;
582 user_ssize_t *retval;
583 {
584 struct fileproc *fp;
585 int error;
586 user_ssize_t count;
587 #if KTRACE
588 struct iovec_64 *ktriov = NULL;
589 struct uio ktruio;
590 int didktr = 0;
591 u_int iovlen;
592 #endif
593
594 error = fp_lookup(p,fdes,&fp,0);
595 if (error)
596 return(error);
597
598 if ((fp->f_flag & FWRITE) == 0) {
599 error = EBADF;
600 goto out;
601 }
602 count = uio_resid(uio);
603 #if KTRACE
604 /*
605 * if tracing, save a copy of iovec
606 */
607 if (KTRPOINT(p, KTR_GENIO)) {
608 iovlen = uio->uio_iovcnt *
609 (IS_64BIT_PROCESS(p) ? sizeof (struct iovec_64) : sizeof (struct iovec_32));
610 MALLOC(ktriov, struct iovec_64 *, iovlen, M_TEMP, M_WAITOK);
611 if (ktriov != NULL) {
612 bcopy((caddr_t)uio->uio_iovs.iov64p, (caddr_t)ktriov, iovlen);
613 ktruio = *uio;
614 didktr = 1;
615 }
616 }
617 #endif
618 error = fo_write(fp, uio, fp->f_cred, 0, p);
619 if (error) {
620 if (uio_resid(uio) != count && (error == ERESTART ||
621 error == EINTR || error == EWOULDBLOCK))
622 error = 0;
623 /* The socket layer handles SIGPIPE */
624 if (error == EPIPE && fp->f_type != DTYPE_SOCKET)
625 psignal(p, SIGPIPE);
626 }
627 *retval = count - uio_resid(uio);
628
629 #if KTRACE
630 if (didktr) {
631 if (error == 0) {
632 ktruio.uio_iovs.iov64p = ktriov;
633 uio_setresid(&ktruio, *retval);
634 ktrgenio(p->p_tracep, fdes, UIO_WRITE, &ktruio, error);
635 }
636 FREE(ktriov, M_TEMP);
637 }
638 #endif
639
640 out:
641 if ( (error == 0) )
642 fp_drop_written(p, fdes, fp);
643 else
644 fp_drop(p, fdes, fp, 0);
645 return(error);
646 }
647
648
649 int
650 rd_uio(p, fdes, uio, retval)
651 struct proc *p;
652 int fdes;
653 register uio_t uio;
654 user_ssize_t *retval;
655 {
656 struct fileproc *fp;
657 int error;
658 user_ssize_t count;
659 #if KTRACE
660 struct iovec_64 *ktriov = NULL;
661 struct uio ktruio;
662 int didktr = 0;
663 u_int iovlen;
664 #endif
665
666 if ( (error = preparefileread(p, &fp, fdes, 0)) )
667 return (error);
668
669 count = uio_resid(uio);
670 #if KTRACE
671 /*
672 * if tracing, save a copy of iovec
673 */
674 if (KTRPOINT(p, KTR_GENIO)) {
675 iovlen = uio->uio_iovcnt *
676 (IS_64BIT_PROCESS(p) ? sizeof (struct iovec_64) : sizeof (struct iovec_32));
677 MALLOC(ktriov, struct iovec_64 *, iovlen, M_TEMP, M_WAITOK);
678 if (ktriov != NULL) {
679 bcopy((caddr_t)uio->uio_iovs.iov64p, (caddr_t)ktriov, iovlen);
680 ktruio = *uio;
681 didktr = 1;
682 }
683 }
684 #endif
685 error = fo_read(fp, uio, fp->f_cred, 0, p);
686
687 if (error) {
688 if (uio_resid(uio) != count && (error == ERESTART ||
689 error == EINTR || error == EWOULDBLOCK))
690 error = 0;
691 }
692 *retval = count - uio_resid(uio);
693
694 #if KTRACE
695 if (didktr) {
696 if (error == 0) {
697 ktruio.uio_iovs.iov64p = ktriov;
698 uio_setresid(&ktruio, *retval);
699 ktrgenio(p->p_tracep, fdes, UIO_READ, &ktruio, error);
700 }
701 FREE(ktriov, M_TEMP);
702 }
703 #endif
704 donefileread(p, fp, fdes);
705
706 return (error);
707 }
708
709 /*
710 * Ioctl system call
711 *
712 */
713 int
714 ioctl(struct proc *p, register struct ioctl_args *uap, __unused register_t *retval)
715 {
716 struct fileproc *fp;
717 register u_long com;
718 int error = 0;
719 register u_int size;
720 caddr_t datap, memp;
721 boolean_t is64bit;
722 int tmp;
723 #define STK_PARAMS 128
724 char stkbuf[STK_PARAMS];
725 int fd = uap->fd;
726
727 AUDIT_ARG(fd, uap->fd);
728 AUDIT_ARG(cmd, CAST_DOWN(int, uap->com)); /* LP64todo: uap->com is a user-land long */
729 AUDIT_ARG(addr, uap->data);
730
731 is64bit = proc_is64bit(p);
732
733 proc_fdlock(p);
734 error = fp_lookup(p,fd,&fp,1);
735 if (error) {
736 proc_fdunlock(p);
737 return(error);
738 }
739
740 AUDIT_ARG(file, p, fp);
741
742 if ((fp->f_flag & (FREAD | FWRITE)) == 0) {
743 error = EBADF;
744 goto out;
745 }
746
747 #if NETAT
748 /*
749 * ### LD 6/11/97 Hack Alert: this is to get AppleTalk to work
750 * while implementing an ATioctl system call
751 */
752 {
753 if (appletalk_inited && ((uap->com & 0x0000FFFF) == 0xff99)) {
754 u_long fixed_command;
755 #ifdef APPLETALK_DEBUG
756 kprintf("ioctl: special AppleTalk \n");
757 #endif
758 datap = &stkbuf[0];
759 *(user_addr_t *)datap = uap->data;
760 fixed_command = _IOW(0, 0xff99, uap->data);
761 error = fo_ioctl(fp, fixed_command, datap, p);
762 goto out;
763 }
764 }
765
766 #endif /* NETAT */
767
768
769 switch (com = uap->com) {
770 case FIONCLEX:
771 *fdflags(p, uap->fd) &= ~UF_EXCLOSE;
772 error = 0;
773 goto out;
774 case FIOCLEX:
775 *fdflags(p, uap->fd) |= UF_EXCLOSE;
776 error = 0;
777 goto out;
778 }
779
780 /*
781 * Interpret high order word to find amount of data to be
782 * copied to/from the user's address space.
783 */
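	/*
	 * Worked example: FIONBIO is _IOW('f', 126, int), which sets
	 * IOC_IN, stores sizeof(int) in the length bits, 'f' in the
	 * group byte and 126 in the command byte; IOCPARM_LEN() below
	 * recovers that sizeof(int) to size the copyin/copyout.
	 */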
784 size = IOCPARM_LEN(com);
785 if (size > IOCPARM_MAX) {
786 error = ENOTTY;
787 goto out;
788 }
789 memp = NULL;
790 if (size > sizeof (stkbuf)) {
791 proc_fdunlock(p);
792 if ((memp = (caddr_t)kalloc(size)) == 0) {
793 proc_fdlock(p);
794 error = ENOMEM;
795 goto out;
796 }
797 proc_fdlock(p);
798 datap = memp;
799 } else
800 datap = &stkbuf[0];
801 if (com&IOC_IN) {
802 if (size) {
803 proc_fdunlock(p);
804 error = copyin(uap->data, datap, size);
805 if (error) {
806 if (memp)
807 kfree(memp, size);
808 proc_fdlock(p);
809 goto out;
810 }
811 proc_fdlock(p);
812 } else {
813 /* XXX - IOC_IN and no size? we should probably return an error here!! */
814 if (is64bit) {
815 *(user_addr_t *)datap = uap->data;
816 }
817 else {
818 *(uint32_t *)datap = (uint32_t)uap->data;
819 }
820 }
821 } else if ((com&IOC_OUT) && size)
822 /*
823 * Zero the buffer so the user always
824 * gets back something deterministic.
825 */
826 bzero(datap, size);
827 else if (com&IOC_VOID) {
828 /* XXX - this is odd since IOC_VOID means no parameters */
829 if (is64bit) {
830 *(user_addr_t *)datap = uap->data;
831 }
832 else {
833 *(uint32_t *)datap = (uint32_t)uap->data;
834 }
835 }
836
837 switch (com) {
838
839 case FIONBIO:
840 if ( (tmp = *(int *)datap) )
841 fp->f_flag |= FNONBLOCK;
842 else
843 fp->f_flag &= ~FNONBLOCK;
844 error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, p);
845 break;
846
847 case FIOASYNC:
848 if ( (tmp = *(int *)datap) )
849 fp->f_flag |= FASYNC;
850 else
851 fp->f_flag &= ~FASYNC;
852 error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, p);
853 break;
854
855 case FIOSETOWN:
856 tmp = *(int *)datap;
857 if (fp->f_type == DTYPE_SOCKET) {
858 ((struct socket *)fp->f_data)->so_pgid = tmp;
859 error = 0;
860 break;
861 }
862 if (fp->f_type == DTYPE_PIPE) {
863 error = fo_ioctl(fp, (int)TIOCSPGRP, (caddr_t)&tmp, p);
864 break;
865 }
866 if (tmp <= 0) {
867 tmp = -tmp;
868 } else {
869 struct proc *p1 = pfind(tmp);
870 if (p1 == 0) {
871 error = ESRCH;
872 break;
873 }
874 tmp = p1->p_pgrp->pg_id;
875 }
876 error = fo_ioctl(fp, (int)TIOCSPGRP, (caddr_t)&tmp, p);
877 break;
878
879 case FIOGETOWN:
880 if (fp->f_type == DTYPE_SOCKET) {
881 error = 0;
882 *(int *)datap = ((struct socket *)fp->f_data)->so_pgid;
883 break;
884 }
885 error = fo_ioctl(fp, TIOCGPGRP, datap, p);
886 *(int *)datap = -*(int *)datap;
887 break;
888
889 default:
890 error = fo_ioctl(fp, com, datap, p);
891 /*
892 * Copy any data to user, size was
893 * already set and checked above.
894 */
895 if (error == 0 && (com&IOC_OUT) && size)
896 error = copyout(datap, uap->data, (u_int)size);
897 break;
898 }
899 proc_fdunlock(p);
900 if (memp)
901 kfree(memp, size);
902 proc_fdlock(p);
903 out:
904 fp_drop(p, fd, fp, 1);
905 proc_fdunlock(p);
906 return(error);
907 }
908
909 int selwait, nselcoll;
910 #define SEL_FIRSTPASS 1
911 #define SEL_SECONDPASS 2
912 extern int selcontinue(int error);
913 extern int selprocess(int error, int sel_pass);
914 static int selscan(struct proc *p, struct _select * sel,
915 int nfd, register_t *retval, int sel_pass, wait_queue_sub_t wqsub);
916 static int selcount(struct proc *p, u_int32_t *ibits, u_int32_t *obits,
917 int nfd, int * count);
918 static int seldrop(struct proc *p, u_int32_t *ibits, int nfd);
919 extern uint64_t tvtoabstime(struct timeval *tvp);
920
921 /*
922 * Select system call.
923 */
924 int
925 select(struct proc *p, struct select_args *uap, register_t *retval)
926 {
927 int error = 0;
928 u_int ni, nw, size;
929 thread_t th_act;
930 struct uthread *uth;
931 struct _select *sel;
932 int needzerofill = 1;
933 int count = 0;
934
935 th_act = current_thread();
936 uth = get_bsdthread_info(th_act);
937 sel = &uth->uu_select;
938 retval = (int *)get_bsduthreadrval(th_act);
939 *retval = 0;
940
941 if (uap->nd < 0) {
942 return (EINVAL);
943 }
944
945 if (uap->nd > p->p_fd->fd_nfiles)
946 uap->nd = p->p_fd->fd_nfiles; /* forgiving; slightly wrong */
947
948 nw = howmany(uap->nd, NFDBITS);
949 ni = nw * sizeof(fd_mask);
950
951 /*
952 * if this is the first select by the thread
953 * allocate the space for bits.
954 */
955 if (sel->nbytes == 0) {
956 sel->nbytes = 3 * ni;
957 MALLOC(sel->ibits, u_int32_t *, sel->nbytes, M_TEMP, M_WAITOK | M_ZERO);
958 MALLOC(sel->obits, u_int32_t *, sel->nbytes, M_TEMP, M_WAITOK | M_ZERO);
959 if ((sel->ibits == NULL) || (sel->obits == NULL))
960 panic("select out of memory");
961 needzerofill = 0;
962 }
963
964 /*
965 * if the previously allocated space for the bits
966 * is smaller than what is requested, reallocate.
967 */
968 if (sel->nbytes < (3 * ni)) {
969 sel->nbytes = (3 * ni);
970 FREE(sel->ibits, M_TEMP);
971 FREE(sel->obits, M_TEMP);
972 MALLOC(sel->ibits, u_int32_t *, sel->nbytes, M_TEMP, M_WAITOK | M_ZERO);
973 MALLOC(sel->obits, u_int32_t *, sel->nbytes, M_TEMP, M_WAITOK | M_ZERO);
974 if ((sel->ibits == NULL) || (sel->obits == NULL))
975 panic("select out of memory");
976 needzerofill = 0;
977 }
978
979 if (needzerofill) {
980 bzero((caddr_t)sel->ibits, sel->nbytes);
981 bzero((caddr_t)sel->obits, sel->nbytes);
982 }
983
984 /*
985 * get the bits from the user address space
986 */
987 #define getbits(name, x) \
988 do { \
989 if (uap->name && (error = copyin(uap->name, \
990 (caddr_t)&sel->ibits[(x) * nw], ni))) \
991 goto continuation; \
992 } while (0)
993
994 getbits(in, 0);
995 getbits(ou, 1);
996 getbits(ex, 2);
997 #undef getbits
998
999 if (uap->tv) {
1000 struct timeval atv;
1001 if (IS_64BIT_PROCESS(p)) {
1002 struct user_timeval atv64;
1003 error = copyin(uap->tv, (caddr_t)&atv64, sizeof(atv64));
1004 /* Loses resolution - assume timeout < 68 years */
1005 atv.tv_sec = atv64.tv_sec;
1006 atv.tv_usec = atv64.tv_usec;
1007 } else {
1008 error = copyin(uap->tv, (caddr_t)&atv, sizeof(atv));
1009 }
1010 if (error)
1011 goto continuation;
1012 if (itimerfix(&atv)) {
1013 error = EINVAL;
1014 goto continuation;
1015 }
1016
1017 clock_absolutetime_interval_to_deadline(
1018 tvtoabstime(&atv), &sel->abstime);
1019 }
1020 else
1021 sel->abstime = 0;
1022
1023 if ( (error = selcount(p, sel->ibits, sel->obits, uap->nd, &count)) ) {
1024 goto continuation;
1025 }
1026
1027 sel->count = count;
1028 size = SIZEOF_WAITQUEUE_SET + (count * SIZEOF_WAITQUEUE_LINK);
1029 if (sel->allocsize) {
1030 if (sel->wqset == 0)
1031 panic("select: wql memory smashed");
1032 /* needed for the select now */
1033 if (size > sel->allocsize) {
1034 kfree(sel->wqset, sel->allocsize);
1035 sel->allocsize = size;
1036 sel->wqset = (wait_queue_set_t)kalloc(size);
1037 if (sel->wqset == (wait_queue_set_t)NULL)
1038 panic("failed to allocate memory for waitqueue\n");
1039 }
1040 } else {
1041 sel->count = count;
1042 sel->allocsize = size;
1043 sel->wqset = (wait_queue_set_t)kalloc(sel->allocsize);
1044 if (sel->wqset == (wait_queue_set_t)NULL)
1045 panic("failed to allocate memory for waitqueue\n");
1046 }
1047 bzero(sel->wqset, size);
1048 sel->wql = (char *)sel->wqset + SIZEOF_WAITQUEUE_SET;
1049 wait_queue_set_init(sel->wqset, (SYNC_POLICY_FIFO | SYNC_POLICY_PREPOST));
1050
1051 continuation:
1052 return selprocess(error, SEL_FIRSTPASS);
1053 }
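/*
 * Illustrative user-space sketch of select(), including the pure-poll
 * case handled by the abstime == 0 check in selprocess() below. A NULL
 * timeout would block indefinitely instead.
 */
#if 0
#include <sys/select.h>

static int
example_select(int fd)
{
	fd_set rfds;
	struct timeval tv = { 0, 0 };	/* zero-valued timeval: pure poll */

	FD_ZERO(&rfds);
	FD_SET(fd, &rfds);
	return select(fd + 1, &rfds, NULL, NULL, &tv);
}
#endif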
1054
1055 int
1056 selcontinue(int error)
1057 {
1058 return selprocess(error, SEL_SECONDPASS);
1059 }
1060
1061 int
1062 selprocess(int error, int sel_pass)
1063 {
1064 int ncoll;
1065 u_int ni, nw;
1066 thread_t th_act;
1067 struct uthread *uth;
1068 struct proc *p;
1069 struct select_args *uap;
1070 int *retval;
1071 struct _select *sel;
1072 int unwind = 1;
1073 int prepost = 0;
1074 int somewakeup = 0;
1075 int doretry = 0;
1076 wait_result_t wait_result;
1077
1078 p = current_proc();
1079 th_act = current_thread();
1080 uap = (struct select_args *)get_bsduthreadarg(th_act);
1081 retval = (int *)get_bsduthreadrval(th_act);
1082 uth = get_bsdthread_info(th_act);
1083 sel = &uth->uu_select;
1084
1085 /* if it is the first pass, the wait queue is not set up yet */
1086 if ((error != 0) && (sel_pass == SEL_FIRSTPASS))
1087 unwind = 0;
1088 if (sel->count == 0)
1089 unwind = 0;
1090 retry:
1091 if (error != 0) {
1092 goto done;
1093 }
1094
1095 ncoll = nselcoll;
1096 p->p_flag |= P_SELECT;
1097 /* skip scans if the select is just for timeouts */
1098 if (sel->count) {
1099 if (sel_pass == SEL_FIRSTPASS)
1100 wait_queue_sub_clearrefs(sel->wqset);
1101
1102 error = selscan(p, sel, uap->nd, retval, sel_pass, sel->wqset);
1103 if (error || *retval) {
1104 goto done;
1105 }
1106 if (prepost) {
1107 /* if the select was preposted, then we can wake up and discover that
1108 * someone else already read the data; go through the select again if time permits
1109 */
1110 prepost = 0;
1111 doretry = 1;
1112 }
1113 if (somewakeup) {
1114 somewakeup = 0;
1115 doretry = 1;
1116 }
1117 }
1118
1119 if (uap->tv) {
1120 uint64_t now;
1121
1122 clock_get_uptime(&now);
1123 if (now >= sel->abstime)
1124 goto done;
1125 }
1126
1127 if (doretry) {
1128 /* cleanup obits and try again */
1129 doretry = 0;
1130 sel_pass = SEL_FIRSTPASS;
1131 goto retry;
1132 }
1133
1134 /*
1135 * To effect a poll, the timeout argument should be
1136 * non-nil, pointing to a zero-valued timeval structure.
1137 */
1138 if (uap->tv && sel->abstime == 0) {
1139 goto done;
1140 }
1141
1142 /* No spurious wakeups due to collisions, no need to check for them */
1143 if ((sel_pass == SEL_SECONDPASS) || ((p->p_flag & P_SELECT) == 0)) {
1144 sel_pass = SEL_FIRSTPASS;
1145 goto retry;
1146 }
1147
1148 p->p_flag &= ~P_SELECT;
1149
1150 /* if the select is just for timeout skip check */
1151 if (sel->count && (sel_pass == SEL_SECONDPASS))
1152 panic("selprocess: 2nd pass assertwaiting");
1153
1154 /* Wait Queue Subordinate has waitqueue as first element */
1155 wait_result = wait_queue_assert_wait((wait_queue_t)sel->wqset,
1156 &selwait, THREAD_ABORTSAFE, sel->abstime);
1157 if (wait_result != THREAD_AWAKENED) {
1158 /* there are no preposted events */
1159 error = tsleep1(NULL, PSOCK | PCATCH,
1160 "select", 0, selcontinue);
1161 } else {
1162 prepost = 1;
1163 error = 0;
1164 }
1165
1166 sel_pass = SEL_SECONDPASS;
1167 if (error == 0) {
1168 if (!prepost)
1169 somewakeup = 1;
1170 goto retry;
1171 }
1172 done:
1173 if (unwind) {
1174 wait_subqueue_unlink_all(sel->wqset);
1175 seldrop(p, sel->ibits, uap->nd);
1176 }
1177 p->p_flag &= ~P_SELECT;
1178 /* select is not restarted after signals... */
1179 if (error == ERESTART)
1180 error = EINTR;
1181 if (error == EWOULDBLOCK)
1182 error = 0;
1183 nw = howmany(uap->nd, NFDBITS);
1184 ni = nw * sizeof(fd_mask);
1185
1186 #define putbits(name, x) \
1187 do { \
1188 if (uap->name && (error2 = \
1189 copyout((caddr_t)&sel->obits[(x) * nw], uap->name, ni))) \
1190 error = error2; \
1191 } while (0)
1192
1193 if (error == 0) {
1194 int error2;
1195
1196 putbits(in, 0);
1197 putbits(ou, 1);
1198 putbits(ex, 2);
1199 #undef putbits
1200 }
1201 return(error);
1202 }
1203
1204 static int
1205 selscan(p, sel, nfd, retval, sel_pass, wqsub)
1206 struct proc *p;
1207 struct _select *sel;
1208 int nfd;
1209 register_t *retval;
1210 int sel_pass;
1211 wait_queue_sub_t wqsub;
1212 {
1213 register struct filedesc *fdp = p->p_fd;
1214 register int msk, i, j, fd;
1215 register u_int32_t bits;
1216 struct fileproc *fp;
1217 int n = 0;
1218 int nc = 0;
1219 static int flag[3] = { FREAD, FWRITE, 0 };
1220 u_int32_t *iptr, *optr;
1221 u_int nw;
1222 u_int32_t *ibits, *obits;
1223 char * wql;
1224 char * wql_ptr;
1225
1226 /*
1227 * Problems during reboot due to MacOSX signal problems
1228 * in Beaker1C; verify that p->p_fd is valid
1229 */
1230 if (fdp == NULL) {
1231 *retval=0;
1232 return(EIO);
1233 }
1234 ibits = sel->ibits;
1235 obits = sel->obits;
1236 wql = sel->wql;
1237
1238 nw = howmany(nfd, NFDBITS);
1239
1240 nc = 0;
1241 proc_fdlock(p);
1242
1243 if (sel->count) {
1244 for (msk = 0; msk < 3; msk++) {
1245 iptr = (u_int32_t *)&ibits[msk * nw];
1246 optr = (u_int32_t *)&obits[msk * nw];
1247
1248 for (i = 0; i < nfd; i += NFDBITS) {
1249 bits = iptr[i/NFDBITS];
1250
1251 while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
1252 bits &= ~(1 << j);
1253 fp = fdp->fd_ofiles[fd];
1254
1255 if (fp == NULL ||
1256 (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
1257 proc_fdunlock(p);
1258 return(EBADF);
1259 }
1260 if (sel_pass == SEL_SECONDPASS) {
1261 wql_ptr = (char *)0;
1262 fp->f_flags &= ~FP_INSELECT;
1263 fp->f_waddr = (void *)0;
1264 } else {
1265 wql_ptr = (wql + nc * SIZEOF_WAITQUEUE_LINK);
1266 fp->f_flags |= FP_INSELECT;
1267 fp->f_waddr = (void *)wqsub;
1268 }
1269 if (fp->f_ops && fo_select(fp, flag[msk], wql_ptr, p)) {
1270 optr[fd/NFDBITS] |= (1 << (fd % NFDBITS));
1271 n++;
1272 }
1273 nc++;
1274 }
1275 }
1276 }
1277 }
1278 proc_fdunlock(p);
1279 *retval = n;
1280 return (0);
1281 }
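/*
 * Worked example of the ffs()-based scan above, assuming NFDBITS == 32:
 * with bits == 0x00000102 at i == 0, ffs() returns 2, so j == 1 and
 * fd == 1; clearing bit 1 leaves 0x00000100, ffs() returns 9, giving
 * fd == 8; the inner loop stops once bits is zero or fd reaches nfd.
 */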
1282
1283 static int poll_callback(struct kqueue *, struct kevent *, void *);
1284
1285 struct poll_continue_args {
1286 user_addr_t pca_fds;
1287 u_int pca_nfds;
1288 u_int pca_rfds;
1289 };
1290
1291 int
1292 poll(struct proc *p, struct poll_args *uap, register_t *retval)
1293 {
1294 struct poll_continue_args *cont;
1295 struct pollfd *fds;
1296 struct kqueue *kq;
1297 struct timeval atv;
1298 int ncoll, error = 0;
1299 u_int nfds = uap->nfds;
1300 u_int rfds = 0;
1301 u_int i;
1302 size_t ni;
1303
1304 /*
1305 * This is kinda bogus. We have fd limits, but that is not
1306 * really related to the size of the pollfd array. Make sure
1307 * we let the process use at least FD_SETSIZE entries and at
1308 * least enough for the current limits. We want to be reasonably
1309 * safe, but not overly restrictive.
1310 */
1311 if (nfds > OPEN_MAX ||
1312 (nfds > p->p_rlimit[RLIMIT_NOFILE].rlim_cur && nfds > FD_SETSIZE))
1313 return (EINVAL);
1314
1315 kq = kqueue_alloc(p);
1316 if (kq == NULL)
1317 return (EAGAIN);
1318
1319 ni = nfds * sizeof(struct pollfd) + sizeof(struct poll_continue_args);
1320 MALLOC(cont, struct poll_continue_args *, ni, M_TEMP, M_WAITOK);
1321 if (NULL == cont) {
1322 error = EAGAIN;
1323 goto out;
1324 }
1325
1326 fds = (struct pollfd *)&cont[1];
1327 error = copyin(uap->fds, fds, nfds * sizeof(struct pollfd));
1328 if (error)
1329 goto out;
1330
1331 if (uap->timeout != -1) {
1332 struct timeval rtv;
1333
1334 atv.tv_sec = uap->timeout / 1000;
1335 atv.tv_usec = (uap->timeout % 1000) * 1000;
1336 if (itimerfix(&atv)) {
1337 error = EINVAL;
1338 goto out;
1339 }
1340 getmicrouptime(&rtv);
1341 timevaladd(&atv, &rtv);
1342 } else {
1343 atv.tv_sec = 0;
1344 atv.tv_usec = 0;
1345 }
1346
1347 /* JMM - all this P_SELECT stuff is bogus */
1348 ncoll = nselcoll;
1349 p->p_flag |= P_SELECT;
1350
1351 for (i = 0; i < nfds; i++) {
1352 short events = fds[i].events;
1353 struct kevent kev;
1354 int kerror = 0;
1355
1356 /* per spec, ignore fd values below zero */
1357 if (fds[i].fd < 0) {
1358 fds[i].revents = 0;
1359 continue;
1360 }
1361
1362 /* convert the poll event into a kqueue kevent */
1363 kev.ident = fds[i].fd;
1364 kev.flags = EV_ADD | EV_ONESHOT | EV_POLL;
1365 kev.fflags = NOTE_LOWAT;
1366 kev.data = 1; /* efficiency be damned: any data should trigger */
1367 kev.udata = CAST_USER_ADDR_T(&fds[i]);
1368
1369 /* Handle input events */
1370 if (events & ( POLLIN | POLLRDNORM | POLLPRI | POLLRDBAND )) {
1371 kev.filter = EVFILT_READ;
1372 if (!(events & ( POLLIN | POLLRDNORM )))
1373 kev.flags |= EV_OOBAND;
1374 kerror = kevent_register(kq, &kev, p);
1375 }
1376
1377 /* Handle output events */
1378 if (kerror == 0 &&
1379 events & ( POLLOUT | POLLWRNORM | POLLWRBAND )) {
1380 kev.filter = EVFILT_WRITE;
1381 kerror = kevent_register(kq, &kev, p);
1382 }
1383
1384 /* Handle BSD extension vnode events */
1385 if (kerror == 0 &&
1386 events & ( POLLEXTEND | POLLATTRIB | POLLNLINK | POLLWRITE )) {
1387 kev.filter = EVFILT_VNODE;
1388 kev.fflags = 0;
1389 if (events & POLLEXTEND)
1390 kev.fflags |= NOTE_EXTEND;
1391 if (events & POLLATTRIB)
1392 kev.fflags |= NOTE_ATTRIB;
1393 if (events & POLLNLINK)
1394 kev.fflags |= NOTE_LINK;
1395 if (events & POLLWRITE)
1396 kev.fflags |= NOTE_WRITE;
1397 kerror = kevent_register(kq, &kev, p);
1398 }
1399
1400 if (kerror != 0) {
1401 fds[i].revents = POLLNVAL;
1402 rfds++;
1403 } else
1404 fds[i].revents = 0;
1405 }
1406
1407 /* Did we have any trouble registering? */
1408 if (rfds > 0)
1409 goto done;
1410
1411 /* scan for, and possibly wait for, the kevents to trigger */
1412 cont->pca_fds = uap->fds;
1413 cont->pca_nfds = nfds;
1414 cont->pca_rfds = rfds;
1415 error = kevent_scan(kq, poll_callback, NULL, cont, &atv, p);
1416 rfds = cont->pca_rfds;
1417
1418 done:
1419 p->p_flag &= ~P_SELECT;
1420 /* poll is not restarted after signals... */
1421 if (error == ERESTART)
1422 error = EINTR;
1423 if (error == EWOULDBLOCK)
1424 error = 0;
1425 if (error == 0) {
1426 error = copyout(fds, uap->fds, nfds * sizeof(struct pollfd));
1427 *retval = rfds;
1428 }
1429
1430 out:
1431 if (NULL != cont)
1432 FREE(cont, M_TEMP);
1433
1434 kqueue_dealloc(kq, p);
1435 return (error);
1436 }
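/*
 * Illustrative user-space sketch of poll(), which the code above
 * implements by registering one kevent per pollfd on a private kqueue.
 * The timeout is in milliseconds and is converted to a timeval above.
 */
#if 0
#include <poll.h>

static int
example_poll(int fd)
{
	struct pollfd pfd = { .fd = fd, .events = POLLIN };
	int n = poll(&pfd, 1, 1000);	/* wait up to one second */

	if (n > 0 && (pfd.revents & POLLIN)) {
		/* fd is readable */
	}
	return n;
}
#endif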
1437
1438 static int
1439 poll_callback(__unused struct kqueue *kq, struct kevent *kevp, void *data)
1440 {
1441 struct poll_continue_args *cont = (struct poll_continue_args *)data;
1442 struct pollfd *fds = CAST_DOWN(struct pollfd *, kevp->udata);
1443 short mask;
1444
1445 /* convert the results back into revents */
1446 if (kevp->flags & EV_EOF)
1447 fds->revents |= POLLHUP;
1448 if (kevp->flags & EV_ERROR)
1449 fds->revents |= POLLERR;
1450 cont->pca_rfds++;
1451
1452 switch (kevp->filter) {
1453 case EVFILT_READ:
1454 if (fds->revents & POLLHUP)
1455 mask = (POLLIN | POLLRDNORM | POLLPRI | POLLRDBAND );
1456 else {
1457 mask = 0;
1458 if (kevp->data != 0)
1459 mask |= (POLLIN | POLLRDNORM );
1460 if (kevp->flags & EV_OOBAND)
1461 mask |= ( POLLPRI | POLLRDBAND );
1462 }
1463 fds->revents |= (fds->events & mask);
1464 break;
1465
1466 case EVFILT_WRITE:
1467 if (!(fds->revents & POLLHUP))
1468 fds->revents |= (fds->events & ( POLLOUT | POLLWRNORM | POLLWRBAND ));
1469 break;
1470
1471 case EVFILT_PROC:
1472 if (kevp->fflags & NOTE_EXTEND)
1473 fds->revents |= (fds->events & POLLEXTEND);
1474 if (kevp->fflags & NOTE_ATTRIB)
1475 fds->revents |= (fds->events & POLLATTRIB);
1476 if (kevp->fflags & NOTE_LINK)
1477 fds->revents |= (fds->events & POLLNLINK);
1478 if (kevp->fflags & NOTE_WRITE)
1479 fds->revents |= (fds->events & POLLWRITE);
1480 break;
1481 }
1482 return 0;
1483 }
1484
1485 int
1486 seltrue(__unused dev_t dev, __unused int flag, __unused struct proc *p)
1487 {
1488
1489 return (1);
1490 }
1491
1492 static int
1493 selcount(struct proc *p, u_int32_t *ibits, __unused u_int32_t *obits,
1494 int nfd, int *count)
1495 {
1496 register struct filedesc *fdp = p->p_fd;
1497 register int msk, i, j, fd;
1498 register u_int32_t bits;
1499 struct fileproc *fp;
1500 int n = 0;
1501 u_int32_t *iptr;
1502 u_int nw;
1503 int error=0;
1504 int dropcount;
1505
1506 /*
1507 * Problems during reboot due to MacOSX signal problems
1508 * in Beaker1C; verify that p->p_fd is valid
1509 */
1510 if (fdp == NULL) {
1511 *count=0;
1512 return(EIO);
1513 }
1514 nw = howmany(nfd, NFDBITS);
1515
1516 proc_fdlock(p);
1517 for (msk = 0; msk < 3; msk++) {
1518 iptr = (u_int32_t *)&ibits[msk * nw];
1519 for (i = 0; i < nfd; i += NFDBITS) {
1520 bits = iptr[i/NFDBITS];
1521 while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
1522 bits &= ~(1 << j);
1523 fp = fdp->fd_ofiles[fd];
1524 if (fp == NULL ||
1525 (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
1526 *count=0;
1527 error = EBADF;
1528 goto bad;
1529 }
1530 fp->f_iocount++;
1531 n++;
1532 }
1533 }
1534 }
1535 proc_fdunlock(p);
1536
1537 *count = n;
1538 return (0);
1539 bad:
1540 dropcount = 0;
1541
1542 if (n == 0)
1543 goto out;
1544 /* undo the iocounts */
1545 for (msk = 0; msk < 3; msk++) {
1546 iptr = (u_int32_t *)&ibits[msk * nw];
1547 for (i = 0; i < nfd; i += NFDBITS) {
1548 bits = iptr[i/NFDBITS];
1549 while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
1550 bits &= ~(1 << j);
1551 fp = fdp->fd_ofiles[fd];
1552 if (dropcount >= n)
1553 goto out;
1554 fp->f_iocount--;
1555
1556 if (p->p_fpdrainwait && fp->f_iocount == 0) {
1557 p->p_fpdrainwait = 0;
1558 wakeup(&p->p_fpdrainwait);
1559 }
1560 dropcount++;
1561 }
1562 }
1563 }
1564 out:
1565 proc_fdunlock(p);
1566 return(error);
1567 }
1568
1569 static int
1570 seldrop(p, ibits, nfd)
1571 struct proc *p;
1572 u_int32_t *ibits;
1573 int nfd;
1574 {
1575 register struct filedesc *fdp = p->p_fd;
1576 register int msk, i, j, fd;
1577 register u_int32_t bits;
1578 struct fileproc *fp;
1579 int n = 0;
1580 u_int32_t *iptr;
1581 u_int nw;
1582
1583 /*
1584 * Problems during reboot due to MacOSX signal problems
1585 * in Beaker1C; verify that p->p_fd is valid
1586 */
1587 if (fdp == NULL) {
1588 return(EIO);
1589 }
1590
1591 nw = howmany(nfd, NFDBITS);
1592
1593
1594 proc_fdlock(p);
1595 for (msk = 0; msk < 3; msk++) {
1596 iptr = (u_int32_t *)&ibits[msk * nw];
1597 for (i = 0; i < nfd; i += NFDBITS) {
1598 bits = iptr[i/NFDBITS];
1599 while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
1600 bits &= ~(1 << j);
1601 fp = fdp->fd_ofiles[fd];
1602 if (fp == NULL
1603 #if 0
1604 /* if you are here then it is being closed */
1605 || (fdp->fd_ofileflags[fd] & UF_RESERVED)
1606 #endif
1607 ) {
1608 proc_fdunlock(p);
1609 return(EBADF);
1610 }
1611 n++;
1612 fp->f_iocount--;
1613 fp->f_flags &= ~FP_INSELECT;
1614
1615 if (p->p_fpdrainwait && fp->f_iocount == 0) {
1616 p->p_fpdrainwait = 0;
1617 wakeup(&p->p_fpdrainwait);
1618 }
1619 }
1620 }
1621 }
1622 proc_fdunlock(p);
1623 return (0);
1624 }
1625
1626 /*
1627 * Record a select request.
1628 */
1629 void
1630 selrecord(__unused struct proc *selector, struct selinfo *sip, void * p_wql)
1631 {
1632 thread_t cur_act = current_thread();
1633 struct uthread * ut = get_bsdthread_info(cur_act);
1634
1635 /* need to look at collisions */
1636
1637 if ((p_wql == (void *)0) && ((sip->si_flags & SI_INITED) == 0)) {
1638 return;
1639 }
1640
1641 /* do not record if this is the second pass of select */
1642 if ((p_wql == (void *)0)) {
1643 return;
1644 }
1645
1646 if ((sip->si_flags & SI_INITED) == 0) {
1647 wait_queue_init(&sip->si_wait_queue, SYNC_POLICY_FIFO);
1648 sip->si_flags |= SI_INITED;
1649 sip->si_flags &= ~SI_CLEAR;
1650 }
1651
1652 if (sip->si_flags & SI_RECORDED) {
1653 sip->si_flags |= SI_COLL;
1654 } else
1655 sip->si_flags &= ~SI_COLL;
1656
1657 sip->si_flags |= SI_RECORDED;
1658 if (!wait_queue_member(&sip->si_wait_queue, ut->uu_select.wqset))
1659 wait_queue_link_noalloc(&sip->si_wait_queue, ut->uu_select.wqset,
1660 (wait_queue_link_t)p_wql);
1661
1662 return;
1663 }
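/*
 * Hedged sketch of the usual driver-side pairing (softc layout and
 * names are hypothetical): a device select entry records the request
 * with selrecord() when not ready, and the data path later calls
 * selwakeup() on the same selinfo to wake the recorded selectors.
 */
#if 0
static int
exdev_select(dev_t dev, int which, void *wql, struct proc *p)
{
	struct exdev_softc *sc = exdev_lookup(dev);	/* hypothetical */

	if (which == FREAD && sc->sc_bytes_ready)
		return (1);			/* ready right now */
	selrecord(p, &sc->sc_rsel, wql);	/* arm for a later wakeup */
	return (0);
}

static void
exdev_rxintr(struct exdev_softc *sc)
{
	sc->sc_bytes_ready = 1;
	selwakeup(&sc->sc_rsel);
}
#endif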
1664
1665 void
1666 selwakeup(sip)
1667 register struct selinfo *sip;
1668 {
1669
1670 if ((sip->si_flags & SI_INITED) == 0) {
1671 return;
1672 }
1673
1674 if (sip->si_flags & SI_COLL) {
1675 nselcoll++;
1676 sip->si_flags &= ~SI_COLL;
1677 #if 0
1678 /* will not support */
1679 //wakeup((caddr_t)&selwait);
1680 #endif
1681 }
1682
1683 if (sip->si_flags & SI_RECORDED) {
1684 wait_queue_wakeup_all(&sip->si_wait_queue, &selwait, THREAD_AWAKENED);
1685 sip->si_flags &= ~SI_RECORDED;
1686 }
1687
1688 }
1689
1690 void
1691 selthreadclear(sip)
1692 register struct selinfo *sip;
1693 {
1694
1695 if ((sip->si_flags & SI_INITED) == 0) {
1696 return;
1697 }
1698 if (sip->si_flags & SI_RECORDED) {
1699 selwakeup(sip);
1700 sip->si_flags &= ~(SI_RECORDED | SI_COLL);
1701 }
1702 sip->si_flags |= SI_CLEAR;
1703 wait_queue_unlinkall_nofree(&sip->si_wait_queue);
1704 }
1705
1706
1707
1708
1709 #define DBG_POST 0x10
1710 #define DBG_WATCH 0x11
1711 #define DBG_WAIT 0x12
1712 #define DBG_MOD 0x13
1713 #define DBG_EWAKEUP 0x14
1714 #define DBG_ENQUEUE 0x15
1715 #define DBG_DEQUEUE 0x16
1716
1717 #define DBG_MISC_POST MISCDBG_CODE(DBG_EVENT,DBG_POST)
1718 #define DBG_MISC_WATCH MISCDBG_CODE(DBG_EVENT,DBG_WATCH)
1719 #define DBG_MISC_WAIT MISCDBG_CODE(DBG_EVENT,DBG_WAIT)
1720 #define DBG_MISC_MOD MISCDBG_CODE(DBG_EVENT,DBG_MOD)
1721 #define DBG_MISC_EWAKEUP MISCDBG_CODE(DBG_EVENT,DBG_EWAKEUP)
1722 #define DBG_MISC_ENQUEUE MISCDBG_CODE(DBG_EVENT,DBG_ENQUEUE)
1723 #define DBG_MISC_DEQUEUE MISCDBG_CODE(DBG_EVENT,DBG_DEQUEUE)
1724
1725
1726 #define EVPROCDEQUE(p, evq) do { \
1727 proc_lock(p); \
1728 if (evq->ee_flags & EV_QUEUED) { \
1729 TAILQ_REMOVE(&p->p_evlist, evq, ee_plist); \
1730 evq->ee_flags &= ~EV_QUEUED; \
1731 } \
1732 proc_unlock(p); \
1733 } while (0);
1734
1735
1736 /*
1737 * called upon socket close. dequeue and free all events for
1738 * the socket... socket must be locked by the caller.
1739 */
1740 void
1741 evsofree(struct socket *sp)
1742 {
1743 struct eventqelt *evq, *next;
1744 proc_t p;
1745
1746 if (sp == NULL)
1747 return;
1748
1749 for (evq = sp->so_evlist.tqh_first; evq != NULL; evq = next) {
1750 next = evq->ee_slist.tqe_next;
1751 p = evq->ee_proc;
1752
1753 if (evq->ee_flags & EV_QUEUED) {
1754 EVPROCDEQUE(p, evq);
1755 }
1756 TAILQ_REMOVE(&sp->so_evlist, evq, ee_slist); // remove from socket q
1757 FREE(evq, M_TEMP);
1758 }
1759 }
1760
1761
1762 /*
1763 * called upon pipe close. dequeue and free all events for
1764 * the pipe... pipe must be locked by the caller
1765 */
1766 void
1767 evpipefree(struct pipe *cpipe)
1768 {
1769 struct eventqelt *evq, *next;
1770 proc_t p;
1771
1772 for (evq = cpipe->pipe_evlist.tqh_first; evq != NULL; evq = next) {
1773 next = evq->ee_slist.tqe_next;
1774 p = evq->ee_proc;
1775
1776 EVPROCDEQUE(p, evq);
1777
1778 TAILQ_REMOVE(&cpipe->pipe_evlist, evq, ee_slist); // remove from pipe q
1779 FREE(evq, M_TEMP);
1780 }
1781 }
1782
1783
1784 /*
1785 * enqueue this event if it's not already queued. wakeup
1786 * the proc if we do queue this event to it...
1787 * entered with proc lock held... we drop it before
1788 * doing the wakeup and return in that state
1789 */
1790 static void
1791 evprocenque(struct eventqelt *evq)
1792 {
1793 proc_t p;
1794
1795 assert(evq);
1796 p = evq->ee_proc;
1797
1798 KERNEL_DEBUG(DBG_MISC_ENQUEUE|DBG_FUNC_START, evq, evq->ee_flags, evq->ee_eventmask,0,0);
1799
1800 proc_lock(p);
1801
1802 if (evq->ee_flags & EV_QUEUED) {
1803 proc_unlock(p);
1804
1805 KERNEL_DEBUG(DBG_MISC_ENQUEUE|DBG_FUNC_END, 0,0,0,0,0);
1806 return;
1807 }
1808 evq->ee_flags |= EV_QUEUED;
1809
1810 TAILQ_INSERT_TAIL(&p->p_evlist, evq, ee_plist);
1811
1812 proc_unlock(p);
1813
1814 wakeup(&p->p_evlist);
1815
1816 KERNEL_DEBUG(DBG_MISC_ENQUEUE|DBG_FUNC_END, 0,0,0,0,0);
1817 }
1818
1819
1820 /*
1821 * pipe lock must be taken by the caller
1822 */
1823 void
1824 postpipeevent(struct pipe *pipep, int event)
1825 {
1826 int mask;
1827 struct eventqelt *evq;
1828
1829 if (pipep == NULL)
1830 return;
1831 KERNEL_DEBUG(DBG_MISC_POST|DBG_FUNC_START, event,0,0,1,0);
1832
1833 for (evq = pipep->pipe_evlist.tqh_first;
1834 evq != NULL; evq = evq->ee_slist.tqe_next) {
1835
1836 if (evq->ee_eventmask == 0)
1837 continue;
1838 mask = 0;
1839
1840 switch (event & (EV_RWBYTES | EV_RCLOSED | EV_WCLOSED)) {
1841
1842 case EV_RWBYTES:
1843 if ((evq->ee_eventmask & EV_RE) && pipep->pipe_buffer.cnt) {
1844 mask |= EV_RE;
1845 evq->ee_req.er_rcnt = pipep->pipe_buffer.cnt;
1846 }
1847 if ((evq->ee_eventmask & EV_WR) &&
1848 (pipep->pipe_buffer.size - pipep->pipe_buffer.cnt) >= PIPE_BUF) {
1849
1850 if (pipep->pipe_state & PIPE_EOF) {
1851 mask |= EV_WR|EV_RESET;
1852 break;
1853 }
1854 mask |= EV_WR;
1855 evq->ee_req.er_wcnt = pipep->pipe_buffer.size - pipep->pipe_buffer.cnt;
1856 }
1857 break;
1858
1859 case EV_WCLOSED:
1860 case EV_RCLOSED:
1861 if ((evq->ee_eventmask & EV_RE)) {
1862 mask |= EV_RE|EV_RCLOSED;
1863 }
1864 if ((evq->ee_eventmask & EV_WR)) {
1865 mask |= EV_WR|EV_WCLOSED;
1866 }
1867 break;
1868
1869 default:
1870 return;
1871 }
1872 if (mask) {
1873 /*
1874 * disarm... postevents are nops until this event is 'read' via
1875 * waitevent and then re-armed via modwatch
1876 */
1877 evq->ee_eventmask = 0;
1878
1879 /*
1880 * since events are disarmed until after the waitevent
1881 * the ee_req.er_xxxx fields can't change once we've
1882 * inserted this event into the proc queue...
1883 * therefore, the waitevent will see a 'consistent'
1884 * snapshot of the event, even though it won't hold
1885 * the pipe lock, and we're updating the event outside
1886 * of the proc lock, which it will hold
1887 */
1888 evq->ee_req.er_eventbits |= mask;
1889
1890 KERNEL_DEBUG(DBG_MISC_POST, evq, evq->ee_req.er_eventbits, mask, 1,0);
1891
1892 evprocenque(evq);
1893 }
1894 }
1895 KERNEL_DEBUG(DBG_MISC_POST|DBG_FUNC_END, 0,0,0,1,0);
1896 }
1897
1898
1899 /*
1900 * given either a sockbuf or a socket run down the
1901 * event list and queue ready events found...
1902 * the socket must be locked by the caller
1903 */
1904 void
1905 postevent(struct socket *sp, struct sockbuf *sb, int event)
1906 {
1907 int mask;
1908 struct eventqelt *evq;
1909 struct tcpcb *tp;
1910
1911 if (sb)
1912 sp = sb->sb_so;
1913 if (sp == NULL)
1914 return;
1915
1916 KERNEL_DEBUG(DBG_MISC_POST|DBG_FUNC_START, (int)sp, event, 0, 0, 0);
1917
1918 for (evq = sp->so_evlist.tqh_first;
1919 evq != NULL; evq = evq->ee_slist.tqe_next) {
1920
1921 if (evq->ee_eventmask == 0)
1922 continue;
1923 mask = 0;
1924
1925 /* ready for reading:
1926 - byte cnt >= receive low water mark
1927 - read-half of conn closed
1928 - conn pending for listening sock
1929 - socket error pending
1930
1931 ready for writing
1932 - byte cnt avail >= send low water mark
1933 - write half of conn closed
1934 - socket error pending
1935 - non-blocking conn completed successfully
1936
1937 exception pending
1938 - out of band data
1939 - sock at out of band mark
1940 */
1941
1942 switch (event & EV_DMASK) {
1943
1944 case EV_OOB:
1945 if ((evq->ee_eventmask & EV_EX)) {
1946 if (sp->so_oobmark || ((sp->so_state & SS_RCVATMARK)))
1947 mask |= EV_EX|EV_OOB;
1948 }
1949 break;
1950
1951 case EV_RWBYTES|EV_OOB:
1952 if ((evq->ee_eventmask & EV_EX)) {
1953 if (sp->so_oobmark || ((sp->so_state & SS_RCVATMARK)))
1954 mask |= EV_EX|EV_OOB;
1955 }
1956 /*
1957 * fall into the next case
1958 */
1959 case EV_RWBYTES:
1960 if ((evq->ee_eventmask & EV_RE) && soreadable(sp)) {
1961 if (sp->so_error) {
1962 if ((sp->so_type == SOCK_STREAM) && ((sp->so_error == ECONNREFUSED) || (sp->so_error == ECONNRESET))) {
1963 if ((sp->so_pcb == 0) || (((struct inpcb *)sp->so_pcb)->inp_state == INPCB_STATE_DEAD) || !(tp = sototcpcb(sp)) ||
1964 (tp->t_state == TCPS_CLOSED)) {
1965 mask |= EV_RE|EV_RESET;
1966 break;
1967 }
1968 }
1969 }
1970 mask |= EV_RE;
1971 evq->ee_req.er_rcnt = sp->so_rcv.sb_cc;
1972
1973 if (sp->so_state & SS_CANTRCVMORE) {
1974 mask |= EV_FIN;
1975 break;
1976 }
1977 }
1978 if ((evq->ee_eventmask & EV_WR) && sowriteable(sp)) {
1979 if (sp->so_error) {
1980 if ((sp->so_type == SOCK_STREAM) && ((sp->so_error == ECONNREFUSED) || (sp->so_error == ECONNRESET))) {
1981 if ((sp->so_pcb == 0) || (((struct inpcb *)sp->so_pcb)->inp_state == INPCB_STATE_DEAD) || !(tp = sototcpcb(sp)) ||
1982 (tp->t_state == TCPS_CLOSED)) {
1983 mask |= EV_WR|EV_RESET;
1984 break;
1985 }
1986 }
1987 }
1988 mask |= EV_WR;
1989 evq->ee_req.er_wcnt = sbspace(&sp->so_snd);
1990 }
1991 break;
1992
1993 case EV_RCONN:
1994 if ((evq->ee_eventmask & EV_RE)) {
1995 mask |= EV_RE|EV_RCONN;
1996 evq->ee_req.er_rcnt = sp->so_qlen + 1; // incl this one
1997 }
1998 break;
1999
2000 case EV_WCONN:
2001 if ((evq->ee_eventmask & EV_WR)) {
2002 mask |= EV_WR|EV_WCONN;
2003 }
2004 break;
2005
2006 case EV_RCLOSED:
2007 if ((evq->ee_eventmask & EV_RE)) {
2008 mask |= EV_RE|EV_RCLOSED;
2009 }
2010 break;
2011
2012 case EV_WCLOSED:
2013 if ((evq->ee_eventmask & EV_WR)) {
2014 mask |= EV_WR|EV_WCLOSED;
2015 }
2016 break;
2017
2018 case EV_FIN:
2019 if (evq->ee_eventmask & EV_RE) {
2020 mask |= EV_RE|EV_FIN;
2021 }
2022 break;
2023
2024 case EV_RESET:
2025 case EV_TIMEOUT:
2026 if (evq->ee_eventmask & EV_RE) {
2027 mask |= EV_RE | event;
2028 }
2029 if (evq->ee_eventmask & EV_WR) {
2030 mask |= EV_WR | event;
2031 }
2032 break;
2033
2034 default:
2035 KERNEL_DEBUG(DBG_MISC_POST|DBG_FUNC_END, (int)sp, -1, 0, 0, 0);
2036 return;
2037 } /* switch */
2038
2039 KERNEL_DEBUG(DBG_MISC_POST, (int)evq, evq->ee_eventmask, evq->ee_req.er_eventbits, mask, 0);
2040
2041 if (mask) {
2042 /*
2043 * disarm... postevents are nops until this event is 'read' via
2044 * waitevent and then re-armed via modwatch
2045 */
2046 evq->ee_eventmask = 0;
2047
2048 /*
2049 * since events are disarmed until after the waitevent
2050 * the ee_req.er_xxxx fields can't change once we've
2051 * inserted this event into the proc queue...
2052 * since waitevent can't see this event until we
2053 * enqueue it, waitevent will see a 'consistent'
2054 * snapshot of the event, even though it won't hold
2055 * the socket lock, and we're updating the event outside
2056 * of the proc lock, which it will hold
2057 */
2058 evq->ee_req.er_eventbits |= mask;
2059
2060 evprocenque(evq);
2061 }
2062 }
2063 KERNEL_DEBUG(DBG_MISC_POST|DBG_FUNC_END, (int)sp, 0, 0, 0, 0);
2064 }
2065
2066
2067 /*
2068 * watchevent system call. user passes us an event to watch
2069 * for. we malloc an event object, initialize it, and queue
2070 * it to the open socket. when the event occurs, postevent()
2071 * will enqueue it back to our proc where we can retrieve it
2072 * via waitevent().
2073 *
2074 * should this prevent duplicate events on same socket?
2075 */
2076 int
2077 watchevent(proc_t p, struct watchevent_args *uap, __unused int *retval)
2078 {
2079 struct eventqelt *evq = (struct eventqelt *)0;
2080 struct eventqelt *np = NULL;
2081 struct eventreq *erp;
2082 struct fileproc *fp = NULL;
2083 int error;
2084
2085 KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_START, 0,0,0,0,0);
2086
2087 // get a qelt and fill with users req
2088 MALLOC(evq, struct eventqelt *, sizeof(struct eventqelt), M_TEMP, M_WAITOK);
2089
2090 if (evq == NULL)
2091 panic("can't MALLOC evq");
2092 erp = &evq->ee_req;
2093
2094 // get users request pkt
2095 if ( (error = copyin(CAST_USER_ADDR_T(uap->u_req), (caddr_t)erp,
2096 sizeof(struct eventreq))) ) {
2097 FREE(evq, M_TEMP);
2098
2099 KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_END, error,0,0,0,0);
2100 return(error);
2101 }
2102 KERNEL_DEBUG(DBG_MISC_WATCH, erp->er_handle,uap->u_eventmask,evq,0,0);
2103
2104 // validate, freeing qelt if errors
2105 error = 0;
2106 proc_fdlock(p);
2107
2108 if (erp->er_type != EV_FD) {
2109 error = EINVAL;
2110 } else if ((error = fp_lookup(p, erp->er_handle, &fp, 1)) != 0) {
2111 error = EBADF;
2112 } else if (fp->f_type == DTYPE_SOCKET) {
2113 socket_lock((struct socket *)fp->f_data, 1);
2114 np = ((struct socket *)fp->f_data)->so_evlist.tqh_first;
2115 } else if (fp->f_type == DTYPE_PIPE) {
2116 PIPE_LOCK((struct pipe *)fp->f_data);
2117 np = ((struct pipe *)fp->f_data)->pipe_evlist.tqh_first;
2118 } else {
2119 fp_drop(p, erp->er_handle, fp, 1);
2120 error = EINVAL;
2121 }
2122 proc_fdunlock(p);
2123
2124 if (error) {
2125 FREE(evq, M_TEMP);
2126
2127 KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_END, error,0,0,0,0);
2128 return(error);
2129 }
2130
2131 /*
2132 * only allow one watch per file per proc
2133 */
2134 for ( ; np != NULL; np = np->ee_slist.tqe_next) {
2135 if (np->ee_proc == p) {
2136 if (fp->f_type == DTYPE_SOCKET)
2137 socket_unlock((struct socket *)fp->f_data, 1);
2138 else
2139 PIPE_UNLOCK((struct pipe *)fp->f_data);
2140 fp_drop(p, erp->er_handle, fp, 0);
2141 FREE(evq, M_TEMP);
2142
2143 KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_END, EINVAL,0,0,0,0);
2144 return(EINVAL);
2145 }
2146 }
2147 erp->er_ecnt = erp->er_rcnt = erp->er_wcnt = erp->er_eventbits = 0;
2148 evq->ee_proc = p;
2149 evq->ee_eventmask = uap->u_eventmask & EV_MASK;
2150 evq->ee_flags = 0;
2151
2152 if (fp->f_type == DTYPE_SOCKET) {
2153 TAILQ_INSERT_TAIL(&((struct socket *)fp->f_data)->so_evlist, evq, ee_slist);
2154 postevent((struct socket *)fp->f_data, 0, EV_RWBYTES); // catch existing events
2155
2156 socket_unlock((struct socket *)fp->f_data, 1);
2157 } else {
2158 TAILQ_INSERT_TAIL(&((struct pipe *)fp->f_data)->pipe_evlist, evq, ee_slist);
2159 postpipeevent((struct pipe *)fp->f_data, EV_RWBYTES);
2160
2161 PIPE_UNLOCK((struct pipe *)fp->f_data);
2162 }
2163 fp_drop_event(p, erp->er_handle, fp);
2164
2165 KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_END, 0,0,0,0,0);
2166 return(0);
2167 }
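
/*
 * Editor's illustrative sketch -- NOT part of the original file.
 * A minimal user-space caller, assuming a hypothetical libc-style
 * wrapper whose arguments mirror watchevent_args above. struct
 * eventreq and the EV_* constants come from <sys/ev.h>.
 */
#if 0
#include <string.h>
#include <sys/ev.h>

extern int watchevent(struct eventreq *u_req, int u_eventmask);	/* hypothetical wrapper */

static int
watch_socket_readable(int sock_fd)
{
	struct eventreq req;

	memset(&req, 0, sizeof(req));
	req.er_type = EV_FD;		/* the only er_type accepted above */
	req.er_handle = sock_fd;	/* must name a socket or a pipe */

	/* arm for read events; postevent() will queue hits to our proc */
	return (watchevent(&req, EV_RE));
}
#endif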
2168
2169
2170
2171 /*
2172 * waitevent system call.
2173 * grabs the next waiting event for this proc and returns
2174 * it. if no events are pending, the user can block (tv == NULL),
2175 * sleep with a timeout, or poll (tv points to a zero timeval);
2176 */
2177 int
2178 waitevent(proc_t p, struct waitevent_args *uap, int *retval)
2179 {
2180 int error = 0;
2181 struct eventqelt *evq;
2182 struct eventreq er;
2183 uint64_t abstime, interval;
2184
2185 if (uap->tv) {
2186 struct timeval atv;
2187
2188 error = copyin(CAST_USER_ADDR_T(uap->tv), (caddr_t)&atv, sizeof (atv));
2189 if (error)
2190 return(error);
2191 if (itimerfix(&atv)) {
2192 error = EINVAL;
2193 return(error);
2194 }
2195 interval = tvtoabstime(&atv);
2196 } else
2197 interval = 0;
2198
2199 KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_START, 0,0,0,0,0);
2200
2201 proc_lock(p);
2202 retry:
2203 if ((evq = p->p_evlist.tqh_first) != NULL) {
2204 /*
2205 * found one... make a local copy while it's still on the queue
2206 * to prevent it from changing while in the midst of copying.
2207 * we don't want to hold the proc lock across a copyout because
2208 * it might block on a page fault at the target address in user space
2209 */
2210 bcopy((caddr_t)&evq->ee_req, (caddr_t)&er, sizeof (struct eventreq));
2211
2212 TAILQ_REMOVE(&p->p_evlist, evq, ee_plist);
2213
2214 evq->ee_flags &= ~EV_QUEUED;
2215
2216 proc_unlock(p);
2217
2218 error = copyout((caddr_t)&er, CAST_USER_ADDR_T(uap->u_req), sizeof(struct eventreq));
2219
2220 KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_END, error,
2221 evq->ee_req.er_handle,evq->ee_req.er_eventbits,evq,0);
2222 return (error);
2223 }
2224 else {
2225 if (uap->tv && interval == 0) {
2226 proc_unlock(p);
2227 *retval = 1; // poll mode and nothing was pending
2228
2229 KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_END, error,0,0,0,0);
2230 return (error);
2231 }
2232 if (interval != 0)
2233 clock_absolutetime_interval_to_deadline(interval, &abstime);
2234 else
2235 abstime = 0;
2236
2237 KERNEL_DEBUG(DBG_MISC_WAIT, 1,&p->p_evlist,0,0,0);
2238
2239 error = msleep1(&p->p_evlist, &p->p_mlock, (PSOCK | PCATCH), "waitevent", abstime);
2240
2241 KERNEL_DEBUG(DBG_MISC_WAIT, 2,&p->p_evlist,0,0,0);
2242
2243 if (error == 0)
2244 goto retry;
2245 if (error == ERESTART)
2246 error = EINTR;
2247 if (error == EWOULDBLOCK) {
2248 *retval = 1;
2249 error = 0;
2250 }
2251 }
2252 proc_unlock(p);
2253
2254 KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_END, 0,0,0,0,0);
2255 return (error);
2256 }
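
/*
 * Editor's illustrative sketch -- NOT part of the original file.
 * The three wait modes implied by the interval handling above,
 * again assuming a hypothetical user-space wrapper for the syscall.
 */
#if 0
#include <stddef.h>
#include <sys/time.h>
#include <sys/ev.h>

extern int waitevent(struct eventreq *u_req, struct timeval *tv);	/* hypothetical wrapper */

static void
wait_examples(struct eventreq *req)
{
	struct timeval zero = { 0, 0 };		/* tv_sec = 0, tv_usec = 0 */
	struct timeval one_second = { 1, 0 };

	(void)waitevent(req, NULL);		/* tv == NULL: block until an event is posted */
	(void)waitevent(req, &one_second);	/* bounded sleep; EWOULDBLOCK surfaces as retval 1 */
	(void)waitevent(req, &zero);		/* zero timeval: pure poll, retval 1 if nothing pending */
}
#endif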
2257
2258
2259 /*
2260 * modwatch system call. the user passes in the event to modify.
2261 * if we find it, we reset the event bits and queue/dequeue the
2262 * event as needed.
2263 */
2264 int
2265 modwatch(proc_t p, struct modwatch_args *uap, __unused int *retval)
2266 {
2267 struct eventreq er;
2268 struct eventreq *erp = &er;
2269 struct eventqelt *evq;
2270 int error;
2271 struct fileproc *fp;
2272 int flag;
2273
2274 KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_START, 0,0,0,0,0);
2275
2276 /*
2277 * get user's request pkt
2278 */
2279 if ((error = copyin(CAST_USER_ADDR_T(uap->u_req), (caddr_t)erp,
2280 sizeof(struct eventreq)))) {
2281 KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_END, error,0,0,0,0);
2282 return(error);
2283 }
2284 proc_fdlock(p);
2285
2286 if (erp->er_type != EV_FD) {
2287 error = EINVAL;
2288 } else if ((error = fp_lookup(p, erp->er_handle, &fp, 1)) != 0) {
2289 error = EBADF;
2290 } else if (fp->f_type == DTYPE_SOCKET) {
2291 socket_lock((struct socket *)fp->f_data, 1);
2292 evq = ((struct socket *)fp->f_data)->so_evlist.tqh_first;
2293 } else if (fp->f_type == DTYPE_PIPE) {
2294 PIPE_LOCK((struct pipe *)fp->f_data);
2295 evq = ((struct pipe *)fp->f_data)->pipe_evlist.tqh_first;
2296 } else {
2297 fp_drop(p, erp->er_handle, fp, 1);
2298 error = EINVAL;
2299 }
2300
2301 if (error) {
2302 proc_fdunlock(p);
2303 KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_END, error,0,0,0,0);
2304 return(error);
2305 }
2306
2307 if ((uap->u_eventmask == EV_RM) && (fp->f_flags & FP_WAITEVENT)) {
2308 fp->f_flags &= ~FP_WAITEVENT;
2309 }
2310 proc_fdunlock(p);
2311
2312 // locate event if possible
2313 for ( ; evq != NULL; evq = evq->ee_slist.tqe_next) {
2314 if (evq->ee_proc == p)
2315 break;
2316 }
2317 if (evq == NULL) {
2318 if (fp->f_type == DTYPE_SOCKET)
2319 socket_unlock((struct socket *)fp->f_data, 1);
2320 else
2321 PIPE_UNLOCK((struct pipe *)fp->f_data);
2322 fp_drop(p, erp->er_handle, fp, 0);
2323 KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_END, EINVAL,0,0,0,0);
2324 return(EINVAL);
2325 }
2326 KERNEL_DEBUG(DBG_MISC_MOD, erp->er_handle,uap->u_eventmask,evq,0,0);
2327
2328 if (uap->u_eventmask == EV_RM) {
2329 EVPROCDEQUE(p, evq);
2330
2331 if (fp->f_type == DTYPE_SOCKET) {
2332 TAILQ_REMOVE(&((struct socket *)fp->f_data)->so_evlist, evq, ee_slist);
2333 socket_unlock((struct socket *)fp->f_data, 1);
2334 } else {
2335 TAILQ_REMOVE(&((struct pipe *)fp->f_data)->pipe_evlist, evq, ee_slist);
2336 PIPE_UNLOCK((struct pipe *)fp->f_data);
2337 }
2338 fp_drop(p, erp->er_handle, fp, 0);
2339 FREE(evq, M_TEMP);
2340 KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_END, 0,0,0,0,0);
2341 return(0);
2342 }
2343 switch (uap->u_eventmask & EV_MASK) {
2344
2345 case 0:
2346 flag = 0;
2347 break;
2348
2349 case EV_RE:
2350 case EV_WR:
2351 case EV_RE|EV_WR:
2352 flag = EV_RWBYTES;
2353 break;
2354
2355 case EV_EX:
2356 flag = EV_OOB;
2357 break;
2358
2359 case EV_EX|EV_RE:
2360 case EV_EX|EV_WR:
2361 case EV_EX|EV_RE|EV_WR:
2362 flag = EV_OOB|EV_RWBYTES;
2363 break;
2364
2365 default:
2366 if (fp->f_type == DTYPE_SOCKET)
2367 socket_unlock((struct socket *)fp->f_data, 1);
2368 else
2369 PIPE_UNLOCK((struct pipe *)fp->f_data);
2370 fp_drop(p, erp->er_handle, fp, 0);
2371 KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_END, EINVAL,0,0,0,0);
2372 return(EINVAL);
2373 }
2374 /*
2375 * since we're holding the socket/pipe lock, the event
2376 * cannot go from the unqueued state to the queued state;
2377 * however, it can go from the queued state to the unqueued state,
2378 * since that direction is protected by the proc_lock...
2379 * so do a quick check for EV_QUEUED w/o holding the proc lock.
2380 * since by far the common case will be NOT EV_QUEUED, this saves
2381 * us taking the proc_lock the majority of the time
2382 */
2383 if (evq->ee_flags & EV_QUEUED) {
2384 /*
2385 * EVPROCDEQUE will recheck the state after it grabs the proc_lock
2386 */
2387 EVPROCDEQUE(p, evq);
2388 }
2389 /*
2390 * while the event is off the proc queue and
2391 * we're holding the socket/pipe lock
2392 * it's safe to update these fields...
2393 */
2394 evq->ee_req.er_eventbits = 0;
2395 evq->ee_eventmask = uap->u_eventmask & EV_MASK;
2396
2397 if (fp->f_type == DTYPE_SOCKET) {
2398 postevent((struct socket *)fp->f_data, 0, flag);
2399 socket_unlock((struct socket *)fp->f_data, 1);
2400 }
2401 else {
2402 postpipeevent((struct pipe *)fp->f_data, flag);
2403 PIPE_UNLOCK((struct pipe *)fp->f_data);
2404 }
2405 fp_drop(p, erp->er_handle, fp, 0);
2406 KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_END, evq->ee_req.er_handle,evq->ee_eventmask,fp->f_data,flag,0);
2407 return(0);
2408 }
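
/*
 * Editor's illustrative sketch -- NOT part of the original file.
 * Re-arming and removing a watch via a hypothetical modwatch wrapper;
 * EV_RM tears the watch down and frees the kernel-side eventqelt.
 * req is assumed to already carry the er_type/er_handle of the
 * watched fd, as set up for watchevent().
 */
#if 0
#include <sys/ev.h>

extern int modwatch(struct eventreq *u_req, int u_eventmask);	/* hypothetical wrapper */

static void
rearm_then_remove(struct eventreq *req)
{
	(void)modwatch(req, EV_RE | EV_WR);	/* re-arm for both read and write events */
	(void)modwatch(req, EV_RM);		/* remove the watch entirely */
}
#endif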
2409
2410 /* this routine is called from the close of an fd with proc_fdlock held; it may drop and retake that lock */
2411 int
2412 waitevent_close(struct proc *p, struct fileproc *fp)
2413 {
2414 struct eventqelt *evq;
2415
2416
2417 fp->f_flags &= ~FP_WAITEVENT;
2418
2419 if (fp->f_type == DTYPE_SOCKET) {
2420 socket_lock((struct socket *)fp->f_data, 1);
2421 evq = ((struct socket *)fp->f_data)->so_evlist.tqh_first;
2422 }
2423 else if (fp->f_type == DTYPE_PIPE) {
2424 PIPE_LOCK((struct pipe *)fp->f_data);
2425 evq = ((struct pipe *)fp->f_data)->pipe_evlist.tqh_first;
2426 }
2427 else {
2428 return(EINVAL);
2429 }
2430 proc_fdunlock(p);
2431
2432
2433 // locate event if possible
2434 for ( ; evq != NULL; evq = evq->ee_slist.tqe_next) {
2435 if (evq->ee_proc == p)
2436 break;
2437 }
2438 if (evq == NULL) {
2439 if (fp->f_type == DTYPE_SOCKET)
2440 socket_unlock((struct socket *)fp->f_data, 1);
2441 else
2442 PIPE_UNLOCK((struct pipe *)fp->f_data);
2443
2444 proc_fdlock(p);
2445
2446 return(EINVAL);
2447 }
2448 EVPROCDEQUE(p, evq);
2449
2450 if (fp->f_type == DTYPE_SOCKET) {
2451 TAILQ_REMOVE(&((struct socket *)fp->f_data)->so_evlist, evq, ee_slist);
2452 socket_unlock((struct socket *)fp->f_data, 1);
2453 } else {
2454 TAILQ_REMOVE(&((struct pipe *)fp->f_data)->pipe_evlist, evq, ee_slist);
2455 PIPE_UNLOCK((struct pipe *)fp->f_data);
2456 }
2457 FREE(evq, M_TEMP);
2458
2459 proc_fdlock(p);
2460
2461 return(0);
2462 }
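
/*
 * Editor's sketch -- NOT part of the original file. The exact close
 * path that invokes waitevent_close() is hypothetical here; the hard
 * invariant (per the comment preceding the function) is only that
 * proc_fdlock is held across the call, and that waitevent_close()
 * may drop and retake it internally.
 */
#if 0
proc_fdlock(p);
/* ... normal close processing ... */
if (fp->f_flags & FP_WAITEVENT)
	(void)waitevent_close(p, fp);	/* detaches and frees any watch on this fp */
proc_fdunlock(p);
#endif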
2463