/*
 * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
/*
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)sys_generic.c	8.9 (Berkeley) 2/14/95
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce
 * support for mandatory and extensible security protections.  This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/filedesc.h>
#include <sys/ioctl.h>
#include <sys/file_internal.h>
#include <sys/proc_internal.h>
#include <sys/socketvar.h>
#include <sys/uio_internal.h>
#include <sys/kernel.h>
#include <sys/stat.h>
#include <sys/malloc.h>
#include <sys/sysproto.h>

#include <sys/mount_internal.h>
#include <sys/protosw.h>
#include <sys/ev.h>
#include <sys/user.h>
#include <sys/kdebug.h>
#include <sys/poll.h>
#include <sys/event.h>
#include <sys/eventvar.h>
#include <sys/proc.h>

#include <mach/mach_types.h>
#include <kern/kern_types.h>
#include <kern/assert.h>
#include <kern/kalloc.h>
#include <kern/thread.h>
#include <kern/clock.h>
#include <kern/ledger.h>
#include <kern/task.h>

#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/errno.h>
#include <sys/syscall.h>
#include <sys/pipe.h>

#include <security/audit/audit.h>

#include <net/if.h>
#include <net/route.h>

#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/in_pcb.h>
#include <netinet/ip_var.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
#include <netinet/tcpip.h>
#include <netinet/tcp_debug.h>
/* for wait queue based select */
#include <kern/wait_queue.h>
#include <kern/kalloc.h>
#include <sys/vnode_internal.h>

/* XXX should be in a header file somewhere */
void evsofree(struct socket *);
void evpipefree(struct pipe *);
void postpipeevent(struct pipe *, int);
void postevent(struct socket *, struct sockbuf *, int);
extern kern_return_t IOBSDGetPlatformUUID(__darwin_uuid_t uuid, mach_timespec_t timeoutp);

int rd_uio(struct proc *p, int fdes, uio_t uio, user_ssize_t *retval);
int wr_uio(struct proc *p, int fdes, uio_t uio, user_ssize_t *retval);
extern void *get_bsduthreadarg(thread_t);
extern int *get_bsduthreadrval(thread_t);

__private_extern__ int dofileread(vfs_context_t ctx, struct fileproc *fp,
    user_addr_t bufp, user_size_t nbyte,
    off_t offset, int flags, user_ssize_t *retval);
__private_extern__ int dofilewrite(vfs_context_t ctx, struct fileproc *fp,
    user_addr_t bufp, user_size_t nbyte,
    off_t offset, int flags, user_ssize_t *retval);
__private_extern__ int preparefileread(struct proc *p, struct fileproc **fp_ret, int fd, int check_for_vnode);
__private_extern__ void donefileread(struct proc *p, struct fileproc *fp_ret, int fd);


/* Conflict wait queue for when selects collide (opaque type) */
struct wait_queue select_conflict_queue;

/*
 * Init routine called from bsd_init.c
 */
void select_wait_queue_init(void);
void
select_wait_queue_init(void)
{
    wait_queue_init(&select_conflict_queue, SYNC_POLICY_FIFO);
}


#if NETAT
extern int appletalk_inited;
#endif /* NETAT */

#define f_flag f_fglob->fg_flag
#define f_type f_fglob->fg_type
#define f_msgcount f_fglob->fg_msgcount
#define f_cred f_fglob->fg_cred
#define f_ops f_fglob->fg_ops
#define f_offset f_fglob->fg_offset
#define f_data f_fglob->fg_data

/*
 * Read system call.
 *
 * Returns:	0			Success
 *	preparefileread:EBADF
 *	preparefileread:ESPIPE
 *	preparefileread:ENXIO
 *	preparefileread:EBADF
 *	dofileread:???
 */
int
read(struct proc *p, struct read_args *uap, user_ssize_t *retval)
{
    __pthread_testcancel(1);
    return (read_nocancel(p, (struct read_nocancel_args *)uap, retval));
}

int
read_nocancel(struct proc *p, struct read_nocancel_args *uap, user_ssize_t *retval)
{
    struct fileproc *fp;
    int error;
    int fd = uap->fd;
    struct vfs_context context;

    if ((error = preparefileread(p, &fp, fd, 0)))
        return (error);

    context = *(vfs_context_current());
    context.vc_ucred = fp->f_fglob->fg_cred;

    error = dofileread(&context, fp, uap->cbuf, uap->nbyte,
        (off_t)-1, 0, retval);

    donefileread(p, fp, fd);

    return (error);
}
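
/*
 * Illustrative userland sketch (not kernel code; buffer name and size
 * are arbitrary): the path above is what services a plain read(2), and
 * errno values come from the table in the header comment.
 *
 *     char buf[512];
 *     ssize_t n = read(fd, buf, sizeof(buf));
 *     if (n < 0)
 *         err(1, "read");     // EBADF, ESPIPE, ENXIO, ...
 */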

/*
 * Pread system call
 *
 * Returns:	0			Success
 *	preparefileread:EBADF
 *	preparefileread:ESPIPE
 *	preparefileread:ENXIO
 *	preparefileread:EBADF
 *	dofileread:???
 */
int
pread(struct proc *p, struct pread_args *uap, user_ssize_t *retval)
{
    __pthread_testcancel(1);
    return (pread_nocancel(p, (struct pread_nocancel_args *)uap, retval));
}

int
pread_nocancel(struct proc *p, struct pread_nocancel_args *uap, user_ssize_t *retval)
{
    struct fileproc *fp = NULL;	/* fp set by preparefileread() */
    int fd = uap->fd;
    int error;
    struct vfs_context context;

    if ((error = preparefileread(p, &fp, fd, 1)))
        goto out;

    context = *(vfs_context_current());
    context.vc_ucred = fp->f_fglob->fg_cred;

    error = dofileread(&context, fp, uap->buf, uap->nbyte,
        uap->offset, FOF_OFFSET, retval);

    donefileread(p, fp, fd);

    KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_pread) | DBG_FUNC_NONE),
        uap->fd, uap->nbyte, (unsigned int)((uap->offset >> 32)), (unsigned int)(uap->offset), 0);

out:
    return (error);
}
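
/*
 * Illustrative sketch (not kernel code; names are arbitrary): because
 * dofileread() is called with FOF_OFFSET above, pread(2) reads at the
 * supplied offset and leaves the descriptor's f_offset untouched.
 *
 *     char hdr[64];
 *     ssize_t n = pread(fd, hdr, sizeof(hdr), 0);  // read head of file
 *     // a later read(fd, ...) still starts at the old seek position
 */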

/*
 * Code common for read and pread
 */

void
donefileread(struct proc *p, struct fileproc *fp, int fd)
{
    proc_fdlock_spin(p);

    fp->f_flags &= ~FP_INCHRREAD;

    fp_drop(p, fd, fp, 1);
    proc_fdunlock(p);
}

/*
 * Returns:	0			Success
 *		EBADF
 *		ESPIPE
 *		ENXIO
 *	fp_lookup:EBADF
 *	fo_read:???
 */
int
preparefileread(struct proc *p, struct fileproc **fp_ret, int fd, int check_for_pread)
{
    vnode_t vp;
    int error;
    struct fileproc *fp;

    AUDIT_ARG(fd, fd);

    proc_fdlock_spin(p);

    error = fp_lookup(p, fd, &fp, 1);

    if (error) {
        proc_fdunlock(p);
        return (error);
    }
    if ((fp->f_flag & FREAD) == 0) {
        error = EBADF;
        goto out;
    }
    if (check_for_pread && (fp->f_type != DTYPE_VNODE)) {
        error = ESPIPE;
        goto out;
    }
    if (fp->f_type == DTYPE_VNODE) {
        vp = (struct vnode *)fp->f_fglob->fg_data;

        if (check_for_pread && (vnode_isfifo(vp))) {
            error = ESPIPE;
            goto out;
        }
        if (check_for_pread && (vp->v_flag & VISTTY)) {
            error = ENXIO;
            goto out;
        }
        if (vp->v_type == VCHR)
            fp->f_flags |= FP_INCHRREAD;
    }

    *fp_ret = fp;

    proc_fdunlock(p);
    return (0);

out:
    fp_drop(p, fd, fp, 1);
    proc_fdunlock(p);
    return (error);
}


/*
 * Returns:	0			Success
 *		EINVAL
 *	fo_read:???
 */
__private_extern__ int
dofileread(vfs_context_t ctx, struct fileproc *fp,
    user_addr_t bufp, user_size_t nbyte, off_t offset, int flags,
    user_ssize_t *retval)
{
    uio_t auio;
    user_ssize_t bytecnt;
    long error = 0;
    char uio_buf[ UIO_SIZEOF(1) ];

    if (nbyte > INT_MAX)
        return (EINVAL);

    if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
        auio = uio_createwithbuffer(1, offset, UIO_USERSPACE64, UIO_READ,
            &uio_buf[0], sizeof(uio_buf));
    } else {
        auio = uio_createwithbuffer(1, offset, UIO_USERSPACE32, UIO_READ,
            &uio_buf[0], sizeof(uio_buf));
    }
    uio_addiov(auio, bufp, nbyte);

    bytecnt = nbyte;

    if ((error = fo_read(fp, auio, flags, ctx))) {
        if (uio_resid(auio) != bytecnt && (error == ERESTART ||
            error == EINTR || error == EWOULDBLOCK))
            error = 0;
    }
    bytecnt -= uio_resid(auio);

    *retval = bytecnt;

    return (error);
}
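
/*
 * Worked example of the residual accounting above (hypothetical numbers):
 * a caller asks for nbyte = 100 and fo_read() is interrupted after moving
 * 60 bytes, so uio_resid(auio) == 40.  Since partial progress was made,
 * the EINTR is suppressed, bytecnt becomes 100 - 40 = 60, and *retval is
 * set to 60.
 */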

/*
 * Scatter read system call.
 *
 * Returns:	0			Success
 *		EINVAL
 *		ENOMEM
 *	copyin:EFAULT
 *	rd_uio:???
 */
int
readv(struct proc *p, struct readv_args *uap, user_ssize_t *retval)
{
    __pthread_testcancel(1);
    return (readv_nocancel(p, (struct readv_nocancel_args *)uap, retval));
}

int
readv_nocancel(struct proc *p, struct readv_nocancel_args *uap, user_ssize_t *retval)
{
    uio_t auio = NULL;
    int error;
    struct user_iovec *iovp;

    /* Verify range before calling uio_create() */
    if (uap->iovcnt <= 0 || uap->iovcnt > UIO_MAXIOV)
        return (EINVAL);

    /* allocate a uio large enough to hold the number of iovecs passed */
    auio = uio_create(uap->iovcnt, 0,
        (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
        UIO_READ);

    /* get location of iovecs within the uio.  then copyin the iovecs from
     * user space.
     */
    iovp = uio_iovsaddr(auio);
    if (iovp == NULL) {
        error = ENOMEM;
        goto ExitThisRoutine;
    }
    error = copyin_user_iovec_array(uap->iovp,
        IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
        uap->iovcnt, iovp);
    if (error) {
        goto ExitThisRoutine;
    }

    /* finalize uio_t for use and do the IO
     */
    uio_calculateresid(auio);
    error = rd_uio(p, uap->fd, auio, retval);

ExitThisRoutine:
    if (auio != NULL) {
        uio_free(auio);
    }
    return (error);
}
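
/*
 * Illustrative userland sketch (not kernel code; buffers are arbitrary):
 * a scatter read that the routine above services, filling each iovec in
 * order.
 *
 *     struct iovec iov[2] = {
 *         { .iov_base = hdr,  .iov_len = sizeof(hdr)  },
 *         { .iov_base = body, .iov_len = sizeof(body) },
 *     };
 *     ssize_t n = readv(fd, iov, 2);  // fills hdr first, then body
 */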

/*
 * Write system call
 *
 * Returns:	0			Success
 *		EBADF
 *	fp_lookup:EBADF
 *	dofilewrite:???
 */
int
write(struct proc *p, struct write_args *uap, user_ssize_t *retval)
{
    __pthread_testcancel(1);
    return (write_nocancel(p, (struct write_nocancel_args *)uap, retval));
}

int
write_nocancel(struct proc *p, struct write_nocancel_args *uap, user_ssize_t *retval)
{
    struct fileproc *fp;
    int error;
    int fd = uap->fd;

    AUDIT_ARG(fd, fd);

    error = fp_lookup(p, fd, &fp, 0);
    if (error)
        return (error);
    if ((fp->f_flag & FWRITE) == 0) {
        error = EBADF;
    } else {
        struct vfs_context context = *(vfs_context_current());
        context.vc_ucred = fp->f_fglob->fg_cred;

        error = dofilewrite(&context, fp, uap->cbuf, uap->nbyte,
            (off_t)-1, 0, retval);
    }
    if (error == 0)
        fp_drop_written(p, fd, fp);
    else
        fp_drop(p, fd, fp, 0);
    return (error);
}

/*
 * pwrite system call
 *
 * Returns:	0			Success
 *		EBADF
 *		ESPIPE
 *		ENXIO
 *		EINVAL
 *	fp_lookup:EBADF
 *	dofilewrite:???
 */
int
pwrite(struct proc *p, struct pwrite_args *uap, user_ssize_t *retval)
{
    __pthread_testcancel(1);
    return (pwrite_nocancel(p, (struct pwrite_nocancel_args *)uap, retval));
}

int
pwrite_nocancel(struct proc *p, struct pwrite_nocancel_args *uap, user_ssize_t *retval)
{
    struct fileproc *fp;
    int error;
    int fd = uap->fd;
    vnode_t vp = (vnode_t)0;

    AUDIT_ARG(fd, fd);

    error = fp_lookup(p, fd, &fp, 0);
    if (error)
        return (error);

    if ((fp->f_flag & FWRITE) == 0) {
        error = EBADF;
    } else {
        struct vfs_context context = *vfs_context_current();
        context.vc_ucred = fp->f_fglob->fg_cred;

        if (fp->f_type != DTYPE_VNODE) {
            error = ESPIPE;
            goto errout;
        }
        vp = (vnode_t)fp->f_fglob->fg_data;
        if (vnode_isfifo(vp)) {
            error = ESPIPE;
            goto errout;
        }
        if ((vp->v_flag & VISTTY)) {
            error = ENXIO;
            goto errout;
        }
        if (uap->offset == (off_t)-1) {
            error = EINVAL;
            goto errout;
        }

        error = dofilewrite(&context, fp, uap->buf, uap->nbyte,
            uap->offset, FOF_OFFSET, retval);
    }
errout:
    if (error == 0)
        fp_drop_written(p, fd, fp);
    else
        fp_drop(p, fd, fp, 0);

    KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_pwrite) | DBG_FUNC_NONE),
        uap->fd, uap->nbyte, (unsigned int)((uap->offset >> 32)), (unsigned int)(uap->offset), 0);

    return (error);
}

/*
 * Returns:	0			Success
 *		EINVAL
 *	<fo_write>:EPIPE
 *	<fo_write>:???			[indirect through struct fileops]
 */
__private_extern__ int
dofilewrite(vfs_context_t ctx, struct fileproc *fp,
    user_addr_t bufp, user_size_t nbyte, off_t offset, int flags,
    user_ssize_t *retval)
{
    uio_t auio;
    long error = 0;
    user_ssize_t bytecnt;
    char uio_buf[ UIO_SIZEOF(1) ];

    if (nbyte > INT_MAX)
        return (EINVAL);

    if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
        auio = uio_createwithbuffer(1, offset, UIO_USERSPACE64, UIO_WRITE,
            &uio_buf[0], sizeof(uio_buf));
    } else {
        auio = uio_createwithbuffer(1, offset, UIO_USERSPACE32, UIO_WRITE,
            &uio_buf[0], sizeof(uio_buf));
    }
    uio_addiov(auio, bufp, nbyte);

    bytecnt = nbyte;
    if ((error = fo_write(fp, auio, flags, ctx))) {
        if (uio_resid(auio) != bytecnt && (error == ERESTART ||
            error == EINTR || error == EWOULDBLOCK))
            error = 0;
        /* The socket layer handles SIGPIPE */
        if (error == EPIPE && fp->f_type != DTYPE_SOCKET &&
            (fp->f_fglob->fg_lflags & FG_NOSIGPIPE) == 0) {
            /* XXX Raise the signal on the thread? */
            psignal(vfs_context_proc(ctx), SIGPIPE);
        }
    }
    bytecnt -= uio_resid(auio);
    *retval = bytecnt;

    return (error);
}
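
/*
 * Illustrative userland consequence of the EPIPE path above (not kernel
 * code; descriptor name is arbitrary): a write to a pipe whose read end
 * is closed fails with EPIPE, and SIGPIPE is delivered unless the fd is
 * a socket or the signal is suppressed.
 *
 *     signal(SIGPIPE, SIG_IGN);  // or fcntl(fd, F_SETNOSIGPIPE, 1)
 *     ssize_t n = write(pipe_wr_fd, "x", 1);  // read end already closed
 *     // n == -1 with errno == EPIPE; no signal while ignored/suppressed
 */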

/*
 * Gather write system call
 */
int
writev(struct proc *p, struct writev_args *uap, user_ssize_t *retval)
{
    __pthread_testcancel(1);
    return (writev_nocancel(p, (struct writev_nocancel_args *)uap, retval));
}

int
writev_nocancel(struct proc *p, struct writev_nocancel_args *uap, user_ssize_t *retval)
{
    uio_t auio = NULL;
    int error;
    struct user_iovec *iovp;

    AUDIT_ARG(fd, uap->fd);

    /* Verify range before calling uio_create() */
    if (uap->iovcnt <= 0 || uap->iovcnt > UIO_MAXIOV)
        return (EINVAL);

    /* allocate a uio large enough to hold the number of iovecs passed */
    auio = uio_create(uap->iovcnt, 0,
        (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
        UIO_WRITE);

    /* get location of iovecs within the uio.  then copyin the iovecs from
     * user space.
     */
    iovp = uio_iovsaddr(auio);
    if (iovp == NULL) {
        error = ENOMEM;
        goto ExitThisRoutine;
    }
    error = copyin_user_iovec_array(uap->iovp,
        IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
        uap->iovcnt, iovp);
    if (error) {
        goto ExitThisRoutine;
    }

    /* finalize uio_t for use and do the IO
     */
    uio_calculateresid(auio);
    error = wr_uio(p, uap->fd, auio, retval);

ExitThisRoutine:
    if (auio != NULL) {
        uio_free(auio);
    }
    return (error);
}


int
wr_uio(struct proc *p, int fdes, uio_t uio, user_ssize_t *retval)
{
    struct fileproc *fp;
    int error;
    user_ssize_t count;
    struct vfs_context context = *vfs_context_current();

    error = fp_lookup(p, fdes, &fp, 0);
    if (error)
        return (error);

    if ((fp->f_flag & FWRITE) == 0) {
        error = EBADF;
        goto out;
    }
    count = uio_resid(uio);

    context.vc_ucred = fp->f_cred;
    error = fo_write(fp, uio, 0, &context);
    if (error) {
        if (uio_resid(uio) != count && (error == ERESTART ||
            error == EINTR || error == EWOULDBLOCK))
            error = 0;
        /* The socket layer handles SIGPIPE */
        if (error == EPIPE && fp->f_type != DTYPE_SOCKET &&
            (fp->f_fglob->fg_lflags & FG_NOSIGPIPE) == 0)
            psignal(p, SIGPIPE);
    }
    *retval = count - uio_resid(uio);

out:
    if (error == 0)
        fp_drop_written(p, fdes, fp);
    else
        fp_drop(p, fdes, fp, 0);
    return (error);
}


int
rd_uio(struct proc *p, int fdes, uio_t uio, user_ssize_t *retval)
{
    struct fileproc *fp;
    int error;
    user_ssize_t count;
    struct vfs_context context = *vfs_context_current();

    if ((error = preparefileread(p, &fp, fdes, 0)))
        return (error);

    count = uio_resid(uio);

    context.vc_ucred = fp->f_cred;

    error = fo_read(fp, uio, 0, &context);

    if (error) {
        if (uio_resid(uio) != count && (error == ERESTART ||
            error == EINTR || error == EWOULDBLOCK))
            error = 0;
    }
    *retval = count - uio_resid(uio);

    donefileread(p, fp, fdes);

    return (error);
}

/*
 * Ioctl system call
 *
 * Returns:	0			Success
 *		EBADF
 *		ENOTTY
 *		ENOMEM
 *		ESRCH
 *	copyin:EFAULT
 *	copyout:EFAULT
 *	fp_lookup:EBADF			Bad file descriptor
 *	fo_ioctl:???
 */
int
ioctl(struct proc *p, struct ioctl_args *uap, __unused int32_t *retval)
{
    struct fileproc *fp;
    u_long com;
    int error = 0;
    u_int size;
    caddr_t datap, memp;
    boolean_t is64bit;
    int tmp;
#define STK_PARAMS 128
    char stkbuf[STK_PARAMS];
    int fd = uap->fd;
    struct vfs_context context = *vfs_context_current();

    AUDIT_ARG(fd, uap->fd);
    AUDIT_ARG(addr, uap->data);

    is64bit = proc_is64bit(p);
#if CONFIG_AUDIT
    if (is64bit)
        AUDIT_ARG(value64, uap->com);
    else
        AUDIT_ARG(cmd, CAST_DOWN_EXPLICIT(int, uap->com));
#endif /* CONFIG_AUDIT */

    proc_fdlock(p);
    error = fp_lookup(p, fd, &fp, 1);
    if (error) {
        proc_fdunlock(p);
        return (error);
    }

    AUDIT_ARG(file, p, fp);

    if ((fp->f_flag & (FREAD | FWRITE)) == 0) {
        error = EBADF;
        goto out;
    }

    context.vc_ucred = fp->f_fglob->fg_cred;

#if CONFIG_MACF
    error = mac_file_check_ioctl(context.vc_ucred, fp->f_fglob, uap->com);
    if (error)
        goto out;
#endif

#if NETAT
    /*
     * ### LD 6/11/97 Hack Alert: this is to get AppleTalk to work
     * while implementing an ATioctl system call
     */
    {
        if (appletalk_inited && ((uap->com & 0x0000FFFF) == 0xff99)) {
            u_long fixed_command;

#ifdef APPLETALK_DEBUG
            kprintf("ioctl: special AppleTalk \n");
#endif
            datap = &stkbuf[0];
            *(user_addr_t *)datap = uap->data;
            fixed_command = _IOW(0, 0xff99, uap->data);
            error = fo_ioctl(fp, fixed_command, datap, &context);
            goto out;
        }
    }

#endif /* NETAT */


    switch (com = uap->com) {
    case FIONCLEX:
        *fdflags(p, uap->fd) &= ~UF_EXCLOSE;
        error = 0;
        goto out;
    case FIOCLEX:
        *fdflags(p, uap->fd) |= UF_EXCLOSE;
        error = 0;
        goto out;
    }

    /*
     * Interpret high order word to find amount of data to be
     * copied to/from the user's address space.
     */
    size = IOCPARM_LEN(com);
    if (size > IOCPARM_MAX) {
        error = ENOTTY;
        goto out;
    }
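
    /*
     * Worked example of the decoding above: FIONBIO is defined as
     * _IOW('f', 126, int), so IOCPARM_LEN(FIONBIO) == sizeof(int) == 4
     * and the IOC_IN bit is set; the four bytes are therefore copied in
     * from uap->data before the command is dispatched below.
     */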
    memp = NULL;
    if (size > sizeof (stkbuf)) {
        proc_fdunlock(p);
        if ((memp = (caddr_t)kalloc(size)) == 0) {
            proc_fdlock(p);
            error = ENOMEM;
            goto out;
        }
        proc_fdlock(p);
        datap = memp;
    } else
        datap = &stkbuf[0];
    if (com & IOC_IN) {
        if (size) {
            proc_fdunlock(p);
            error = copyin(uap->data, datap, size);
            if (error) {
                if (memp)
                    kfree(memp, size);
                proc_fdlock(p);
                goto out;
            }
            proc_fdlock(p);
        } else {
            /* XXX - IOC_IN and no size? we should probably return an error here!! */
            if (is64bit) {
                *(user_addr_t *)datap = uap->data;
            }
            else {
                *(uint32_t *)datap = (uint32_t)uap->data;
            }
        }
    } else if ((com & IOC_OUT) && size)
        /*
         * Zero the buffer so the user always
         * gets back something deterministic.
         */
        bzero(datap, size);
    else if (com & IOC_VOID) {
        /* XXX - this is odd since IOC_VOID means no parameters */
        if (is64bit) {
            *(user_addr_t *)datap = uap->data;
        }
        else {
            *(uint32_t *)datap = (uint32_t)uap->data;
        }
    }

    switch (com) {

    case FIONBIO:
        if ((tmp = *(int *)datap))
            fp->f_flag |= FNONBLOCK;
        else
            fp->f_flag &= ~FNONBLOCK;
        error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, &context);
        break;

    case FIOASYNC:
        if ((tmp = *(int *)datap))
            fp->f_flag |= FASYNC;
        else
            fp->f_flag &= ~FASYNC;
        error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, &context);
        break;

    case FIOSETOWN:
        tmp = *(int *)datap;
        if (fp->f_type == DTYPE_SOCKET) {
            ((struct socket *)fp->f_data)->so_pgid = tmp;
            error = 0;
            break;
        }
        if (fp->f_type == DTYPE_PIPE) {
            error = fo_ioctl(fp, (int)TIOCSPGRP, (caddr_t)&tmp, &context);
            break;
        }
        if (tmp <= 0) {
            tmp = -tmp;
        } else {
            struct proc *p1 = proc_find(tmp);
            if (p1 == 0) {
                error = ESRCH;
                break;
            }
            tmp = p1->p_pgrpid;
            proc_rele(p1);
        }
        error = fo_ioctl(fp, (int)TIOCSPGRP, (caddr_t)&tmp, &context);
        break;

    case FIOGETOWN:
        if (fp->f_type == DTYPE_SOCKET) {
            error = 0;
            *(int *)datap = ((struct socket *)fp->f_data)->so_pgid;
            break;
        }
        error = fo_ioctl(fp, TIOCGPGRP, datap, &context);
        *(int *)datap = -*(int *)datap;
        break;

    default:
        error = fo_ioctl(fp, com, datap, &context);
        /*
         * Copy any data to user, size was
         * already set and checked above.
         */
        if (error == 0 && (com & IOC_OUT) && size)
            error = copyout(datap, uap->data, (u_int)size);
        break;
    }
    proc_fdunlock(p);
    if (memp)
        kfree(memp, size);
    proc_fdlock(p);
out:
    fp_drop(p, fd, fp, 1);
    proc_fdunlock(p);
    return (error);
}

int selwait, nselcoll;
#define SEL_FIRSTPASS 1
#define SEL_SECONDPASS 2
extern int selcontinue(int error);
extern int selprocess(int error, int sel_pass);
static int selscan(struct proc *p, struct _select * sel,
    int nfd, int32_t *retval, int sel_pass, wait_queue_sub_t wqsub);
static int selcount(struct proc *p, u_int32_t *ibits, int nfd, int *count);
static int seldrop_locked(struct proc *p, u_int32_t *ibits, int nfd, int lim, int *need_wakeup, int fromselcount);
static int seldrop(struct proc *p, u_int32_t *ibits, int nfd);

/*
 * Select system call.
 *
 * Returns:	0			Success
 *		EINVAL			Invalid argument
 *		EAGAIN			Nonconformant error if allocation fails
 *	selprocess:???
 */
int
select(struct proc *p, struct select_args *uap, int32_t *retval)
{
    __pthread_testcancel(1);
    return (select_nocancel(p, (struct select_nocancel_args *)uap, retval));
}

int
select_nocancel(struct proc *p, struct select_nocancel_args *uap, int32_t *retval)
{
    int error = 0;
    u_int ni, nw, size;
    thread_t th_act;
    struct uthread *uth;
    struct _select *sel;
    int needzerofill = 1;
    int count = 0;

    th_act = current_thread();
    uth = get_bsdthread_info(th_act);
    sel = &uth->uu_select;
    retval = (int *)get_bsduthreadrval(th_act);
    *retval = 0;

    if (uap->nd < 0) {
        return (EINVAL);
    }

    /* select on thread of process that already called proc_exit() */
    if (p->p_fd == NULL) {
        return (EBADF);
    }

    if (uap->nd > p->p_fd->fd_nfiles)
        uap->nd = p->p_fd->fd_nfiles; /* forgiving; slightly wrong */

    nw = howmany(uap->nd, NFDBITS);
    ni = nw * sizeof(fd_mask);
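
    /*
     * Worked example of the sizing above (hypothetical nd; assumes the
     * usual 32-bit fd_mask, i.e. NFDBITS == 32): for uap->nd = 70,
     * nw = howmany(70, 32) = 3 words per descriptor set and
     * ni = 3 * sizeof(fd_mask) = 12 bytes; the three sets (in/ou/ex)
     * then need the 3 * ni bytes allocated below.
     */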

    /*
     * if the previously allocated space for the bits is smaller than
     * what is requested or no space has yet been allocated for this
     * thread, allocate enough space now.
     *
     * Note: If this allocation fails, select() will return EAGAIN; this
     * is the same thing poll() returns in a no-memory situation, but
     * it is not a POSIX compliant error code for select().
     */
    if (sel->nbytes < (3 * ni)) {
        int nbytes = 3 * ni;

        /* Free previous allocation, if any */
        if (sel->ibits != NULL)
            FREE(sel->ibits, M_TEMP);
        if (sel->obits != NULL) {
            FREE(sel->obits, M_TEMP);
            /* NULL out; subsequent ibits allocation may fail */
            sel->obits = NULL;
        }

        MALLOC(sel->ibits, u_int32_t *, nbytes, M_TEMP, M_WAITOK | M_ZERO);
        if (sel->ibits == NULL)
            return (EAGAIN);
        MALLOC(sel->obits, u_int32_t *, nbytes, M_TEMP, M_WAITOK | M_ZERO);
        if (sel->obits == NULL) {
            FREE(sel->ibits, M_TEMP);
            sel->ibits = NULL;
            return (EAGAIN);
        }
        sel->nbytes = nbytes;
        needzerofill = 0;
    }

    if (needzerofill) {
        bzero((caddr_t)sel->ibits, sel->nbytes);
        bzero((caddr_t)sel->obits, sel->nbytes);
    }

    /*
     * get the bits from the user address space
     */
#define getbits(name, x) \
    do { \
        if (uap->name && (error = copyin(uap->name, \
            (caddr_t)&sel->ibits[(x) * nw], ni))) \
            goto continuation; \
    } while (0)

    getbits(in, 0);
    getbits(ou, 1);
    getbits(ex, 2);
#undef getbits

    if (uap->tv) {
        struct timeval atv;
        if (IS_64BIT_PROCESS(p)) {
            struct user64_timeval atv64;
            error = copyin(uap->tv, (caddr_t)&atv64, sizeof(atv64));
            /* Loses resolution - assume timeout < 68 years */
            atv.tv_sec = atv64.tv_sec;
            atv.tv_usec = atv64.tv_usec;
        } else {
            struct user32_timeval atv32;
            error = copyin(uap->tv, (caddr_t)&atv32, sizeof(atv32));
            atv.tv_sec = atv32.tv_sec;
            atv.tv_usec = atv32.tv_usec;
        }
        if (error)
            goto continuation;
        if (itimerfix(&atv)) {
            error = EINVAL;
            goto continuation;
        }

        clock_absolutetime_interval_to_deadline(
            tvtoabstime(&atv), &sel->abstime);
    }
    else
        sel->abstime = 0;

    if ((error = selcount(p, sel->ibits, uap->nd, &count))) {
        goto continuation;
    }

    sel->count = count;
    size = SIZEOF_WAITQUEUE_SET + (count * SIZEOF_WAITQUEUE_LINK);
    if (uth->uu_allocsize) {
        if (uth->uu_wqset == 0)
            panic("select: wql memory smashed");
        /* needed for the select now */
        if (size > uth->uu_allocsize) {
            kfree(uth->uu_wqset, uth->uu_allocsize);
            uth->uu_allocsize = size;
            uth->uu_wqset = (wait_queue_set_t)kalloc(size);
            if (uth->uu_wqset == (wait_queue_set_t)NULL)
                panic("failed to allocate memory for waitqueue\n");
        }
    } else {
        uth->uu_allocsize = size;
        uth->uu_wqset = (wait_queue_set_t)kalloc(uth->uu_allocsize);
        if (uth->uu_wqset == (wait_queue_set_t)NULL)
            panic("failed to allocate memory for waitqueue\n");
    }
    bzero(uth->uu_wqset, size);
    sel->wql = (char *)uth->uu_wqset + SIZEOF_WAITQUEUE_SET;
    wait_queue_set_init(uth->uu_wqset, (SYNC_POLICY_FIFO | SYNC_POLICY_PREPOST));

continuation:

    if (error) {
        /*
         * We have already cleaned up any state we established,
         * either locally or as a result of selcount().  We don't
         * need to wait_subqueue_unlink_all(), since we haven't set
         * anything at this point.
         */
        return (error);
    }

    return selprocess(0, SEL_FIRSTPASS);
}

int
selcontinue(int error)
{
    return selprocess(error, SEL_SECONDPASS);
}


/*
 * selprocess
 *
 * Parameters:	error			The error code from our caller
 *		sel_pass		The pass we are on
 */
int
selprocess(int error, int sel_pass)
{
    int ncoll;
    u_int ni, nw;
    thread_t th_act;
    struct uthread *uth;
    struct proc *p;
    struct select_args *uap;
    int *retval;
    struct _select *sel;
    int unwind = 1;
    int prepost = 0;
    int somewakeup = 0;
    int doretry = 0;
    wait_result_t wait_result;

    p = current_proc();
    th_act = current_thread();
    uap = (struct select_args *)get_bsduthreadarg(th_act);
    retval = (int *)get_bsduthreadrval(th_act);
    uth = get_bsdthread_info(th_act);
    sel = &uth->uu_select;

    if ((error != 0) && (sel_pass == SEL_FIRSTPASS))
        unwind = 0;
    if (sel->count == 0)
        unwind = 0;
retry:
    if (error != 0) {
        sel_pass = SEL_FIRSTPASS; /* Reset for seldrop */
        goto done;
    }

    ncoll = nselcoll;
    OSBitOrAtomic(P_SELECT, &p->p_flag);
    /* skip scans if the select is just for timeouts */
    if (sel->count) {
        /*
         * Clear out any dangling refs from prior calls; technically
         * there should not be any.
         */
        if (sel_pass == SEL_FIRSTPASS)
            wait_queue_sub_clearrefs(uth->uu_wqset);

        error = selscan(p, sel, uap->nd, retval, sel_pass, (wait_queue_sub_t)uth->uu_wqset);
        if (error || *retval) {
            goto done;
        }
        if (prepost) {
            /*
             * if the select was preposted, we may wake up and find
             * that someone else already read the data; go to select
             * again if time permits
             */
            prepost = 0;
            doretry = 1;
        }
        if (somewakeup) {
            somewakeup = 0;
            doretry = 1;
        }
    }

    if (uap->tv) {
        uint64_t now;

        clock_get_uptime(&now);
        if (now >= sel->abstime)
            goto done;
    }

    if (doretry) {
        /* cleanup obits and try again */
        doretry = 0;
        sel_pass = SEL_FIRSTPASS;
        goto retry;
    }

    /*
     * To effect a poll, the timeout argument should be
     * non-nil, pointing to a zero-valued timeval structure.
     */
    if (uap->tv && sel->abstime == 0) {
        goto done;
    }

    /* No spurious wakeups due to colls; no need to check for them */
    if ((sel_pass == SEL_SECONDPASS) || ((p->p_flag & P_SELECT) == 0)) {
        sel_pass = SEL_FIRSTPASS;
        goto retry;
    }

    OSBitAndAtomic(~((uint32_t)P_SELECT), &p->p_flag);

    /* if the select is just for timeout skip check */
    if (sel->count && (sel_pass == SEL_SECONDPASS))
        panic("selprocess: 2nd pass assertwaiting");

    /* Wait Queue Subordinate has waitqueue as first element */
    wait_result = wait_queue_assert_wait((wait_queue_t)uth->uu_wqset,
        NULL, THREAD_ABORTSAFE, sel->abstime);
    if (wait_result != THREAD_AWAKENED) {
        /* there are no preposted events */
        error = tsleep1(NULL, PSOCK | PCATCH,
            "select", 0, selcontinue);
    } else {
        prepost = 1;
        error = 0;
    }

    if (error == 0) {
        sel_pass = SEL_SECONDPASS;
        if (!prepost)
            somewakeup = 1;
        goto retry;
    }
done:
    if (unwind) {
        wait_subqueue_unlink_all(uth->uu_wqset);
        seldrop(p, sel->ibits, uap->nd);
    }
    OSBitAndAtomic(~((uint32_t)P_SELECT), &p->p_flag);
    /* select is not restarted after signals... */
    if (error == ERESTART)
        error = EINTR;
    if (error == EWOULDBLOCK)
        error = 0;
    nw = howmany(uap->nd, NFDBITS);
    ni = nw * sizeof(fd_mask);

#define putbits(name, x) \
    do { \
        if (uap->name && (error2 = \
            copyout((caddr_t)&sel->obits[(x) * nw], uap->name, ni))) \
            error = error2; \
    } while (0)

    if (error == 0) {
        int error2;

        putbits(in, 0);
        putbits(ou, 1);
        putbits(ex, 2);
#undef putbits
    }
    return (error);
}


/*
 * selscan
 *
 * Parameters:	p			Process performing the select
 *		sel			The per-thread select context structure
 *		nfd			The number of file descriptors to scan
 *		retval			The per thread system call return area
 *		sel_pass		Which pass this is; allowed values are
 *						SEL_FIRSTPASS and SEL_SECONDPASS
 *		wqsub			The per thread wait queue set
 *
 * Returns:	0			Success
 *		EIO			Invalid p->p_fd field XXX Obsolete?
 *		EBADF			One of the files in the bit vector is
 *						invalid.
 */
static int
selscan(struct proc *p, struct _select *sel, int nfd, int32_t *retval,
    int sel_pass, wait_queue_sub_t wqsub)
{
    struct filedesc *fdp = p->p_fd;
    int msk, i, j, fd;
    u_int32_t bits;
    struct fileproc *fp;
    int n = 0;		/* count of bits */
    int nc = 0;		/* bit vector offset (nc'th bit) */
    static int flag[3] = { FREAD, FWRITE, 0 };
    u_int32_t *iptr, *optr;
    u_int nw;
    u_int32_t *ibits, *obits;
    char * wql;
    char * wql_ptr;
    int count;
    struct vfs_context context = *vfs_context_current();

    /*
     * Problems at reboot, due to Mac OS X signal problems seen in
     * Beaker1C; verify that p->p_fd is valid
     */
    if (fdp == NULL) {
        *retval = 0;
        return (EIO);
    }
    ibits = sel->ibits;
    obits = sel->obits;
    wql = sel->wql;

    nw = howmany(nfd, NFDBITS);

    count = sel->count;

    nc = 0;
    if (count) {
        proc_fdlock(p);
        for (msk = 0; msk < 3; msk++) {
            iptr = (u_int32_t *)&ibits[msk * nw];
            optr = (u_int32_t *)&obits[msk * nw];

            for (i = 0; i < nfd; i += NFDBITS) {
                bits = iptr[i/NFDBITS];

                while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
                    bits &= ~(1 << j);
                    fp = fdp->fd_ofiles[fd];

                    if (fp == NULL || (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
                        /*
                         * If we abort because of a bad
                         * fd, let the caller unwind...
                         */
                        proc_fdunlock(p);
                        return (EBADF);
                    }
                    if (sel_pass == SEL_SECONDPASS) {
                        wql_ptr = (char *)0;
                        if ((fp->f_flags & FP_INSELECT) && (fp->f_waddr == (void *)wqsub)) {
                            fp->f_flags &= ~FP_INSELECT;
                            fp->f_waddr = (void *)0;
                        }
                    } else {
                        wql_ptr = (wql + nc * SIZEOF_WAITQUEUE_LINK);
                        if (fp->f_flags & FP_INSELECT) {
                            /* someone is already in select on this fp */
                            fp->f_flags |= FP_SELCONFLICT;
                            wait_queue_link(&select_conflict_queue, (wait_queue_set_t)wqsub);
                        } else {
                            fp->f_flags |= FP_INSELECT;
                            fp->f_waddr = (void *)wqsub;
                        }
                    }

                    context.vc_ucred = fp->f_cred;

                    /* The select; set the bit, if true */
                    if (fp->f_ops
                        && fo_select(fp, flag[msk], wql_ptr, &context)) {
                        optr[fd/NFDBITS] |= (1 << (fd % NFDBITS));
                        n++;
                    }
                    nc++;
                }
            }
        }
        proc_fdunlock(p);
    }
    *retval = n;
    return (0);
}

int poll_callback(struct kqueue *, struct kevent64_s *, void *);

struct poll_continue_args {
    user_addr_t pca_fds;
    u_int pca_nfds;
    u_int pca_rfds;
};

int
poll(struct proc *p, struct poll_args *uap, int32_t *retval)
{
    __pthread_testcancel(1);
    return (poll_nocancel(p, (struct poll_nocancel_args *)uap, retval));
}


int
poll_nocancel(struct proc *p, struct poll_nocancel_args *uap, int32_t *retval)
{
    struct poll_continue_args *cont;
    struct pollfd *fds;
    struct kqueue *kq;
    struct timeval atv;
    int ncoll, error = 0;
    u_int nfds = uap->nfds;
    u_int rfds = 0;
    u_int i;
    size_t ni;

    /*
     * This is kinda bogus.  We have fd limits, but that is not
     * really related to the size of the pollfd array.  Make sure
     * we let the process use at least FD_SETSIZE entries and at
     * least enough for the current limits.  We want to be reasonably
     * safe, but not overly restrictive.
     */
    if (nfds > OPEN_MAX ||
        (nfds > p->p_rlimit[RLIMIT_NOFILE].rlim_cur && (proc_suser(p) || nfds > FD_SETSIZE)))
        return (EINVAL);

    kq = kqueue_alloc(p);
    if (kq == NULL)
        return (EAGAIN);

    ni = nfds * sizeof(struct pollfd) + sizeof(struct poll_continue_args);
    MALLOC(cont, struct poll_continue_args *, ni, M_TEMP, M_WAITOK);
    if (NULL == cont) {
        error = EAGAIN;
        goto out;
    }

    fds = (struct pollfd *)&cont[1];
    error = copyin(uap->fds, fds, nfds * sizeof(struct pollfd));
    if (error)
        goto out;

    if (uap->timeout != -1) {
        struct timeval rtv;

        atv.tv_sec = uap->timeout / 1000;
        atv.tv_usec = (uap->timeout % 1000) * 1000;
        if (itimerfix(&atv)) {
            error = EINVAL;
            goto out;
        }
        getmicrouptime(&rtv);
        timevaladd(&atv, &rtv);
    } else {
        atv.tv_sec = 0;
        atv.tv_usec = 0;
    }
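
    /*
     * Worked example of the conversion above (hypothetical timeout): a
     * caller passing timeout = 1500 ms yields atv = { .tv_sec = 1,
     * .tv_usec = 500000 }, which is then added to the current uptime to
     * form the absolute deadline used by kqueue_scan() below.
     */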

    /* JMM - all this P_SELECT stuff is bogus */
    ncoll = nselcoll;
    OSBitOrAtomic(P_SELECT, &p->p_flag);
    for (i = 0; i < nfds; i++) {
        short events = fds[i].events;
        struct kevent64_s kev;
        int kerror = 0;

        /* per spec, ignore fd values below zero */
        if (fds[i].fd < 0) {
            fds[i].revents = 0;
            continue;
        }

        /* convert the poll event into a kqueue kevent */
        kev.ident = fds[i].fd;
        kev.flags = EV_ADD | EV_ONESHOT | EV_POLL;
        kev.udata = CAST_USER_ADDR_T(&fds[i]);
        kev.fflags = 0;
        kev.data = 0;
        kev.ext[0] = 0;
        kev.ext[1] = 0;

        /* Handle input events */
        if (events & ( POLLIN | POLLRDNORM | POLLPRI | POLLRDBAND | POLLHUP )) {
            kev.filter = EVFILT_READ;
            if (!(events & ( POLLIN | POLLRDNORM )))
                kev.flags |= EV_OOBAND;
            kerror = kevent_register(kq, &kev, p);
        }

        /* Handle output events */
        if (kerror == 0 &&
            events & ( POLLOUT | POLLWRNORM | POLLWRBAND )) {
            kev.filter = EVFILT_WRITE;
            kerror = kevent_register(kq, &kev, p);
        }

        /* Handle BSD extension vnode events */
        if (kerror == 0 &&
            events & ( POLLEXTEND | POLLATTRIB | POLLNLINK | POLLWRITE )) {
            kev.filter = EVFILT_VNODE;
            kev.fflags = 0;
            if (events & POLLEXTEND)
                kev.fflags |= NOTE_EXTEND;
            if (events & POLLATTRIB)
                kev.fflags |= NOTE_ATTRIB;
            if (events & POLLNLINK)
                kev.fflags |= NOTE_LINK;
            if (events & POLLWRITE)
                kev.fflags |= NOTE_WRITE;
            kerror = kevent_register(kq, &kev, p);
        }

        if (kerror != 0) {
            fds[i].revents = POLLNVAL;
            rfds++;
        } else
            fds[i].revents = 0;
    }
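
    /*
     * Sketch of the translation the loop above performs (illustrative
     * entry): a pollfd of { .fd = 4, .events = POLLIN } is registered as
     * a kevent of { .ident = 4, .filter = EVFILT_READ, .flags = EV_ADD |
     * EV_ONESHOT | EV_POLL }; poll_callback() later folds any firing
     * back into that pollfd's revents.
     */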

    /* Did we have any trouble registering? */
    if (rfds > 0)
        goto done;

    /* scan for, and possibly wait for, the kevents to trigger */
    cont->pca_fds = uap->fds;
    cont->pca_nfds = nfds;
    cont->pca_rfds = rfds;
    error = kqueue_scan(kq, poll_callback, NULL, cont, &atv, p);
    rfds = cont->pca_rfds;

done:
    OSBitAndAtomic(~((uint32_t)P_SELECT), &p->p_flag);
    /* poll is not restarted after signals... */
    if (error == ERESTART)
        error = EINTR;
    if (error == EWOULDBLOCK)
        error = 0;
    if (error == 0) {
        error = copyout(fds, uap->fds, nfds * sizeof(struct pollfd));
        *retval = rfds;
    }

out:
    if (NULL != cont)
        FREE(cont, M_TEMP);

    kqueue_dealloc(kq);
    return (error);
}

int
poll_callback(__unused struct kqueue *kq, struct kevent64_s *kevp, void *data)
{
    struct poll_continue_args *cont = (struct poll_continue_args *)data;
    struct pollfd *fds = CAST_DOWN(struct pollfd *, kevp->udata);
    short prev_revents = fds->revents;
    short mask;

    /* convert the results back into revents */
    if (kevp->flags & EV_EOF)
        fds->revents |= POLLHUP;
    if (kevp->flags & EV_ERROR)
        fds->revents |= POLLERR;

    switch (kevp->filter) {
    case EVFILT_READ:
        if (fds->revents & POLLHUP)
            mask = (POLLIN | POLLRDNORM | POLLPRI | POLLRDBAND );
        else {
            mask = 0;
            if (kevp->data != 0)
                mask |= (POLLIN | POLLRDNORM );
            if (kevp->flags & EV_OOBAND)
                mask |= ( POLLPRI | POLLRDBAND );
        }
        fds->revents |= (fds->events & mask);
        break;

    case EVFILT_WRITE:
        if (!(fds->revents & POLLHUP))
            fds->revents |= (fds->events & ( POLLOUT | POLLWRNORM | POLLWRBAND ));
        break;

    case EVFILT_VNODE:
        if (kevp->fflags & NOTE_EXTEND)
            fds->revents |= (fds->events & POLLEXTEND);
        if (kevp->fflags & NOTE_ATTRIB)
            fds->revents |= (fds->events & POLLATTRIB);
        if (kevp->fflags & NOTE_LINK)
            fds->revents |= (fds->events & POLLNLINK);
        if (kevp->fflags & NOTE_WRITE)
            fds->revents |= (fds->events & POLLWRITE);
        break;
    }

    if (fds->revents != 0 && prev_revents == 0)
        cont->pca_rfds++;

    return 0;
}

int
seltrue(__unused dev_t dev, __unused int flag, __unused struct proc *p)
{

    return (1);
}

/*
 * selcount
 *
 * Count the number of bits set in the input bit vector, and establish an
 * outstanding fp->f_iocount for each of the descriptors which will be in
 * use in the select operation.
 *
 * Parameters:	p			The process doing the select
 *		ibits			The input bit vector
 *		nfd			The number of fd's in the vector
 *		countp			Pointer to where to store the bit count
 *
 * Returns:	0			Success
 *		EIO			Bad per process open file table
 *		EBADF			One of the bits in the input bit vector
 *						references an invalid fd
 *
 * Implicit:	*countp (modified)	Count of fd's
 *
 * Notes:	This function is the first pass under the proc_fdlock() that
 *		permits us to recognize invalid descriptors in the bit vector;
 *		they may, however, not remain valid through the drop and
 *		later reacquisition of the proc_fdlock().
 */
static int
selcount(struct proc *p, u_int32_t *ibits, int nfd, int *countp)
{
    struct filedesc *fdp = p->p_fd;
    int msk, i, j, fd;
    u_int32_t bits;
    struct fileproc *fp;
    int n = 0;
    u_int32_t *iptr;
    u_int nw;
    int error = 0;
    int dropcount;
    int need_wakeup = 0;

    /*
     * Problems at reboot, due to Mac OS X signal problems seen in
     * Beaker1C; verify that p->p_fd is valid
     */
    if (fdp == NULL) {
        *countp = 0;
        return (EIO);
    }
    nw = howmany(nfd, NFDBITS);

    proc_fdlock(p);
    for (msk = 0; msk < 3; msk++) {
        iptr = (u_int32_t *)&ibits[msk * nw];
        for (i = 0; i < nfd; i += NFDBITS) {
            bits = iptr[i/NFDBITS];
            while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
                bits &= ~(1 << j);
                fp = fdp->fd_ofiles[fd];
                if (fp == NULL ||
                    (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
                    *countp = 0;
                    error = EBADF;
                    goto bad;
                }
                fp->f_iocount++;
                n++;
            }
        }
    }
    proc_fdunlock(p);

    *countp = n;
    return (0);

bad:
    dropcount = 0;

    if (n == 0)
        goto out;
    /* Ignore error return; it's already EBADF */
    (void)seldrop_locked(p, ibits, nfd, n, &need_wakeup, 1);

out:
    proc_fdunlock(p);
    if (need_wakeup) {
        wakeup(&p->p_fpdrainwait);
    }
    return (error);
}


/*
 * seldrop_locked
 *
 * Drop outstanding wait queue references set up during selscan(); drop the
 * outstanding per fileproc f_iocount() picked up during the selcount().
 *
 * Parameters:	p			Process performing the select
 *		ibits			Input bit vector of fd's
 *		nfd			Number of fd's
 *		lim			Limit to number of vector entries to
 *						consider, or -1 for "all"
 *		fromselcount		True if called from selcount()
 *		need_wakeup		Pointer to flag to set to do a wakeup
 *						if f_iocount on any descriptor goes to 0
 *
 * Returns:	0			Success
 *		EBADF			One or more fds in the bit vector
 *						were invalid, but the rest
 *						were successfully dropped
 *
 * Notes:	An fd may become bad while the proc_fdlock() is not held,
 *		if a multithreaded application closes the fd out from under
 *		the in progress select.  In this case, we still have to
 *		clean up after the set up on the remaining fds.
 */
static int
seldrop_locked(struct proc *p, u_int32_t *ibits, int nfd, int lim, int *need_wakeup, int fromselcount)
{
    struct filedesc *fdp = p->p_fd;
    int msk, i, j, fd;
    u_int32_t bits;
    struct fileproc *fp;
    u_int32_t *iptr;
    u_int nw;
    int error = 0;
    int dropcount = 0;
    uthread_t uth = get_bsdthread_info(current_thread());

    *need_wakeup = 0;

    /*
     * Problems at reboot, due to Mac OS X signal problems seen in
     * Beaker1C; verify that p->p_fd is valid
     */
    if (fdp == NULL) {
        return (EIO);
    }

    nw = howmany(nfd, NFDBITS);

    for (msk = 0; msk < 3; msk++) {
        iptr = (u_int32_t *)&ibits[msk * nw];
        for (i = 0; i < nfd; i += NFDBITS) {
            bits = iptr[i/NFDBITS];
            while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
                bits &= ~(1 << j);
                fp = fdp->fd_ofiles[fd];
                /*
                 * If we've already dropped as many as were
                 * counted/scanned, then we are done.
                 */
                if ((fromselcount != 0) && (++dropcount > lim))
                    goto done;

                if (fp == NULL) {
                    /* skip (now) bad fds */
                    error = EBADF;
                    continue;
                }
                /*
                 * Only clear the flag if we set it.  We'll
                 * only find that we set it if we had made
                 * at least one [partial] pass through selscan().
                 */
                if ((fp->f_flags & FP_INSELECT) && (fp->f_waddr == (void *)uth->uu_wqset)) {
                    fp->f_flags &= ~FP_INSELECT;
                    fp->f_waddr = (void *)0;
                }

                fp->f_iocount--;
                if (fp->f_iocount < 0)
                    panic("f_iocount overdecrement!");

                if (fp->f_iocount == 0) {
                    /*
                     * The last iocount is responsible for clearing
                     * the selconflict flag - even if we didn't set it -
                     * and is also responsible for waking up anyone
                     * waiting on iocounts to drain.
                     */
                    if (fp->f_flags & FP_SELCONFLICT)
                        fp->f_flags &= ~FP_SELCONFLICT;
                    if (p->p_fpdrainwait) {
                        p->p_fpdrainwait = 0;
                        *need_wakeup = 1;
                    }
                }
            }
        }
    }
done:
    return (error);
}


static int
seldrop(struct proc *p, u_int32_t *ibits, int nfd)
{
    int error;
    int need_wakeup = 0;

    proc_fdlock(p);
    error = seldrop_locked(p, ibits, nfd, nfd, &need_wakeup, 0);
    proc_fdunlock(p);
    if (need_wakeup) {
        wakeup(&p->p_fpdrainwait);
    }
    return (error);
}

/*
 * Record a select request.
 */
void
selrecord(__unused struct proc *selector, struct selinfo *sip, void * p_wql)
{
    thread_t cur_act = current_thread();
    struct uthread * ut = get_bsdthread_info(cur_act);

    /* need to look at collisions */

    /* do not record if this is the second pass of select */
    if (p_wql == (void *)0) {
        return;
    }

    if ((sip->si_flags & SI_INITED) == 0) {
        wait_queue_init(&sip->si_wait_queue, SYNC_POLICY_FIFO);
        sip->si_flags |= SI_INITED;
        sip->si_flags &= ~SI_CLEAR;
    }

    if (sip->si_flags & SI_RECORDED) {
        sip->si_flags |= SI_COLL;
    } else
        sip->si_flags &= ~SI_COLL;

    sip->si_flags |= SI_RECORDED;
    if (!wait_queue_member(&sip->si_wait_queue, ut->uu_wqset))
        wait_queue_link_noalloc(&sip->si_wait_queue, ut->uu_wqset,
            (wait_queue_link_t)p_wql);

    return;
}

void
selwakeup(struct selinfo *sip)
{

    if ((sip->si_flags & SI_INITED) == 0) {
        return;
    }

    if (sip->si_flags & SI_COLL) {
        nselcoll++;
        sip->si_flags &= ~SI_COLL;
#if 0
        /* will not support */
        //wakeup((caddr_t)&selwait);
#endif
    }

    if (sip->si_flags & SI_RECORDED) {
        wait_queue_wakeup_all(&sip->si_wait_queue, NULL, THREAD_AWAKENED);
        sip->si_flags &= ~SI_RECORDED;
    }

}

void
selthreadclear(struct selinfo *sip)
{

    if ((sip->si_flags & SI_INITED) == 0) {
        return;
    }
    if (sip->si_flags & SI_RECORDED) {
        selwakeup(sip);
        sip->si_flags &= ~(SI_RECORDED | SI_COLL);
    }
    sip->si_flags |= SI_CLEAR;
    wait_queue_unlink_all(&sip->si_wait_queue);
}



#define DBG_POST	0x10
#define DBG_WATCH	0x11
#define DBG_WAIT	0x12
#define DBG_MOD		0x13
#define DBG_EWAKEUP	0x14
#define DBG_ENQUEUE	0x15
#define DBG_DEQUEUE	0x16

#define DBG_MISC_POST MISCDBG_CODE(DBG_EVENT,DBG_POST)
#define DBG_MISC_WATCH MISCDBG_CODE(DBG_EVENT,DBG_WATCH)
#define DBG_MISC_WAIT MISCDBG_CODE(DBG_EVENT,DBG_WAIT)
#define DBG_MISC_MOD MISCDBG_CODE(DBG_EVENT,DBG_MOD)
#define DBG_MISC_EWAKEUP MISCDBG_CODE(DBG_EVENT,DBG_EWAKEUP)
#define DBG_MISC_ENQUEUE MISCDBG_CODE(DBG_EVENT,DBG_ENQUEUE)
#define DBG_MISC_DEQUEUE MISCDBG_CODE(DBG_EVENT,DBG_DEQUEUE)


#define EVPROCDEQUE(p, evq)	do {                           \
    proc_lock(p);                                              \
    if (evq->ee_flags & EV_QUEUED) {                           \
        TAILQ_REMOVE(&p->p_evlist, evq, ee_plist);             \
        evq->ee_flags &= ~EV_QUEUED;                           \
    }                                                          \
    proc_unlock(p);                                            \
} while (0);


/*
 * called upon socket close. dequeue and free all events for
 * the socket... socket must be locked by caller.
 */
void
evsofree(struct socket *sp)
{
    struct eventqelt *evq, *next;
    proc_t p;

    if (sp == NULL)
        return;

    for (evq = sp->so_evlist.tqh_first; evq != NULL; evq = next) {
        next = evq->ee_slist.tqe_next;
        p = evq->ee_proc;

        if (evq->ee_flags & EV_QUEUED) {
            EVPROCDEQUE(p, evq);
        }
        TAILQ_REMOVE(&sp->so_evlist, evq, ee_slist); // remove from socket q
        FREE(evq, M_TEMP);
    }
}


/*
 * called upon pipe close. dequeue and free all events for
 * the pipe... pipe must be locked by caller
 */
void
evpipefree(struct pipe *cpipe)
{
    struct eventqelt *evq, *next;
    proc_t p;

    for (evq = cpipe->pipe_evlist.tqh_first; evq != NULL; evq = next) {
        next = evq->ee_slist.tqe_next;
        p = evq->ee_proc;

        EVPROCDEQUE(p, evq);

        TAILQ_REMOVE(&cpipe->pipe_evlist, evq, ee_slist); // remove from pipe q
        FREE(evq, M_TEMP);
    }
}


/*
 * enqueue this event if it's not already queued. wakeup
 * the proc if we do queue this event to it...
 * the proc lock is taken here and dropped before
 * doing the wakeup; we return in that (unlocked) state
 */
static void
evprocenque(struct eventqelt *evq)
{
    proc_t p;

    assert(evq);
    p = evq->ee_proc;

    KERNEL_DEBUG(DBG_MISC_ENQUEUE|DBG_FUNC_START, (uint32_t)evq, evq->ee_flags, evq->ee_eventmask, 0, 0);

    proc_lock(p);

    if (evq->ee_flags & EV_QUEUED) {
        proc_unlock(p);

        KERNEL_DEBUG(DBG_MISC_ENQUEUE|DBG_FUNC_END, 0,0,0,0,0);
        return;
    }
    evq->ee_flags |= EV_QUEUED;

    TAILQ_INSERT_TAIL(&p->p_evlist, evq, ee_plist);

    proc_unlock(p);

    wakeup(&p->p_evlist);

    KERNEL_DEBUG(DBG_MISC_ENQUEUE|DBG_FUNC_END, 0,0,0,0,0);
}


/*
 * pipe lock must be taken by the caller
 */
void
postpipeevent(struct pipe *pipep, int event)
{
    int mask;
    struct eventqelt *evq;

    if (pipep == NULL)
        return;
    KERNEL_DEBUG(DBG_MISC_POST|DBG_FUNC_START, event,0,0,1,0);

    for (evq = pipep->pipe_evlist.tqh_first;
         evq != NULL; evq = evq->ee_slist.tqe_next) {

        if (evq->ee_eventmask == 0)
            continue;
        mask = 0;

        switch (event & (EV_RWBYTES | EV_RCLOSED | EV_WCLOSED)) {

        case EV_RWBYTES:
            if ((evq->ee_eventmask & EV_RE) && pipep->pipe_buffer.cnt) {
                mask |= EV_RE;
                evq->ee_req.er_rcnt = pipep->pipe_buffer.cnt;
            }
            if ((evq->ee_eventmask & EV_WR) &&
                (MAX(pipep->pipe_buffer.size,PIPE_SIZE) - pipep->pipe_buffer.cnt) >= PIPE_BUF) {

                if (pipep->pipe_state & PIPE_EOF) {
                    mask |= EV_WR|EV_RESET;
                    break;
                }
                mask |= EV_WR;
                evq->ee_req.er_wcnt = MAX(pipep->pipe_buffer.size, PIPE_SIZE) - pipep->pipe_buffer.cnt;
            }
            break;

        case EV_WCLOSED:
        case EV_RCLOSED:
            if ((evq->ee_eventmask & EV_RE)) {
                mask |= EV_RE|EV_RCLOSED;
            }
            if ((evq->ee_eventmask & EV_WR)) {
                mask |= EV_WR|EV_WCLOSED;
            }
            break;

        default:
            return;
        }
        if (mask) {
            /*
             * disarm... postevents are nops until this event is 'read' via
             * waitevent and then re-armed via modwatch
             */
            evq->ee_eventmask = 0;

            /*
             * since events are disarmed until after the waitevent
             * the ee_req.er_xxxx fields can't change once we've
             * inserted this event into the proc queue...
             * therefore, the waitevent will see a 'consistent'
             * snapshot of the event, even though it won't hold
             * the pipe lock, and we're updating the event outside
             * of the proc lock, which it will hold
             */
            evq->ee_req.er_eventbits |= mask;

            KERNEL_DEBUG(DBG_MISC_POST, (uint32_t)evq, evq->ee_req.er_eventbits, mask, 1,0);

            evprocenque(evq);
        }
    }
    KERNEL_DEBUG(DBG_MISC_POST|DBG_FUNC_END, 0,0,0,1,0);
}

2101 #if SOCKETS
2102 /*
2103 * given either a sockbuf or a socket run down the
2104 * event list and queue ready events found...
2105 * the socket must be locked by the caller
2106 */
2107 void
2108 postevent(struct socket *sp, struct sockbuf *sb, int event)
2109 {
2110 int mask;
2111 struct eventqelt *evq;
2112 struct tcpcb *tp;
2113
2114 if (sb)
2115 sp = sb->sb_so;
2116 if (sp == NULL)
2117 return;
2118
2119 KERNEL_DEBUG(DBG_MISC_POST|DBG_FUNC_START, (int)sp, event, 0, 0, 0);
2120
2121 for (evq = sp->so_evlist.tqh_first;
2122 evq != NULL; evq = evq->ee_slist.tqe_next) {
2123
2124 if (evq->ee_eventmask == 0)
2125 continue;
2126 mask = 0;
2127
2128 /* ready for reading:
2129 *  - byte cnt >= receive low water mark
2130 *  - read-half of conn closed
2131 *  - conn pending for listening sock
2132 *  - socket error pending
2133 *
2134 * ready for writing:
2135 *  - byte cnt avail >= send low water mark
2136 *  - write half of conn closed
2137 *  - socket error pending
2138 *  - non-blocking conn completed successfully
2139 *
2140 * exception pending:
2141 *  - out of band data
2142 *  - sock at out of band mark
2143 */
2144
2145 switch (event & EV_DMASK) {
2146
2147 case EV_OOB:
2148 if ((evq->ee_eventmask & EV_EX)) {
2149 if (sp->so_oobmark || ((sp->so_state & SS_RCVATMARK)))
2150 mask |= EV_EX|EV_OOB;
2151 }
2152 break;
2153
2154 case EV_RWBYTES|EV_OOB:
2155 if ((evq->ee_eventmask & EV_EX)) {
2156 if (sp->so_oobmark || ((sp->so_state & SS_RCVATMARK)))
2157 mask |= EV_EX|EV_OOB;
2158 }
2159 /*
2160 * fall into the next case
2161 */
2162 case EV_RWBYTES:
2163 if ((evq->ee_eventmask & EV_RE) && soreadable(sp)) {
2164 if (sp->so_error) {
2165 if ((sp->so_type == SOCK_STREAM) && ((sp->so_error == ECONNREFUSED) || (sp->so_error == ECONNRESET))) {
2166 if ((sp->so_pcb == 0) || (((struct inpcb *)sp->so_pcb)->inp_state == INPCB_STATE_DEAD) || !(tp = sototcpcb(sp)) ||
2167 (tp->t_state == TCPS_CLOSED)) {
2168 mask |= EV_RE|EV_RESET;
2169 break;
2170 }
2171 }
2172 }
2173 mask |= EV_RE;
2174 evq->ee_req.er_rcnt = sp->so_rcv.sb_cc;
2175
2176 if (sp->so_state & SS_CANTRCVMORE) {
2177 mask |= EV_FIN;
2178 break;
2179 }
2180 }
2181 if ((evq->ee_eventmask & EV_WR) && sowriteable(sp)) {
2182 if (sp->so_error) {
2183 if ((sp->so_type == SOCK_STREAM) && ((sp->so_error == ECONNREFUSED) || (sp->so_error == ECONNRESET))) {
2184 if ((sp->so_pcb == 0) || (((struct inpcb *)sp->so_pcb)->inp_state == INPCB_STATE_DEAD) || !(tp = sototcpcb(sp)) ||
2185 (tp->t_state == TCPS_CLOSED)) {
2186 mask |= EV_WR|EV_RESET;
2187 break;
2188 }
2189 }
2190 }
2191 mask |= EV_WR;
2192 evq->ee_req.er_wcnt = sbspace(&sp->so_snd);
2193 }
2194 break;
2195
2196 case EV_RCONN:
2197 if ((evq->ee_eventmask & EV_RE)) {
2198 mask |= EV_RE|EV_RCONN;
2199 evq->ee_req.er_rcnt = sp->so_qlen + 1; // incl this one
2200 }
2201 break;
2202
2203 case EV_WCONN:
2204 if ((evq->ee_eventmask & EV_WR)) {
2205 mask |= EV_WR|EV_WCONN;
2206 }
2207 break;
2208
2209 case EV_RCLOSED:
2210 if ((evq->ee_eventmask & EV_RE)) {
2211 mask |= EV_RE|EV_RCLOSED;
2212 }
2213 break;
2214
2215 case EV_WCLOSED:
2216 if ((evq->ee_eventmask & EV_WR)) {
2217 mask |= EV_WR|EV_WCLOSED;
2218 }
2219 break;
2220
2221 case EV_FIN:
2222 if (evq->ee_eventmask & EV_RE) {
2223 mask |= EV_RE|EV_FIN;
2224 }
2225 break;
2226
2227 case EV_RESET:
2228 case EV_TIMEOUT:
2229 if (evq->ee_eventmask & EV_RE) {
2230 mask |= EV_RE | event;
2231 }
2232 if (evq->ee_eventmask & EV_WR) {
2233 mask |= EV_WR | event;
2234 }
2235 break;
2236
2237 default:
2238 KERNEL_DEBUG(DBG_MISC_POST|DBG_FUNC_END, (int)sp, -1, 0, 0, 0);
2239 return;
2240 } /* switch */
2241
2242 KERNEL_DEBUG(DBG_MISC_POST, (int)evq, evq->ee_eventmask, evq->ee_req.er_eventbits, mask, 0);
2243
2244 if (mask) {
2245 /*
2246 * disarm... postevents are nops until this event is 'read' via
2247 * waitevent and then re-armed via modwatch
2248 */
2249 evq->ee_eventmask = 0;
2250
2251 /*
2252 * since events are disarmed until after the waitevent
2253 * the ee_req.er_xxxx fields can't change once we've
2254 * inserted this event into the proc queue...
2255 * since waitevent can't see this event until we
2256 * enqueue it, waitevent will see a 'consistent'
2257 * snapshot of the event, even though it won't hold
2258 * the socket lock, and we're updating the event outside
2259 * of the proc lock, which it will hold
2260 */
2261 evq->ee_req.er_eventbits |= mask;
2262
2263 evprocenque(evq);
2264 }
2265 }
2266 KERNEL_DEBUG(DBG_MISC_POST|DBG_FUNC_END, (int)sp, 0, 0, 0, 0);
2267 }
2268 #endif /* SOCKETS */
2269
2270
2271 /*
2272 * watchevent system call. user passes us an event to watch
2273 * for. we malloc an event object, initialize it, and queue
2274 * it to the open socket. when the event occurs, postevent()
2275 * will enqueue it back to our proc where we can retrieve it
2276 * via waitevent().
2277 *
2278 * note: a duplicate watch on the same file by the same proc is rejected below with EINVAL
2279 *
2280 * Returns:
2281 * ENOMEM No memory for operation
2282 * copyin:EFAULT
2283 */
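/*
 * Illustrative user-space sketch (assumes the eventreq layout and the
 * watchevent() prototype historically exported via <sys/ev.h>; names
 * and error handling are hypothetical):
 *
 *	struct eventreq er;
 *
 *	bzero(&er, sizeof (er));
 *	er.er_type = EV_FD;
 *	er.er_handle = sock_fd;		// descriptor to watch
 *	er.er_data = (void *)my_cookie;	// echoed back by waitevent()
 *	if (watchevent(&er, EV_RE | EV_WR) == -1)
 *		err(1, "watchevent");
 */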
2284 int
2285 watchevent(proc_t p, struct watchevent_args *uap, __unused int *retval)
2286 {
2287 struct eventqelt *evq = NULL;
2288 struct eventqelt *np = NULL;
2289 struct eventreq64 *erp;
2290 struct fileproc *fp = NULL;
2291 int error;
2292
2293 KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_START, 0,0,0,0,0);
2294
2295 // get a qelt and fill with users req
2296 MALLOC(evq, struct eventqelt *, sizeof(struct eventqelt), M_TEMP, M_WAITOK);
2297
2298 if (evq == NULL)
2299 return (ENOMEM);
2300 erp = &evq->ee_req;
2301
2302 // get users request pkt
2303
2304 if (IS_64BIT_PROCESS(p)) {
2305 error = copyin(uap->u_req, (caddr_t)erp, sizeof(struct eventreq64));
2306 } else {
2307 struct eventreq32 er32;
2308
2309 error = copyin(uap->u_req, (caddr_t)&er32, sizeof(struct eventreq32));
2310 if (error == 0) {
2311 /*
2312 * the user only passes in the
2313 * er_type, er_handle and er_data...
2314 * the other fields are initialized
2315 * below, so don't bother to copy
2316 */
2317 erp->er_type = er32.er_type;
2318 erp->er_handle = er32.er_handle;
2319 erp->er_data = (user_addr_t)er32.er_data;
2320 }
2321 }
2322 if (error) {
2323 FREE(evq, M_TEMP);
2324 KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_END, error,0,0,0,0);
2325
2326 return(error);
2327 }
2328 KERNEL_DEBUG(DBG_MISC_WATCH, erp->er_handle,uap->u_eventmask,(uint32_t)evq,0,0);
2329
2330 // validate, freeing qelt if errors
2331 error = 0;
2332 proc_fdlock(p);
2333
2334 if (erp->er_type != EV_FD) {
2335 error = EINVAL;
2336 } else if ((error = fp_lookup(p, erp->er_handle, &fp, 1)) != 0) {
2337 error = EBADF;
2338 #if SOCKETS
2339 } else if (fp->f_type == DTYPE_SOCKET) {
2340 socket_lock((struct socket *)fp->f_data, 1);
2341 np = ((struct socket *)fp->f_data)->so_evlist.tqh_first;
2342 #endif /* SOCKETS */
2343 } else if (fp->f_type == DTYPE_PIPE) {
2344 PIPE_LOCK((struct pipe *)fp->f_data);
2345 np = ((struct pipe *)fp->f_data)->pipe_evlist.tqh_first;
2346 } else {
2347 fp_drop(p, erp->er_handle, fp, 1);
2348 error = EINVAL;
2349 }
2350 proc_fdunlock(p);
2351
2352 if (error) {
2353 FREE(evq, M_TEMP);
2354
2355 KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_END, error,0,0,0,0);
2356 return(error);
2357 }
2358
2359 /*
2360 * only allow one watch per file per proc
2361 */
2362 for ( ; np != NULL; np = np->ee_slist.tqe_next) {
2363 if (np->ee_proc == p) {
2364 #if SOCKETS
2365 if (fp->f_type == DTYPE_SOCKET)
2366 socket_unlock((struct socket *)fp->f_data, 1);
2367 else
2368 #endif /* SOCKETS */
2369 PIPE_UNLOCK((struct pipe *)fp->f_data);
2370 fp_drop(p, erp->er_handle, fp, 0);
2371 FREE(evq, M_TEMP);
2372
2373 KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_END, EINVAL,0,0,0,0);
2374 return(EINVAL);
2375 }
2376 }
2377 erp->er_ecnt = erp->er_rcnt = erp->er_wcnt = erp->er_eventbits = 0;
2378 evq->ee_proc = p;
2379 evq->ee_eventmask = uap->u_eventmask & EV_MASK;
2380 evq->ee_flags = 0;
2381
2382 #if SOCKETS
2383 if (fp->f_type == DTYPE_SOCKET) {
2384 TAILQ_INSERT_TAIL(&((struct socket *)fp->f_data)->so_evlist, evq, ee_slist);
2385 postevent((struct socket *)fp->f_data, 0, EV_RWBYTES); // catch existing events
2386
2387 socket_unlock((struct socket *)fp->f_data, 1);
2388 } else
2389 #endif /* SOCKETS */
2390 {
2391 TAILQ_INSERT_TAIL(&((struct pipe *)fp->f_data)->pipe_evlist, evq, ee_slist);
2392 postpipeevent((struct pipe *)fp->f_data, EV_RWBYTES);
2393
2394 PIPE_UNLOCK((struct pipe *)fp->f_data);
2395 }
2396 fp_drop_event(p, erp->er_handle, fp);
2397
2398 KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_END, 0,0,0,0,0);
2399 return(0);
2400 }
2401
2402
2403
2404 /*
2405 * waitevent system call.
2406 * grabs the next waiting event for this proc and returns
2407 * it. if no events are queued, the user may sleep with or
2408 * without a timeout, or request poll mode
2409 * ((tv != NULL && interval == 0) || tv == (user_addr_t)-1)
2410 */
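/*
 * Illustrative user-space sketch (hypothetical): wait up to one
 * second for the next queued event; by the convention implemented
 * below, a return of 1 (via *retval) means the poll/timeout expired
 * with nothing queued:
 *
 *	struct eventreq er;
 *	struct timeval tv = { 1, 0 };
 *
 *	switch (waitevent(&er, &tv)) {
 *	case 0:		// er.er_handle / er.er_eventbits are valid
 *		break;
 *	case 1:		// timed out, no event was queued
 *		break;
 *	default:	// error
 *		break;
 *	}
 *
 * passing tv == (struct timeval *)-1 selects the fast poll path.
 */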
2411 int
2412 waitevent(proc_t p, struct waitevent_args *uap, int *retval)
2413 {
2414 int error = 0;
2415 struct eventqelt *evq;
2416 struct eventreq64 *erp;
2417 uint64_t abstime, interval;
2418 boolean_t fast_poll = FALSE;
2419 union {
2420 struct eventreq64 er64;
2421 struct eventreq32 er32;
2422 } uer;
2423
2424 interval = 0;
2425
2426 if (uap->tv) {
2427 struct timeval atv;
2428 /*
2429 * check for fast poll method
2430 */
2431 if (IS_64BIT_PROCESS(p)) {
2432 if (uap->tv == (user_addr_t)-1)
2433 fast_poll = TRUE;
2434 } else if (uap->tv == (user_addr_t)((uint32_t)-1))
2435 fast_poll = TRUE;
2436
2437 if (fast_poll == TRUE) {
2438 if (p->p_evlist.tqh_first == NULL) {
2439 KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_NONE, -1,0,0,0,0);
2440 /*
2441 * poll failed
2442 */
2443 *retval = 1;
2444 return (0);
2445 }
2446 proc_lock(p);
2447 goto retry;
2448 }
2449 if (IS_64BIT_PROCESS(p)) {
2450 struct user64_timeval atv64;
2451 error = copyin(uap->tv, (caddr_t)&atv64, sizeof(atv64));
2452 /* Loses resolution - assume timeout < 68 years */
2453 atv.tv_sec = atv64.tv_sec;
2454 atv.tv_usec = atv64.tv_usec;
2455 } else {
2456 struct user32_timeval atv32;
2457 error = copyin(uap->tv, (caddr_t)&atv32, sizeof(atv32));
2458 atv.tv_sec = atv32.tv_sec;
2459 atv.tv_usec = atv32.tv_usec;
2460 }
2461
2462 if (error)
2463 return(error);
2464 if (itimerfix(&atv)) {
2465 error = EINVAL;
2466 return(error);
2467 }
2468 interval = tvtoabstime(&atv);
2469 }
2470 KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_START, 0,0,0,0,0);
2471
2472 proc_lock(p);
2473 retry:
2474 if ((evq = p->p_evlist.tqh_first) != NULL) {
2475 /*
2476 * found one... make a local copy while it's still on the queue
2477 * to prevent it from changing while in the midst of copying
2478 * don't want to hold the proc lock across a copyout because
2479 * it might block on a page fault at the target in user space
2480 */
2481 erp = &evq->ee_req;
2482
2483 if (IS_64BIT_PROCESS(p))
2484 bcopy((caddr_t)erp, (caddr_t)&uer.er64, sizeof (struct eventreq64));
2485 else {
2486 uer.er32.er_type = erp->er_type;
2487 uer.er32.er_handle = erp->er_handle;
2488 uer.er32.er_data = (uint32_t)erp->er_data;
2489 uer.er32.er_ecnt = erp->er_ecnt;
2490 uer.er32.er_rcnt = erp->er_rcnt;
2491 uer.er32.er_wcnt = erp->er_wcnt;
2492 uer.er32.er_eventbits = erp->er_eventbits;
2493 }
2494 TAILQ_REMOVE(&p->p_evlist, evq, ee_plist);
2495
2496 evq->ee_flags &= ~EV_QUEUED;
2497
2498 proc_unlock(p);
2499
2500 if (IS_64BIT_PROCESS(p))
2501 error = copyout((caddr_t)&uer.er64, uap->u_req, sizeof(struct eventreq64));
2502 else
2503 error = copyout((caddr_t)&uer.er32, uap->u_req, sizeof(struct eventreq32));
2504
2505 KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_END, error,
2506 evq->ee_req.er_handle,evq->ee_req.er_eventbits,(uint32_t)evq,0);
2507 return (error);
2508 }
2509 else {
2510 if (uap->tv && interval == 0) {
2511 proc_unlock(p);
2512 *retval = 1; // poll failed
2513
2514 KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_END, error,0,0,0,0);
2515 return (error);
2516 }
2517 if (interval != 0)
2518 clock_absolutetime_interval_to_deadline(interval, &abstime);
2519 else
2520 abstime = 0;
2521
2522 KERNEL_DEBUG(DBG_MISC_WAIT, 1,(uint32_t)&p->p_evlist,0,0,0);
2523
2524 error = msleep1(&p->p_evlist, &p->p_mlock, (PSOCK | PCATCH), "waitevent", abstime);
2525
2526 KERNEL_DEBUG(DBG_MISC_WAIT, 2,(uint32_t)&p->p_evlist,0,0,0);
2527
2528 if (error == 0)
2529 goto retry;
2530 if (error == ERESTART)
2531 error = EINTR;
2532 if (error == EWOULDBLOCK) {
2533 *retval = 1;
2534 error = 0;
2535 }
2536 }
2537 proc_unlock(p);
2538
2539 KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_END, 0,0,0,0,0);
2540 return (error);
2541 }
2542
2543
2544 /*
2545 * modwatch system call. user passes in event to modify.
2546 * if we find it we reset the event bits and queue/dequeue
2547 * the event as needed.
2548 */
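/*
 * Illustrative user-space sketch (hypothetical): after waitevent()
 * returns an event it is disarmed, so the caller re-arms it here,
 * or removes the watch entirely with EV_RM:
 *
 *	er.er_type = EV_FD;
 *	er.er_handle = sock_fd;
 *	modwatch(&er, EV_RE);		// re-arm for read readiness
 *	modwatch(&er, EV_RM);		// or tear the watch down
 */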
2549 int
2550 modwatch(proc_t p, struct modwatch_args *uap, __unused int *retval)
2551 {
2552 struct eventreq64 er;
2553 struct eventreq64 *erp = &er;
2554 struct eventqelt *evq = NULL; /* protected by error return */
2555 int error;
2556 struct fileproc *fp;
2557 int flag;
2558
2559 KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_START, 0,0,0,0,0);
2560
2561 /*
2562 * get user's request pkt
2563 * just need the er_type and er_handle which sit above the
2564 * problematic er_data (32/64 issue)... so only copy in
2565 * those 2 fields
2566 */
2567 if ((error = copyin(uap->u_req, (caddr_t)erp, sizeof(er.er_type) + sizeof(er.er_handle)))) {
2568 KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_END, error,0,0,0,0);
2569 return(error);
2570 }
2571 proc_fdlock(p);
2572
2573 if (erp->er_type != EV_FD) {
2574 error = EINVAL;
2575 } else if ((error = fp_lookup(p, erp->er_handle, &fp, 1)) != 0) {
2576 error = EBADF;
2577 #if SOCKETS
2578 } else if (fp->f_type == DTYPE_SOCKET) {
2579 socket_lock((struct socket *)fp->f_data, 1);
2580 evq = ((struct socket *)fp->f_data)->so_evlist.tqh_first;
2581 #endif /* SOCKETS */
2582 } else if (fp->f_type == DTYPE_PIPE) {
2583 PIPE_LOCK((struct pipe *)fp->f_data);
2584 evq = ((struct pipe *)fp->f_data)->pipe_evlist.tqh_first;
2585 } else {
2586 fp_drop(p, erp->er_handle, fp, 1);
2587 error = EINVAL;
2588 }
2589
2590 if (error) {
2591 proc_fdunlock(p);
2592 KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_END, error,0,0,0,0);
2593 return(error);
2594 }
2595
2596 if ((uap->u_eventmask == EV_RM) && (fp->f_flags & FP_WAITEVENT)) {
2597 fp->f_flags &= ~FP_WAITEVENT;
2598 }
2599 proc_fdunlock(p);
2600
2601 // locate event if possible
2602 for ( ; evq != NULL; evq = evq->ee_slist.tqe_next) {
2603 if (evq->ee_proc == p)
2604 break;
2605 }
2606 if (evq == NULL) {
2607 #if SOCKETS
2608 if (fp->f_type == DTYPE_SOCKET)
2609 socket_unlock((struct socket *)fp->f_data, 1);
2610 else
2611 #endif /* SOCKETS */
2612 PIPE_UNLOCK((struct pipe *)fp->f_data);
2613 fp_drop(p, erp->er_handle, fp, 0);
2614 KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_END, EINVAL,0,0,0,0);
2615 return(EINVAL);
2616 }
2617 KERNEL_DEBUG(DBG_MISC_MOD, erp->er_handle,uap->u_eventmask,(uint32_t)evq,0,0);
2618
2619 if (uap->u_eventmask == EV_RM) {
2620 EVPROCDEQUE(p, evq);
2621
2622 #if SOCKETS
2623 if (fp->f_type == DTYPE_SOCKET) {
2624 TAILQ_REMOVE(&((struct socket *)fp->f_data)->so_evlist, evq, ee_slist);
2625 socket_unlock((struct socket *)fp->f_data, 1);
2626 } else
2627 #endif /* SOCKETS */
2628 {
2629 TAILQ_REMOVE(&((struct pipe *)fp->f_data)->pipe_evlist, evq, ee_slist);
2630 PIPE_UNLOCK((struct pipe *)fp->f_data);
2631 }
2632 fp_drop(p, erp->er_handle, fp, 0);
2633 FREE(evq, M_TEMP);
2634 KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_END, 0,0,0,0,0);
2635 return(0);
2636 }
2637 switch (uap->u_eventmask & EV_MASK) {
2638
2639 case 0:
2640 flag = 0;
2641 break;
2642
2643 case EV_RE:
2644 case EV_WR:
2645 case EV_RE|EV_WR:
2646 flag = EV_RWBYTES;
2647 break;
2648
2649 case EV_EX:
2650 flag = EV_OOB;
2651 break;
2652
2653 case EV_EX|EV_RE:
2654 case EV_EX|EV_WR:
2655 case EV_EX|EV_RE|EV_WR:
2656 flag = EV_OOB|EV_RWBYTES;
2657 break;
2658
2659 default:
2660 #if SOCKETS
2661 if (fp->f_type == DTYPE_SOCKET)
2662 socket_unlock((struct socket *)fp->f_data, 1);
2663 else
2664 #endif /* SOCKETS */
2665 PIPE_UNLOCK((struct pipe *)fp->f_data);
2666 fp_drop(p, erp->er_handle, fp, 0);
2667 KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_END, EINVAL,0,0,0,0);
2668 return(EINVAL);
2669 }
2670 /*
2671 * since we're holding the socket/pipe lock, the event
2672 * cannot go from the unqueued state to the queued state
2673 * however, it can go from the queued state to the unqueued state
2674 * since that direction is protected by the proc_lock...
2675 * so do a quick check for EV_QUEUED w/o holding the proc lock
2676 * since by far the common case will be NOT EV_QUEUED, this saves
2677 * us taking the proc_lock the majority of the time
2678 */
2679 if (evq->ee_flags & EV_QUEUED) {
2680 /*
2681 * EVPROCDEQUE will recheck the state after it grabs the proc_lock
2682 */
2683 EVPROCDEQUE(p, evq);
2684 }
2685 /*
2686 * while the event is off the proc queue and
2687 * we're holding the socket/pipe lock
2688 * it's safe to update these fields...
2689 */
2690 evq->ee_req.er_eventbits = 0;
2691 evq->ee_eventmask = uap->u_eventmask & EV_MASK;
2692
2693 #if SOCKETS
2694 if (fp->f_type == DTYPE_SOCKET) {
2695 postevent((struct socket *)fp->f_data, 0, flag);
2696 socket_unlock((struct socket *)fp->f_data, 1);
2697 } else
2698 #endif /* SOCKETS */
2699 {
2700 postpipeevent((struct pipe *)fp->f_data, flag);
2701 PIPE_UNLOCK((struct pipe *)fp->f_data);
2702 }
2703 fp_drop(p, erp->er_handle, fp, 0);
2704 KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_END, evq->ee_req.er_handle,evq->ee_eventmask,(uint32_t)fp->f_data,flag,0);
2705 return(0);
2706 }
2707
2708 /* this routine is called from the close of fd with proc_fdlock held */
2709 int
2710 waitevent_close(struct proc *p, struct fileproc *fp)
2711 {
2712 struct eventqelt *evq;
2713
2714
2715 fp->f_flags &= ~FP_WAITEVENT;
2716
2717 #if SOCKETS
2718 if (fp->f_type == DTYPE_SOCKET) {
2719 socket_lock((struct socket *)fp->f_data, 1);
2720 evq = ((struct socket *)fp->f_data)->so_evlist.tqh_first;
2721 } else
2722 #endif /* SOCKETS */
2723 if (fp->f_type == DTYPE_PIPE) {
2724 PIPE_LOCK((struct pipe *)fp->f_data);
2725 evq = ((struct pipe *)fp->f_data)->pipe_evlist.tqh_first;
2726 }
2727 else {
2728 return(EINVAL);
2729 }
2730 proc_fdunlock(p);
2731
2732
2733 // locate event if possible
2734 for ( ; evq != NULL; evq = evq->ee_slist.tqe_next) {
2735 if (evq->ee_proc == p)
2736 break;
2737 }
2738 if (evq == NULL) {
2739 #if SOCKETS
2740 if (fp->f_type == DTYPE_SOCKET)
2741 socket_unlock((struct socket *)fp->f_data, 1);
2742 else
2743 #endif /* SOCKETS */
2744 PIPE_UNLOCK((struct pipe *)fp->f_data);
2745
2746 proc_fdlock(p);
2747
2748 return(EINVAL);
2749 }
2750 EVPROCDEQUE(p, evq);
2751
2752 #if SOCKETS
2753 if (fp->f_type == DTYPE_SOCKET) {
2754 TAILQ_REMOVE(&((struct socket *)fp->f_data)->so_evlist, evq, ee_slist);
2755 socket_unlock((struct socket *)fp->f_data, 1);
2756 } else
2757 #endif /* SOCKETS */
2758 {
2759 TAILQ_REMOVE(&((struct pipe *)fp->f_data)->pipe_evlist, evq, ee_slist);
2760 PIPE_UNLOCK((struct pipe *)fp->f_data);
2761 }
2762 FREE(evq, M_TEMP);
2763
2764 proc_fdlock(p);
2765
2766 return(0);
2767 }
2768
2769
2770 /*
2771 * gethostuuid
2772 *
2773 * Description: Get the host UUID from IOKit and return it to user space.
2774 *
2775 * Parameters: uuid_buf Pointer to buffer to receive UUID
2776 * timeout Timespec for timeout
2777 *
2778 * Returns: 0 Success
2779 * EWOULDBLOCK Timeout is too short
2780 * copyout:EFAULT Bad user buffer
2781 *
2782 * Notes: A timeout seems redundant, since if it's tolerable to not
2783 * have a system UUID in hand, then why ask for one?
2784 */
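/*
 * Illustrative user-space sketch (gethostuuid() as historically
 * declared in <unistd.h>; treat the exact prototype as an
 * assumption):
 *
 *	uuid_t uu;
 *	struct timespec ts = { 5, 0 };	// give IOKit up to 5 seconds
 *
 *	if (gethostuuid(uu, &ts) == 0) {
 *		uuid_string_t s;
 *		uuid_unparse(uu, s);
 *		printf("host UUID: %s\n", s);
 *	}
 */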
2785 int
2786 gethostuuid(struct proc *p, struct gethostuuid_args *uap, __unused int32_t *retval)
2787 {
2788 kern_return_t kret;
2789 int error;
2790 mach_timespec_t mach_ts; /* for IOKit call */
2791 __darwin_uuid_t uuid_kern; /* for IOKit call */
2792
2793 /* Convert the 32/64 bit timespec into a mach_timespec_t */
2794 if ( proc_is64bit(p) ) {
2795 struct user64_timespec ts;
2796 error = copyin(uap->timeoutp, &ts, sizeof(ts));
2797 if (error)
2798 return (error);
2799 mach_ts.tv_sec = ts.tv_sec;
2800 mach_ts.tv_nsec = ts.tv_nsec;
2801 } else {
2802 struct user32_timespec ts;
2803 error = copyin(uap->timeoutp, &ts, sizeof(ts) );
2804 if (error)
2805 return (error);
2806 mach_ts.tv_sec = ts.tv_sec;
2807 mach_ts.tv_nsec = ts.tv_nsec;
2808 }
2809
2810 /* Call IOKit with the stack buffer to get the UUID */
2811 kret = IOBSDGetPlatformUUID(uuid_kern, mach_ts);
2812
2813 /*
2814 * If we get it, copy out the data to the user buffer; note that a
2815 * uuid_t is an array of characters, so this is size invariant for
2816 * 32 vs. 64 bit.
2817 */
2818 if (kret == KERN_SUCCESS) {
2819 error = copyout(uuid_kern, uap->uuid_buf, sizeof(uuid_kern));
2820 } else {
2821 error = EWOULDBLOCK;
2822 }
2823
2824 return (error);
2825 }
2826
2827 /*
2828 * ledger
2829 *
2830 * Description: Omnibus system call for ledger operations
2831 */
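/*
 * Illustrative sketch (hypothetical; ledger() is a private interface,
 * and both the wrapper shown and the li_entries field are
 * assumptions):
 *
 *	struct ledger_info li;
 *
 *	if (ledger(LEDGER_INFO, (caddr_t)(intptr_t)pid,
 *	    (caddr_t)&li, NULL) == 0)
 *		printf("ledger has %lld entries\n",
 *		    (long long)li.li_entries);
 */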
2832 int
2833 ledger(struct proc *p, struct ledger_args *args, __unused int32_t *retval)
2834 {
2835 int rval, pid, len, error;
2836 #ifdef LEDGER_DEBUG
2837 struct ledger_limit_args lla;
2838 #endif
2839 task_t task;
2840 proc_t proc;
2841
2842 /* Finish copying in the necessary args before taking the proc lock */
2843 error = 0;
2844 len = 0;
2845 if (args->cmd == LEDGER_ENTRY_INFO)
2846 error = copyin(args->arg3, (char *)&len, sizeof (len));
2847 else if (args->cmd == LEDGER_TEMPLATE_INFO)
2848 error = copyin(args->arg2, (char *)&len, sizeof (len));
2849 #ifdef LEDGER_DEBUG
2850 else if (args->cmd == LEDGER_LIMIT)
2851 error = copyin(args->arg2, (char *)&lla, sizeof (lla));
2852 #endif
2853 if (error)
2854 return (error);
2855 if (len < 0)
2856 return (EINVAL);
2857
2858 rval = 0;
2859 if (args->cmd != LEDGER_TEMPLATE_INFO) {
2860 pid = args->arg1;
2861 proc = proc_find(pid);
2862 if (proc == NULL)
2863 return (ESRCH);
2864
2865 #if CONFIG_MACF
2866 error = mac_proc_check_ledger(p, proc, args->cmd);
2867 if (error) {
2868 proc_rele(proc);
2869 return (error);
2870 }
2871 #endif
2872
2873 task = proc->task;
2874 }
2875
2876 switch (args->cmd) {
2877 #ifdef LEDGER_DEBUG
2878 case LEDGER_LIMIT: {
2879 if (!is_suser())
2880 rval = EPERM;
2881 else rval = ledger_limit(task, &lla);
2882 proc_rele(proc);
2883 break;
2884 }
2885 #endif
2886 case LEDGER_INFO: {
2887 struct ledger_info info;
2888
2889 rval = ledger_info(task, &info);
2890 proc_rele(proc);
2891 if (rval == 0)
2892 rval = copyout(&info, args->arg2,
2893 sizeof (info));
2894 break;
2895 }
2896
2897 case LEDGER_ENTRY_INFO: {
2898 void *buf;
2899 int sz;
2900
2901 rval = ledger_entry_info(task, &buf, &len);
2902 proc_rele(proc);
2903 if ((rval == 0) && (len > 0)) {
2904 sz = len * sizeof (struct ledger_entry_info);
2905 rval = copyout(buf, args->arg2, sz);
2906 kfree(buf, sz);
2907 }
2908 if (rval == 0)
2909 rval = copyout(&len, args->arg3, sizeof (len));
2910 break;
2911 }
2912
2913 case LEDGER_TEMPLATE_INFO: {
2914 void *buf;
2915 int sz;
2916
2917 rval = ledger_template_info(&buf, &len);
2918 if ((rval == 0) && (len > 0)) {
2919 sz = len * sizeof (struct ledger_template_info);
2920 rval = copyout(buf, args->arg1, sz);
2921 kfree(buf, sz);
2922 }
2923 if (rval == 0)
2924 rval = copyout(&len, args->arg2, sizeof (len));
2925 break;
2926 }
2927
2928 default:
2929 rval = EINVAL;
2930 }
2931
2932 return (rval);
2933 }