[apple/xnu.git] / bsd / kern / sys_generic.c (xnu-1228.0.2)
1 /*
2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
29 /*
30 * Copyright (c) 1982, 1986, 1989, 1993
31 * The Regents of the University of California. All rights reserved.
32 * (c) UNIX System Laboratories, Inc.
33 * All or some portions of this file are derived from material licensed
34 * to the University of California by American Telephone and Telegraph
35 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
36 * the permission of UNIX System Laboratories, Inc.
37 *
38 * Redistribution and use in source and binary forms, with or without
39 * modification, are permitted provided that the following conditions
40 * are met:
41 * 1. Redistributions of source code must retain the above copyright
42 * notice, this list of conditions and the following disclaimer.
43 * 2. Redistributions in binary form must reproduce the above copyright
44 * notice, this list of conditions and the following disclaimer in the
45 * documentation and/or other materials provided with the distribution.
46 * 3. All advertising materials mentioning features or use of this software
47 * must display the following acknowledgement:
48 * This product includes software developed by the University of
49 * California, Berkeley and its contributors.
50 * 4. Neither the name of the University nor the names of its contributors
51 * may be used to endorse or promote products derived from this software
52 * without specific prior written permission.
53 *
54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 * SUCH DAMAGE.
65 *
66 * @(#)sys_generic.c 8.9 (Berkeley) 2/14/95
67 */
68 /*
69 * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce
70 * support for mandatory and extensible security protections. This notice
71 * is included in support of clause 2.2 (b) of the Apple Public License,
72 * Version 2.0.
73 */
74
75 #include <sys/param.h>
76 #include <sys/systm.h>
77 #include <sys/filedesc.h>
78 #include <sys/ioctl.h>
79 #include <sys/file_internal.h>
80 #include <sys/proc_internal.h>
81 #include <sys/socketvar.h>
82 #include <sys/uio_internal.h>
83 #include <sys/kernel.h>
84 #include <sys/stat.h>
85 #include <sys/malloc.h>
86 #include <sys/sysproto.h>
87
88 #include <sys/mount_internal.h>
89 #include <sys/protosw.h>
90 #include <sys/ev.h>
91 #include <sys/user.h>
92 #include <sys/kdebug.h>
93 #include <sys/poll.h>
94 #include <sys/event.h>
95 #include <sys/eventvar.h>
96
97 #include <mach/mach_types.h>
98 #include <kern/kern_types.h>
99 #include <kern/assert.h>
100 #include <kern/kalloc.h>
101 #include <kern/thread.h>
102 #include <kern/clock.h>
103
104 #include <sys/mbuf.h>
105 #include <sys/socket.h>
106 #include <sys/socketvar.h>
107 #include <sys/errno.h>
108 #include <sys/syscall.h>
109 #include <sys/pipe.h>
110
111 #include <bsm/audit_kernel.h>
112
113 #include <net/if.h>
114 #include <net/route.h>
115
116 #include <netinet/in.h>
117 #include <netinet/in_systm.h>
118 #include <netinet/ip.h>
119 #include <netinet/in_pcb.h>
120 #include <netinet/ip_var.h>
121 #include <netinet/ip6.h>
122 #include <netinet/tcp.h>
123 #include <netinet/tcp_fsm.h>
124 #include <netinet/tcp_seq.h>
125 #include <netinet/tcp_timer.h>
126 #include <netinet/tcp_var.h>
127 #include <netinet/tcpip.h>
128 #include <netinet/tcp_debug.h>
129 /* for wait queue based select */
130 #include <kern/wait_queue.h>
131 #include <kern/kalloc.h>
132 #include <sys/vnode_internal.h>
133
134 /* XXX should be in a header file somewhere */
135 void evsofree(struct socket *);
136 void evpipefree(struct pipe *);
137 void postpipeevent(struct pipe *, int);
138 void postevent(struct socket *, struct sockbuf *, int);
139 extern kern_return_t IOBSDGetPlatformUUID(__darwin_uuid_t uuid, mach_timespec_t timeoutp);
140
141 int rd_uio(struct proc *p, int fdes, uio_t uio, user_ssize_t *retval);
142 int wr_uio(struct proc *p, int fdes, uio_t uio, user_ssize_t *retval);
143 extern void *get_bsduthreadarg(thread_t);
144 extern int *get_bsduthreadrval(thread_t);
145
146 __private_extern__ int dofileread(vfs_context_t ctx, struct fileproc *fp,
147 user_addr_t bufp, user_size_t nbyte,
148 off_t offset, int flags, user_ssize_t *retval);
149 __private_extern__ int dofilewrite(vfs_context_t ctx, struct fileproc *fp,
150 user_addr_t bufp, user_size_t nbyte,
151 off_t offset, int flags, user_ssize_t *retval);
152 __private_extern__ int preparefileread(struct proc *p, struct fileproc **fp_ret, int fd, int check_for_vnode);
153 __private_extern__ void donefileread(struct proc *p, struct fileproc *fp_ret, int fd);
154
155 #if NETAT
156 extern int appletalk_inited;
157 #endif /* NETAT */
158
159 #define f_flag f_fglob->fg_flag
160 #define f_type f_fglob->fg_type
161 #define f_msgcount f_fglob->fg_msgcount
162 #define f_cred f_fglob->fg_cred
163 #define f_ops f_fglob->fg_ops
164 #define f_offset f_fglob->fg_offset
165 #define f_data f_fglob->fg_data
166
167 /*
168 * Read system call.
169 *
170 * Returns: 0 Success
171 * preparefileread:EBADF
172 * preparefileread:ESPIPE
173 * preparefileread:ENXIO
174 * preparefileread:EBADF
175 * dofileread:???
176 */
177 int
178 read(struct proc *p, struct read_args *uap, user_ssize_t *retval)
179 {
180 __pthread_testcancel(1);
181 return(read_nocancel(p, (struct read_nocancel_args *)uap, retval));
182 }
183
184 int
185 read_nocancel(struct proc *p, struct read_nocancel_args *uap, user_ssize_t *retval)
186 {
187 struct fileproc *fp;
188 int error;
189 int fd = uap->fd;
190
191 if ( (error = preparefileread(p, &fp, fd, 0)) )
192 return (error);
193
194 error = dofileread(vfs_context_current(), fp, uap->cbuf, uap->nbyte,
195 (off_t)-1, 0, retval);
196
197 donefileread(p, fp, fd);
198
199 return (error);
200 }
201
202 /*
203 * Pread system call
204 *
205 * Returns: 0 Success
206 * preparefileread:EBADF
207 * preparefileread:ESPIPE
208 * preparefileread:ENXIO
209 * preparefileread:EBADF
210 * dofileread:???
211 */
212 int
213 pread(struct proc *p, struct pread_args *uap, user_ssize_t *retval)
214 {
215 __pthread_testcancel(1);
216 return(pread_nocancel(p, (struct pread_nocancel_args *)uap, retval));
217 }
218
219 int
220 pread_nocancel(struct proc *p, struct pread_nocancel_args *uap, user_ssize_t *retval)
221 {
222 struct fileproc *fp = NULL; /* fp set by preparefileread() */
223 int fd = uap->fd;
224 int error;
225
226 if ( (error = preparefileread(p, &fp, fd, 1)) )
227 return (error);
228
229 error = dofileread(vfs_context_current(), fp, uap->buf, uap->nbyte,
230 uap->offset, FOF_OFFSET, retval);
231
232 donefileread(p, fp, fd);
233
234 if (!error)
235 KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_pread) | DBG_FUNC_NONE),
236 uap->fd, uap->nbyte, (unsigned int)((uap->offset >> 32)), (unsigned int)(uap->offset), 0);
237
238 return (error);
239 }
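/*
 * Illustrative userspace sketch (not part of this file): the only
 * difference between the two paths above is that pread() supplies an
 * explicit offset with FOF_OFFSET, so the shared file offset is never
 * moved (fd is a hypothetical open descriptor):
 *
 *	char buf[512];
 *	ssize_t n = pread(fd, buf, sizeof(buf), (off_t)1024);
 *	ssize_t m = read(fd, buf, sizeof(buf));	// advances fg_offset
 */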
240
241 /*
242 * Code common for read and pread
243 */
244
245 void
246 donefileread(struct proc *p, struct fileproc *fp, int fd)
247 {
248 proc_fdlock_spin(p);
249
250 fp->f_flags &= ~FP_INCHRREAD;
251
252 fp_drop(p, fd, fp, 1);
253 proc_fdunlock(p);
254 }
255
256 /*
257 * Returns: 0 Success
258 * EBADF
259 * ESPIPE
260 * ENXIO
261 * fp_lookup:EBADF
262 * fo_read:???
263 */
264 int
265 preparefileread(struct proc *p, struct fileproc **fp_ret, int fd, int check_for_pread)
266 {
267 vnode_t vp;
268 int error;
269 struct fileproc *fp;
270
271 proc_fdlock_spin(p);
272
273 error = fp_lookup(p, fd, &fp, 1);
274
275 if (error) {
276 proc_fdunlock(p);
277 return (error);
278 }
279 if ((fp->f_flag & FREAD) == 0) {
280 error = EBADF;
281 goto out;
282 }
283 if (check_for_pread && (fp->f_type != DTYPE_VNODE)) {
284 error = ESPIPE;
285 goto out;
286 }
287 if (fp->f_type == DTYPE_VNODE) {
288 vp = (struct vnode *)fp->f_fglob->fg_data;
289
290 if (check_for_pread && (vnode_isfifo(vp))) {
291 error = ESPIPE;
292 goto out;
293 }
294 if (check_for_pread && (vp->v_flag & VISTTY)) {
295 error = ENXIO;
296 goto out;
297 }
298 if (vp->v_type == VCHR)
299 fp->f_flags |= FP_INCHRREAD;
300 }
301
302 *fp_ret = fp;
303
304 proc_fdunlock(p);
305 return (0);
306
307 out:
308 fp_drop(p, fd, fp, 1);
309 proc_fdunlock(p);
310 return (error);
311 }
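/*
 * The check_for_pread cases above map directly to userspace errno
 * values; a sketch (sv and buf are hypothetical):
 *
 *	int sv[2];
 *	socketpair(AF_UNIX, SOCK_STREAM, 0, sv);
 *	pread(sv[0], buf, sizeof(buf), 0);	// fails, errno == ESPIPE
 *	read(sv[0], buf, sizeof(buf));		// plain read is fine
 */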
312
313
314 /*
315 * Returns: 0 Success
316 * EINVAL
317 * fo_read:???
318 */
319 __private_extern__ int
320 dofileread(vfs_context_t ctx, struct fileproc *fp,
321 user_addr_t bufp, user_size_t nbyte, off_t offset, int flags,
322 user_ssize_t *retval)
323 {
324 uio_t auio;
325 user_ssize_t bytecnt;
326 long error = 0;
327 char uio_buf[ UIO_SIZEOF(1) ];
328
329 // LP64todo - do we want to raise this?
330 if (nbyte > INT_MAX)
331 return (EINVAL);
332
333 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
334 auio = uio_createwithbuffer(1, offset, UIO_USERSPACE64, UIO_READ,
335 &uio_buf[0], sizeof(uio_buf));
336 } else {
337 auio = uio_createwithbuffer(1, offset, UIO_USERSPACE32, UIO_READ,
338 &uio_buf[0], sizeof(uio_buf));
339 }
340 uio_addiov(auio, bufp, nbyte);
341
342 bytecnt = nbyte;
343
344 if ((error = fo_read(fp, auio, flags, ctx))) {
345 if (uio_resid(auio) != bytecnt && (error == ERESTART ||
346 error == EINTR || error == EWOULDBLOCK))
347 error = 0;
348 }
349 bytecnt -= uio_resid(auio);
350
351 *retval = bytecnt;
352
353 return (error);
354 }
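/*
 * A minimal sketch of the stack-backed uio pattern used above, for a
 * single iovec (these are the same KPIs dofileread() calls; the
 * direction and sizes are illustrative):
 *
 *	char uio_buf[ UIO_SIZEOF(1) ];
 *	uio_t auio = uio_createwithbuffer(1, 0, UIO_USERSPACE32,
 *	    UIO_READ, &uio_buf[0], sizeof(uio_buf));
 *	uio_addiov(auio, bufp, nbyte);
 *	// after fo_read(), bytes moved == nbyte - uio_resid(auio)
 */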
355
356 /*
357 * Scatter read system call.
358 *
359 * Returns: 0 Success
360 * EINVAL
361 * ENOMEM
362 * copyin:EFAULT
363 * rd_uio:???
364 */
365 int
366 readv(struct proc *p, struct readv_args *uap, user_ssize_t *retval)
367 {
368 __pthread_testcancel(1);
369 return(readv_nocancel(p, (struct readv_nocancel_args *)uap, retval));
370 }
371
372 int
373 readv_nocancel(struct proc *p, struct readv_nocancel_args *uap, user_ssize_t *retval)
374 {
375 uio_t auio = NULL;
376 int error;
377 int size_of_iovec;
378 struct user_iovec *iovp;
379
380 /* Verify range before calling uio_create() */
381 if (uap->iovcnt <= 0 || uap->iovcnt > UIO_MAXIOV)
382 return (EINVAL);
383
384 /* allocate a uio large enough to hold the number of iovecs passed */
385 auio = uio_create(uap->iovcnt, 0,
386 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
387 UIO_READ);
388
389 /* get location of iovecs within the uio. then copyin the iovecs from
390 * user space.
391 */
392 iovp = uio_iovsaddr(auio);
393 if (iovp == NULL) {
394 error = ENOMEM;
395 goto ExitThisRoutine;
396 }
397 size_of_iovec = (IS_64BIT_PROCESS(p) ? sizeof(struct user_iovec) : sizeof(struct iovec));
398 error = copyin(uap->iovp, (caddr_t)iovp, (uap->iovcnt * size_of_iovec));
399 if (error) {
400 goto ExitThisRoutine;
401 }
402
403 /* finalize uio_t for use and do the IO
404 */
405 uio_calculateresid(auio);
406 error = rd_uio(p, uap->fd, auio, retval);
407
408 ExitThisRoutine:
409 if (auio != NULL) {
410 uio_free(auio);
411 }
412 return (error);
413 }
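/*
 * Userspace shape of the scatter read serviced above (sketch; fd and
 * the buffers are hypothetical):
 *
 *	struct iovec iov[2] = {
 *		{ .iov_base = hdr,  .iov_len = sizeof(hdr)  },
 *		{ .iov_base = body, .iov_len = sizeof(body) },
 *	};
 *	ssize_t n = readv(fd, iov, 2);	// fills hdr, then body
 */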
414
415 /*
416 * Write system call
417 *
418 * Returns: 0 Success
419 * EBADF
420 * fp_lookup:EBADF
421 * dofilewrite:???
422 */
423 int
424 write(struct proc *p, struct write_args *uap, user_ssize_t *retval)
425 {
426 __pthread_testcancel(1);
427 return(write_nocancel(p, (struct write_nocancel_args *)uap, retval));
428
429 }
430
431 int
432 write_nocancel(struct proc *p, struct write_nocancel_args *uap, user_ssize_t *retval)
433 {
434 struct fileproc *fp;
435 int error;
436 int fd = uap->fd;
437
438 error = fp_lookup(p,fd,&fp,0);
439 if (error)
440 return(error);
441 if ((fp->f_flag & FWRITE) == 0) {
442 error = EBADF;
443 } else {
444 struct vfs_context context = *(vfs_context_current());
445 context.vc_ucred = fp->f_fglob->fg_cred;
446
447 error = dofilewrite(&context, fp, uap->cbuf, uap->nbyte,
448 (off_t)-1, 0, retval);
449 }
450 if (error == 0)
451 fp_drop_written(p, fd, fp);
452 else
453 fp_drop(p, fd, fp, 0);
454 return(error);
455 }
456
457 /*
458 * pwrite system call
459 *
460 * Returns: 0 Success
461 * EBADF
462 * ESPIPE
463 * ENXIO
464 * EINVAL
465 * fp_lookup:EBADF
466 * dofilewrite:???
467 */
468 int
469 pwrite(struct proc *p, struct pwrite_args *uap, user_ssize_t *retval)
470 {
471 __pthread_testcancel(1);
472 return(pwrite_nocancel(p, (struct pwrite_nocancel_args *)uap, retval));
473 }
474
475 int
476 pwrite_nocancel(struct proc *p, struct pwrite_nocancel_args *uap, user_ssize_t *retval)
477 {
478 struct fileproc *fp;
479 int error;
480 int fd = uap->fd;
481 vnode_t vp = (vnode_t)0;
482
483 error = fp_lookup(p,fd,&fp,0);
484 if (error)
485 return(error);
486
487 if ((fp->f_flag & FWRITE) == 0) {
488 error = EBADF;
489 } else {
490 struct vfs_context context = *vfs_context_current();
491 context.vc_ucred = fp->f_fglob->fg_cred;
492
493 if (fp->f_type != DTYPE_VNODE) {
494 error = ESPIPE;
495 goto errout;
496 }
497 vp = (vnode_t)fp->f_fglob->fg_data;
498 if (vnode_isfifo(vp)) {
499 error = ESPIPE;
500 goto errout;
501 }
502 if ((vp->v_flag & VISTTY)) {
503 error = ENXIO;
504 goto errout;
505 }
506 if (uap->offset == (off_t)-1) {
507 error = EINVAL;
508 goto errout;
509 }
510
511 error = dofilewrite(&context, fp, uap->buf, uap->nbyte,
512 uap->offset, FOF_OFFSET, retval);
513 }
514 errout:
515 if (error == 0)
516 fp_drop_written(p, fd, fp);
517 else
518 fp_drop(p, fd, fp, 0);
519
520 if (!error)
521 KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_pwrite) | DBG_FUNC_NONE),
522 uap->fd, uap->nbyte, (unsigned int)((uap->offset >> 32)), (unsigned int)(uap->offset), 0);
523
524 return(error);
525 }
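/*
 * Note the extra constraint enforced above: unlike write(), pwrite()
 * rejects an offset of -1 with EINVAL instead of treating it as "use
 * the current offset" (userspace sketch, hypothetical fd):
 *
 *	pwrite(fd, buf, len, (off_t)-1);	// fails, errno == EINVAL
 *	pwrite(fd, buf, len, (off_t)0);		// writes at offset 0
 */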
526
527 /*
528 * Returns: 0 Success
529 * EINVAL
530 * <fo_write>:EPIPE
531 * <fo_write>:??? [indirect through struct fileops]
532 */
533 __private_extern__ int
534 dofilewrite(vfs_context_t ctx, struct fileproc *fp,
535 user_addr_t bufp, user_size_t nbyte, off_t offset, int flags,
536 user_ssize_t *retval)
537 {
538 uio_t auio;
539 long error = 0;
540 user_ssize_t bytecnt;
541 char uio_buf[ UIO_SIZEOF(1) ];
542
543 // LP64todo - do we want to raise this?
544 if (nbyte > INT_MAX)
545 return (EINVAL);
546
547 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
548 auio = uio_createwithbuffer(1, offset, UIO_USERSPACE64, UIO_WRITE,
549 &uio_buf[0], sizeof(uio_buf));
550 } else {
551 auio = uio_createwithbuffer(1, offset, UIO_USERSPACE32, UIO_WRITE,
552 &uio_buf[0], sizeof(uio_buf));
553 }
554 uio_addiov(auio, bufp, nbyte);
555
556 bytecnt = nbyte;
557 if ((error = fo_write(fp, auio, flags, ctx))) {
558 if (uio_resid(auio) != bytecnt && (error == ERESTART ||
559 error == EINTR || error == EWOULDBLOCK))
560 error = 0;
561 /* The socket layer handles SIGPIPE */
562 if (error == EPIPE && fp->f_type != DTYPE_SOCKET) {
563 /* XXX Raise the signal on the thread? */
564 psignal(vfs_context_proc(ctx), SIGPIPE);
565 }
566 }
567 bytecnt -= uio_resid(auio);
568 *retval = bytecnt;
569
570 return (error);
571 }
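/*
 * The EPIPE handling above is why a write to a pipe whose read end is
 * closed raises SIGPIPE in the writer, while sockets are left to the
 * socket layer (which can suppress the signal, e.g. SO_NOSIGPIPE).
 * Userspace sketch (handle_broken_pipe is hypothetical):
 *
 *	signal(SIGPIPE, SIG_IGN);
 *	if (write(pipe_fd, buf, len) == -1 && errno == EPIPE)
 *		handle_broken_pipe();	// reader side went away
 */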
572
573 /*
574 * Gather write system call
575 */
576 int
577 writev(struct proc *p, struct writev_args *uap, user_ssize_t *retval)
578 {
579 __pthread_testcancel(1);
580 return(writev_nocancel(p, (struct writev_nocancel_args *)uap, retval));
581 }
582
583 int
584 writev_nocancel(struct proc *p, struct writev_nocancel_args *uap, user_ssize_t *retval)
585 {
586 uio_t auio = NULL;
587 int error;
588 int size_of_iovec;
589 struct user_iovec *iovp;
590
591 /* Verify range before calling uio_create() */
592 if (uap->iovcnt <= 0 || uap->iovcnt > UIO_MAXIOV)
593 return (EINVAL);
594
595 /* allocate a uio large enough to hold the number of iovecs passed */
596 auio = uio_create(uap->iovcnt, 0,
597 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
598 UIO_WRITE);
599
600 /* get location of iovecs within the uio. then copyin the iovecs from
601 * user space.
602 */
603 iovp = uio_iovsaddr(auio);
604 if (iovp == NULL) {
605 error = ENOMEM;
606 goto ExitThisRoutine;
607 }
608 size_of_iovec = (IS_64BIT_PROCESS(p) ? sizeof(struct user_iovec) : sizeof(struct iovec));
609 error = copyin(uap->iovp, (caddr_t)iovp, (uap->iovcnt * size_of_iovec));
610 if (error) {
611 goto ExitThisRoutine;
612 }
613
614 /* finalize uio_t for use and do the IO
615 */
616 uio_calculateresid(auio);
617 error = wr_uio(p, uap->fd, auio, retval);
618
619 ExitThisRoutine:
620 if (auio != NULL) {
621 uio_free(auio);
622 }
623 return (error);
624 }
625
626
627 int
628 wr_uio(struct proc *p, int fdes, uio_t uio, user_ssize_t *retval)
629 {
630 struct fileproc *fp;
631 int error;
632 user_ssize_t count;
633 struct vfs_context context = *vfs_context_current();
634
635 error = fp_lookup(p,fdes,&fp,0);
636 if (error)
637 return(error);
638
639 if ((fp->f_flag & FWRITE) == 0) {
640 error = EBADF;
641 goto out;
642 }
643 count = uio_resid(uio);
644
645 context.vc_ucred = fp->f_cred;
646 error = fo_write(fp, uio, 0, &context);
647 if (error) {
648 if (uio_resid(uio) != count && (error == ERESTART ||
649 error == EINTR || error == EWOULDBLOCK))
650 error = 0;
651 /* The socket layer handles SIGPIPE */
652 if (error == EPIPE && fp->f_type != DTYPE_SOCKET)
653 psignal(p, SIGPIPE);
654 }
655 *retval = count - uio_resid(uio);
656
657 out:
658 if (error == 0)
659 fp_drop_written(p, fdes, fp);
660 else
661 fp_drop(p, fdes, fp, 0);
662 return(error);
663 }
664
665
666 int
667 rd_uio(struct proc *p, int fdes, uio_t uio, user_ssize_t *retval)
668 {
669 struct fileproc *fp;
670 int error;
671 user_ssize_t count;
672 struct vfs_context context = *vfs_context_current();
673
674 if ( (error = preparefileread(p, &fp, fdes, 0)) )
675 return (error);
676
677 count = uio_resid(uio);
678
679 context.vc_ucred = fp->f_cred;
680
681 error = fo_read(fp, uio, 0, &context);
682
683 if (error) {
684 if (uio_resid(uio) != count && (error == ERESTART ||
685 error == EINTR || error == EWOULDBLOCK))
686 error = 0;
687 }
688 *retval = count - uio_resid(uio);
689
690 donefileread(p, fp, fdes);
691
692 return (error);
693 }
694
695 /*
696 * Ioctl system call
697 *
698 * Returns: 0 Success
699 * EBADF
700 * ENOTTY
701 * ENOMEM
702 * ESRCH
703 * copyin:EFAULT
704 * copyout:EFAULT
705 * fp_lookup:EBADF Bad file descriptor
706 * fo_ioctl:???
707 */
708 int
709 ioctl(struct proc *p, struct ioctl_args *uap, __unused register_t *retval)
710 {
711 struct fileproc *fp;
712 u_long com;
713 int error = 0;
714 u_int size;
715 caddr_t datap, memp;
716 boolean_t is64bit;
717 int tmp;
718 #define STK_PARAMS 128
719 char stkbuf[STK_PARAMS];
720 int fd = uap->fd;
721 struct vfs_context context = *vfs_context_current();
722
723 AUDIT_ARG(fd, uap->fd);
724 AUDIT_ARG(cmd, CAST_DOWN(int, uap->com)); /* LP64todo: uap->com is a user-land long */
725 AUDIT_ARG(addr, uap->data);
726
727 is64bit = proc_is64bit(p);
728
729 proc_fdlock(p);
730 error = fp_lookup(p,fd,&fp,1);
731 if (error) {
732 proc_fdunlock(p);
733 return(error);
734 }
735
736 AUDIT_ARG(file, p, fp);
737
738 if ((fp->f_flag & (FREAD | FWRITE)) == 0) {
739 error = EBADF;
740 goto out;
741 }
742
743 context.vc_ucred = fp->f_fglob->fg_cred;
744
745 #if CONFIG_MACF
746 error = mac_file_check_ioctl(context.vc_ucred, fp->f_fglob, uap->com);
747 if (error)
748 goto out;
749 #endif
750
751 #if NETAT
752 /*
753 * ### LD 6/11/97 Hack Alert: this is to get AppleTalk to work
754 * while implementing an ATioctl system call
755 */
756 {
757 if (appletalk_inited && ((uap->com & 0x0000FFFF) == 0xff99)) {
758 u_long fixed_command;
759
760 #ifdef APPLETALK_DEBUG
761 kprintf("ioctl: special AppleTalk \n");
762 #endif
763 datap = &stkbuf[0];
764 *(user_addr_t *)datap = uap->data;
765 fixed_command = _IOW(0, 0xff99, uap->data);
766 error = fo_ioctl(fp, fixed_command, datap, &context);
767 goto out;
768 }
769 }
770
771 #endif /* NETAT */
772
773
774 switch (com = uap->com) {
775 case FIONCLEX:
776 *fdflags(p, uap->fd) &= ~UF_EXCLOSE;
777 error = 0;
778 goto out;
779 case FIOCLEX:
780 *fdflags(p, uap->fd) |= UF_EXCLOSE;
781 error = 0;
782 goto out;
783 }
784
785 /*
786 * Interpret high order word to find amount of data to be
787 * copied to/from the user's address space.
788 */
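/*
 * Illustrative example: a command built as _IOW('f', 99, int)
 * encodes IOC_IN and sizeof(int) in its high-order word, so
 * IOCPARM_LEN(com) yields 4 here and the copyin() below fetches
 * a single int into the stack buffer.
 */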
789 size = IOCPARM_LEN(com);
790 if (size > IOCPARM_MAX) {
791 error = ENOTTY;
792 goto out;
793 }
794 memp = NULL;
795 if (size > sizeof (stkbuf)) {
796 proc_fdunlock(p);
797 if ((memp = (caddr_t)kalloc(size)) == 0) {
798 proc_fdlock(p);
799 error = ENOMEM;
800 goto out;
801 }
802 proc_fdlock(p);
803 datap = memp;
804 } else
805 datap = &stkbuf[0];
806 if (com&IOC_IN) {
807 if (size) {
808 proc_fdunlock(p);
809 error = copyin(uap->data, datap, size);
810 if (error) {
811 if (memp)
812 kfree(memp, size);
813 proc_fdlock(p);
814 goto out;
815 }
816 proc_fdlock(p);
817 } else {
818 /* XXX - IOC_IN and no size? we should probably return an error here!! */
819 if (is64bit) {
820 *(user_addr_t *)datap = uap->data;
821 }
822 else {
823 *(uint32_t *)datap = (uint32_t)uap->data;
824 }
825 }
826 } else if ((com&IOC_OUT) && size)
827 /*
828 * Zero the buffer so the user always
829 * gets back something deterministic.
830 */
831 bzero(datap, size);
832 else if (com&IOC_VOID) {
833 /* XXX - this is odd since IOC_VOID means no parameters */
834 if (is64bit) {
835 *(user_addr_t *)datap = uap->data;
836 }
837 else {
838 *(uint32_t *)datap = (uint32_t)uap->data;
839 }
840 }
841
842 switch (com) {
843
844 case FIONBIO:
845 if ( (tmp = *(int *)datap) )
846 fp->f_flag |= FNONBLOCK;
847 else
848 fp->f_flag &= ~FNONBLOCK;
849 error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, &context);
850 break;
851
852 case FIOASYNC:
853 if ( (tmp = *(int *)datap) )
854 fp->f_flag |= FASYNC;
855 else
856 fp->f_flag &= ~FASYNC;
857 error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, &context);
858 break;
859
860 case FIOSETOWN:
861 tmp = *(int *)datap;
862 if (fp->f_type == DTYPE_SOCKET) {
863 ((struct socket *)fp->f_data)->so_pgid = tmp;
864 error = 0;
865 break;
866 }
867 if (fp->f_type == DTYPE_PIPE) {
868 error = fo_ioctl(fp, (int)TIOCSPGRP, (caddr_t)&tmp, &context);
869 break;
870 }
871 if (tmp <= 0) {
872 tmp = -tmp;
873 } else {
874 struct proc *p1 = proc_find(tmp);
875 if (p1 == 0) {
876 error = ESRCH;
877 break;
878 }
879 tmp = p1->p_pgrpid;
880 proc_rele(p1);
881 }
882 error = fo_ioctl(fp, (int)TIOCSPGRP, (caddr_t)&tmp, &context);
883 break;
884
885 case FIOGETOWN:
886 if (fp->f_type == DTYPE_SOCKET) {
887 error = 0;
888 *(int *)datap = ((struct socket *)fp->f_data)->so_pgid;
889 break;
890 }
891 error = fo_ioctl(fp, TIOCGPGRP, datap, &context);
892 *(int *)datap = -*(int *)datap;
893 break;
894
895 default:
896 error = fo_ioctl(fp, com, datap, &context);
897 /*
898 * Copy any data to user, size was
899 * already set and checked above.
900 */
901 if (error == 0 && (com&IOC_OUT) && size)
902 error = copyout(datap, uap->data, (u_int)size);
903 break;
904 }
905 proc_fdunlock(p);
906 if (memp)
907 kfree(memp, size);
908 proc_fdlock(p);
909 out:
910 fp_drop(p, fd, fp, 1);
911 proc_fdunlock(p);
912 return(error);
913 }
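/*
 * Userspace sketch of the FIONBIO fast path above (fd is hypothetical);
 * the flag is mirrored into f_flag before being pushed down to fo_ioctl():
 *
 *	int on = 1;
 *	ioctl(fd, FIONBIO, &on);	// same effect as O_NONBLOCK via fcntl()
 */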
914
915 int selwait, nselcoll;
916 #define SEL_FIRSTPASS 1
917 #define SEL_SECONDPASS 2
918 extern int selcontinue(int error);
919 extern int selprocess(int error, int sel_pass);
920 static int selscan(struct proc *p, struct _select * sel,
921 int nfd, register_t *retval, int sel_pass, wait_queue_sub_t wqsub);
922 static int selcount(struct proc *p, u_int32_t *ibits, u_int32_t *obits,
923 int nfd, int * count, int *kfcount);
924 static int seldrop(struct proc *p, u_int32_t *ibits, int nfd);
925 extern uint64_t tvtoabstime(struct timeval *tvp);
926
927 /*
928 * Select system call.
929 *
930 * Returns: 0 Success
931 * EINVAL Invalid argument
932 * EAGAIN Nonconformant error if allocation fails
933 * selprocess:???
934 */
935 int
936 select(struct proc *p, struct select_args *uap, register_t *retval)
937 {
938 __pthread_testcancel(1);
939 return(select_nocancel(p, (struct select_nocancel_args *)uap, retval));
940 }
941
942 int
943 select_nocancel(struct proc *p, struct select_nocancel_args *uap, register_t *retval)
944 {
945 int error = 0;
946 u_int ni, nw, size;
947 thread_t th_act;
948 struct uthread *uth;
949 struct _select *sel;
950 int needzerofill = 1;
951 int count = 0;
952 int kfcount = 0;
953
954 th_act = current_thread();
955 uth = get_bsdthread_info(th_act);
956 sel = &uth->uu_select;
957 retval = (int *)get_bsduthreadrval(th_act);
958 *retval = 0;
959
960 if (uap->nd < 0) {
961 return (EINVAL);
962 }
963
964 /* select on thread of process that already called proc_exit() */
965 if (p->p_fd == NULL) {
966 return (EBADF);
967 }
968
969 if (uap->nd > p->p_fd->fd_nfiles)
970 uap->nd = p->p_fd->fd_nfiles; /* forgiving; slightly wrong */
971
972 nw = howmany(uap->nd, NFDBITS);
973 ni = nw * sizeof(fd_mask);
974
975 /*
976 * if the previously allocated space for the bits is smaller than
977 * what is requested or no space has yet been allocated for this
978 * thread, allocate enough space now.
979 *
980 * Note: If this allocation fails, select() will return EAGAIN; this
981 * is the same thing poll() returns in a no-memory situation, but
982 * it is not a POSIX-compliant error code for select().
983 */
984 if (sel->nbytes < (3 * ni)) {
985 int nbytes = 3 * ni;
986
987 /* Free previous allocation, if any */
988 if (sel->ibits != NULL)
989 FREE(sel->ibits, M_TEMP);
990 if (sel->obits != NULL) {
991 FREE(sel->obits, M_TEMP);
992 /* NULL out; subsequent ibits allocation may fail */
993 sel->obits = NULL;
994 }
995
996 MALLOC(sel->ibits, u_int32_t *, nbytes, M_TEMP, M_WAITOK | M_ZERO);
997 if (sel->ibits == NULL)
998 return (EAGAIN);
999 MALLOC(sel->obits, u_int32_t *, nbytes, M_TEMP, M_WAITOK | M_ZERO);
1000 if (sel->obits == NULL) {
1001 FREE(sel->ibits, M_TEMP);
1002 sel->ibits = NULL;
1003 return (EAGAIN);
1004 }
1005 sel->nbytes = nbytes;
1006 needzerofill = 0;
1007 }
1008
1009 if (needzerofill) {
1010 bzero((caddr_t)sel->ibits, sel->nbytes);
1011 bzero((caddr_t)sel->obits, sel->nbytes);
1012 }
1013
1014 /*
1015 * get the bits from the user address space
1016 */
1017 #define getbits(name, x) \
1018 do { \
1019 if (uap->name && (error = copyin(uap->name, \
1020 (caddr_t)&sel->ibits[(x) * nw], ni))) \
1021 goto continuation; \
1022 } while (0)
1023
1024 getbits(in, 0);
1025 getbits(ou, 1);
1026 getbits(ex, 2);
1027 #undef getbits
1028
1029 if (uap->tv) {
1030 struct timeval atv;
1031 if (IS_64BIT_PROCESS(p)) {
1032 struct user_timeval atv64;
1033 error = copyin(uap->tv, (caddr_t)&atv64, sizeof(atv64));
1034 /* Loses resolution - assume timeout < 68 years */
1035 atv.tv_sec = atv64.tv_sec;
1036 atv.tv_usec = atv64.tv_usec;
1037 } else {
1038 error = copyin(uap->tv, (caddr_t)&atv, sizeof(atv));
1039 }
1040 if (error)
1041 goto continuation;
1042 if (itimerfix(&atv)) {
1043 error = EINVAL;
1044 goto continuation;
1045 }
1046
1047 clock_absolutetime_interval_to_deadline(
1048 tvtoabstime(&atv), &sel->abstime);
1049 }
1050 else
1051 sel->abstime = 0;
1052
1053 sel->kfcount = 0;
1054 if ( (error = selcount(p, sel->ibits, sel->obits, uap->nd, &count, &kfcount)) ) {
1055 goto continuation;
1056 }
1057 sel->count = count;
1058 sel->kfcount = kfcount;
1059 size = SIZEOF_WAITQUEUE_SET + (count * SIZEOF_WAITQUEUE_LINK);
1060 if (uth->uu_allocsize) {
1061 if (uth->uu_wqset == 0)
1062 panic("select: wql memory smashed");
1063 /* needed for the select now */
1064 if (size > uth->uu_allocsize) {
1065 kfree(uth->uu_wqset, uth->uu_allocsize);
1066 uth->uu_allocsize = size;
1067 uth->uu_wqset = (wait_queue_set_t)kalloc(size);
1068 if (uth->uu_wqset == (wait_queue_set_t)NULL)
1069 panic("failed to allocate memory for waitqueue\n");
1070 }
1071 } else {
1072 sel->count = count;
1073 uth->uu_allocsize = size;
1074 uth->uu_wqset = (wait_queue_set_t)kalloc(uth->uu_allocsize);
1075 if (uth->uu_wqset == (wait_queue_set_t)NULL)
1076 panic("failed to allocate memory for waitqueue\n");
1077 }
1078 bzero(uth->uu_wqset, size);
1079 sel->wql = (char *)uth->uu_wqset + SIZEOF_WAITQUEUE_SET;
1080 wait_queue_set_init(uth->uu_wqset, (SYNC_POLICY_FIFO | SYNC_POLICY_PREPOST));
1081
1082 continuation:
1083 return selprocess(error, SEL_FIRSTPASS);
1084 }
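/*
 * Userspace shape of what select_nocancel() services (sketch; fd is
 * hypothetical). The three getbits() copies above correspond to the
 * readfds/writefds/exceptfds arguments:
 *
 *	fd_set rfds;
 *	struct timeval tv = { 1, 0 };	// 1 second
 *	FD_ZERO(&rfds);
 *	FD_SET(fd, &rfds);
 *	int n = select(fd + 1, &rfds, NULL, NULL, &tv);
 */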
1085
1086 int
1087 selcontinue(int error)
1088 {
1089 return selprocess(error, SEL_SECONDPASS);
1090 }
1091
1092 int
1093 selprocess(int error, int sel_pass)
1094 {
1095 int ncoll;
1096 u_int ni, nw;
1097 thread_t th_act;
1098 struct uthread *uth;
1099 struct proc *p;
1100 struct select_args *uap;
1101 int *retval;
1102 struct _select *sel;
1103 int unwind = 1;
1104 int prepost = 0;
1105 int somewakeup = 0;
1106 int doretry = 0;
1107 wait_result_t wait_result;
1108
1109 p = current_proc();
1110 th_act = current_thread();
1111 uap = (struct select_args *)get_bsduthreadarg(th_act);
1112 retval = (int *)get_bsduthreadrval(th_act);
1113 uth = get_bsdthread_info(th_act);
1114 sel = &uth->uu_select;
1115
1116 /* if it is the first pass, the wait queue is not set up yet */
1117 if ((error != 0) && (sel_pass == SEL_FIRSTPASS))
1118 unwind = 0;
1119 if (sel->count == 0)
1120 unwind = 0;
1121 retry:
1122 if (error != 0) {
1123 goto done;
1124 }
1125
1126 ncoll = nselcoll;
1127 OSBitOrAtomic(P_SELECT, (UInt32 *)&p->p_flag);
1128 /* skip scans if the select is just for timeouts */
1129 if (sel->count) {
1130 if (sel_pass == SEL_FIRSTPASS)
1131 wait_queue_sub_clearrefs(uth->uu_wqset);
1132
1133 error = selscan(p, sel, uap->nd, retval, sel_pass, (wait_queue_sub_t)uth->uu_wqset);
1134 if (error || *retval) {
1135 goto done;
1136 }
1137 if (prepost) {
1138 /* if the select was preposted, we can wake up and discover that someone
1139 * else already read the data; go to select again if time permits
1140 */
1141 prepost = 0;
1142 doretry = 1;
1143 }
1144 if (somewakeup) {
1145 somewakeup = 0;
1146 doretry = 1;
1147 }
1148 }
1149
1150 if (uap->tv) {
1151 uint64_t now;
1152
1153 clock_get_uptime(&now);
1154 if (now >= sel->abstime)
1155 goto done;
1156 }
1157
1158 if (doretry) {
1159 /* cleanup obits and try again */
1160 doretry = 0;
1161 sel_pass = SEL_FIRSTPASS;
1162 goto retry;
1163 }
1164
1165 /*
1166 * To effect a poll, the timeout argument should be
1167 * non-nil, pointing to a zero-valued timeval structure.
1168 */
1169 if (uap->tv && sel->abstime == 0) {
1170 goto done;
1171 }
1172
1173 /* No spurious wakeups due to collisions, no need to check for them */
1174 if ((sel_pass == SEL_SECONDPASS) || ((p->p_flag & P_SELECT) == 0)) {
1175 sel_pass = SEL_FIRSTPASS;
1176 goto retry;
1177 }
1178
1179 OSBitAndAtomic(~((uint32_t)P_SELECT), (UInt32 *)&p->p_flag);
1180
1181 /* if the select is just for timeout, skip the check */
1182 if (sel->count && (sel_pass == SEL_SECONDPASS))
1183 panic("selprocess: 2nd pass assertwaiting");
1184
1185 /* Wait Queue Subordinate has waitqueue as first element */
1186 wait_result = wait_queue_assert_wait((wait_queue_t)uth->uu_wqset,
1187 &selwait, THREAD_ABORTSAFE, sel->abstime);
1188 if (wait_result != THREAD_AWAKENED) {
1189 /* there are no preposted events */
1190 error = tsleep1(NULL, PSOCK | PCATCH,
1191 "select", 0, selcontinue);
1192 } else {
1193 prepost = 1;
1194 error = 0;
1195 }
1196
1197 sel_pass = SEL_SECONDPASS;
1198 if (error == 0) {
1199 if (!prepost)
1200 somewakeup = 1;
1201 goto retry;
1202 }
1203 done:
1204 if (unwind) {
1205 wait_subqueue_unlink_all(uth->uu_wqset);
1206 seldrop(p, sel->ibits, uap->nd);
1207 }
1208 OSBitAndAtomic(~((uint32_t)P_SELECT), (UInt32 *)&p->p_flag);
1209 /* select is not restarted after signals... */
1210 if (error == ERESTART)
1211 error = EINTR;
1212 if (error == EWOULDBLOCK)
1213 error = 0;
1214 nw = howmany(uap->nd, NFDBITS);
1215 ni = nw * sizeof(fd_mask);
1216
1217 #define putbits(name, x) \
1218 do { \
1219 if (uap->name && (error2 = \
1220 copyout((caddr_t)&sel->obits[(x) * nw], uap->name, ni))) \
1221 error = error2; \
1222 } while (0)
1223
1224 if (error == 0) {
1225 int error2;
1226
1227 putbits(in, 0);
1228 putbits(ou, 1);
1229 putbits(ex, 2);
1230 #undef putbits
1231 }
1232 return(error);
1233 }
1234
1235 static int
1236 selscan(struct proc *p, struct _select *sel, int nfd, register_t *retval,
1237 int sel_pass, wait_queue_sub_t wqsub)
1238 {
1239 struct filedesc *fdp = p->p_fd;
1240 int msk, i, j, fd;
1241 u_int32_t bits;
1242 struct fileproc *fp;
1243 int n = 0;
1244 int nc = 0;
1245 static int flag[3] = { FREAD, FWRITE, 0 };
1246 u_int32_t *iptr, *optr;
1247 u_int nw;
1248 u_int32_t *ibits, *obits;
1249 char * wql;
1250 char * wql_ptr;
1251 int count, kfcount;
1252 boolean_t funnel_state;
1253 vnode_t vp;
1254 struct vfs_context context = *vfs_context_current();
1255
1256 /*
1257 * Problems when rebooting, due to Mac OS X signal problems
1258 * in Beaker1C; verify that p->p_fd is valid
1259 */
1260 if (fdp == NULL) {
1261 *retval = 0;
1262 return(EIO);
1263 }
1264 ibits = sel->ibits;
1265 obits = sel->obits;
1266 wql = sel->wql;
1267
1268 nw = howmany(nfd, NFDBITS);
1269
1270 count = sel->count;
1271 kfcount = sel->kfcount;
1272
1273 if (kfcount > count)
1274 panic("selscan: count < kfcount");
1275
1276 if (kfcount != 0) {
1277 funnel_state = thread_funnel_set(kernel_flock, TRUE);
1278
1279 proc_fdlock(p);
1280 for (msk = 0; msk < 3; msk++) {
1281 iptr = (u_int32_t *)&ibits[msk * nw];
1282 optr = (u_int32_t *)&obits[msk * nw];
1283
1284 for (i = 0; i < nfd; i += NFDBITS) {
1285 bits = iptr[i/NFDBITS];
1286
1287 while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
1288 bits &= ~(1 << j);
1289 fp = fdp->fd_ofiles[fd];
1290
1291 if (fp == NULL ||
1292 (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
1293 proc_fdunlock(p);
1294 thread_funnel_set(kernel_flock, funnel_state);
1295 return(EBADF);
1296 }
1297 if (sel_pass == SEL_SECONDPASS) {
1298 wql_ptr = (char *)0;
1299 fp->f_flags &= ~FP_INSELECT;
1300 fp->f_waddr = (void *)0;
1301 } else {
1302 wql_ptr = (wql + nc * SIZEOF_WAITQUEUE_LINK);
1303 fp->f_flags |= FP_INSELECT;
1304 fp->f_waddr = (void *)wqsub;
1305 }
1306
1307 context.vc_ucred = fp->f_cred;
1308
1309 if (fp->f_ops && (fp->f_type == DTYPE_VNODE)
1310 && ((vp = (struct vnode *)fp->f_data) != NULLVP)
1311 && (vp->v_type == VCHR)
1312 && fo_select(fp, flag[msk], wql_ptr, &context)) {
1313 optr[fd/NFDBITS] |= (1 << (fd % NFDBITS));
1314 n++;
1315 }
1316 nc++;
1317 }
1318 }
1319 }
1320 proc_fdunlock(p);
1321 thread_funnel_set(kernel_flock, funnel_state);
1322 }
1323
1324 nc = 0;
1325 if (kfcount != count) {
1326 proc_fdlock(p);
1327 for (msk = 0; msk < 3; msk++) {
1328 iptr = (u_int32_t *)&ibits[msk * nw];
1329 optr = (u_int32_t *)&obits[msk * nw];
1330
1331 for (i = 0; i < nfd; i += NFDBITS) {
1332 bits = iptr[i/NFDBITS];
1333
1334 while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
1335 bits &= ~(1 << j);
1336 fp = fdp->fd_ofiles[fd];
1337
1338 if (fp == NULL ||
1339 (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
1340 proc_fdunlock(p);
1341 return(EBADF);
1342 }
1343 if (sel_pass == SEL_SECONDPASS) {
1344 wql_ptr = (char *)0;
1345 fp->f_flags &= ~FP_INSELECT;
1346 fp->f_waddr = (void *)0;
1347 } else {
1348 wql_ptr = (wql + nc * SIZEOF_WAITQUEUE_LINK);
1349 fp->f_flags |= FP_INSELECT;
1350 fp->f_waddr = (void *)wqsub;
1351 }
1352
1353 context.vc_ucred = fp->f_cred;
1354
1355 if ((fp->f_ops &&
1356 ((fp->f_type != DTYPE_VNODE)
1357 || (((vp = (struct vnode *)fp->f_data) != NULLVP)
1358 && (vp->v_type != VCHR))
1359 )
1360 && fo_select(fp, flag[msk], wql_ptr, &context))) {
1361 optr[fd/NFDBITS] |= (1 << (fd % NFDBITS));
1362 n++;
1363 }
1364 nc++;
1365 }
1366 }
1367 }
1368 proc_fdunlock(p);
1369 }
1370 *retval = n;
1371 return (0);
1372 }
1373
1374 int poll_callback(struct kqueue *, struct kevent *, void *);
1375
1376 struct poll_continue_args {
1377 user_addr_t pca_fds;
1378 u_int pca_nfds;
1379 u_int pca_rfds;
1380 };
1381
1382 int
1383 poll(struct proc *p, struct poll_args *uap, register_t *retval)
1384 {
1385 __pthread_testcancel(1);
1386 return(poll_nocancel(p, (struct poll_nocancel_args *)uap, retval));
1387 }
1388
1389
1390 int
1391 poll_nocancel(struct proc *p, struct poll_nocancel_args *uap, register_t *retval)
1392 {
1393 struct poll_continue_args *cont;
1394 struct pollfd *fds;
1395 struct kqueue *kq;
1396 struct timeval atv;
1397 int ncoll, error = 0;
1398 u_int nfds = uap->nfds;
1399 u_int rfds = 0;
1400 u_int i;
1401 size_t ni;
1402
1403 /*
1404 * This is kinda bogus. We have fd limits, but that is not
1405 * really related to the size of the pollfd array. Make sure
1406 * we let the process use at least FD_SETSIZE entries and at
1407 * least enough for the current limits. We want to be reasonably
1408 * safe, but not overly restrictive.
1409 */
1410 if (nfds > OPEN_MAX ||
1411 (nfds > p->p_rlimit[RLIMIT_NOFILE].rlim_cur && (proc_suser(p) || nfds > FD_SETSIZE)))
1412 return (EINVAL);
1413
1414 kq = kqueue_alloc(p);
1415 if (kq == NULL)
1416 return (EAGAIN);
1417
1418 ni = nfds * sizeof(struct pollfd) + sizeof(struct poll_continue_args);
1419 MALLOC(cont, struct poll_continue_args *, ni, M_TEMP, M_WAITOK);
1420 if (NULL == cont) {
1421 error = EAGAIN;
1422 goto out;
1423 }
1424
1425 fds = (struct pollfd *)&cont[1];
1426 error = copyin(uap->fds, fds, nfds * sizeof(struct pollfd));
1427 if (error)
1428 goto out;
1429
1430 if (uap->timeout != -1) {
1431 struct timeval rtv;
1432
1433 atv.tv_sec = uap->timeout / 1000;
1434 atv.tv_usec = (uap->timeout % 1000) * 1000;
1435 if (itimerfix(&atv)) {
1436 error = EINVAL;
1437 goto out;
1438 }
1439 getmicrouptime(&rtv);
1440 timevaladd(&atv, &rtv);
1441 } else {
1442 atv.tv_sec = 0;
1443 atv.tv_usec = 0;
1444 }
1445
1446 /* JMM - all this P_SELECT stuff is bogus */
1447 ncoll = nselcoll;
1448 OSBitOrAtomic(P_SELECT, (UInt32 *)&p->p_flag);
1449 for (i = 0; i < nfds; i++) {
1450 short events = fds[i].events;
1451 struct kevent kev;
1452 int kerror = 0;
1453
1454 /* per spec, ignore fd values below zero */
1455 if (fds[i].fd < 0) {
1456 fds[i].revents = 0;
1457 continue;
1458 }
1459
1460 /* convert the poll event into a kqueue kevent */
1461 kev.ident = fds[i].fd;
1462 kev.flags = EV_ADD | EV_ONESHOT | EV_POLL;
1463 kev.fflags = NOTE_LOWAT;
1464 kev.data = 1; /* efficiency be damned: any data should trigger */
1465 kev.udata = CAST_USER_ADDR_T(&fds[i]);
1466
1467 /* Handle input events */
1468 if (events & ( POLLIN | POLLRDNORM | POLLPRI | POLLRDBAND | POLLHUP )) {
1469 kev.filter = EVFILT_READ;
1470 if (!(events & ( POLLIN | POLLRDNORM )))
1471 kev.flags |= EV_OOBAND;
1472 kerror = kevent_register(kq, &kev, p);
1473 }
1474
1475 /* Handle output events */
1476 if (kerror == 0 &&
1477 events & ( POLLOUT | POLLWRNORM | POLLWRBAND )) {
1478 kev.filter = EVFILT_WRITE;
1479 kerror = kevent_register(kq, &kev, p);
1480 }
1481
1482 /* Handle BSD extension vnode events */
1483 if (kerror == 0 &&
1484 events & ( POLLEXTEND | POLLATTRIB | POLLNLINK | POLLWRITE )) {
1485 kev.filter = EVFILT_VNODE;
1486 kev.fflags = 0;
1487 if (events & POLLEXTEND)
1488 kev.fflags |= NOTE_EXTEND;
1489 if (events & POLLATTRIB)
1490 kev.fflags |= NOTE_ATTRIB;
1491 if (events & POLLNLINK)
1492 kev.fflags |= NOTE_LINK;
1493 if (events & POLLWRITE)
1494 kev.fflags |= NOTE_WRITE;
1495 kerror = kevent_register(kq, &kev, p);
1496 }
1497
1498 if (kerror != 0) {
1499 fds[i].revents = POLLNVAL;
1500 rfds++;
1501 } else
1502 fds[i].revents = 0;
1503 }
1504
1505 /* Did we have any trouble registering? */
1506 if (rfds > 0)
1507 goto done;
1508
1509 /* scan for, and possibly wait for, the kevents to trigger */
1510 cont->pca_fds = uap->fds;
1511 cont->pca_nfds = nfds;
1512 cont->pca_rfds = rfds;
1513 error = kevent_scan(kq, poll_callback, NULL, cont, &atv, p);
1514 rfds = cont->pca_rfds;
1515
1516 done:
1517 OSBitAndAtomic(~((uint32_t)P_SELECT), (UInt32 *)&p->p_flag);
1518 /* poll is not restarted after signals... */
1519 if (error == ERESTART)
1520 error = EINTR;
1521 if (error == EWOULDBLOCK)
1522 error = 0;
1523 if (error == 0) {
1524 error = copyout(fds, uap->fds, nfds * sizeof(struct pollfd));
1525 *retval = rfds;
1526 }
1527
1528 out:
1529 if (NULL != cont)
1530 FREE(cont, M_TEMP);
1531
1532 kqueue_dealloc(kq);
1533 return (error);
1534 }
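/*
 * Userspace shape of the poll-over-kqueue emulation above (sketch; fd
 * and consume_input are hypothetical). Each pollfd becomes one or more
 * EV_ONESHOT kevents, harvested by poll_callback():
 *
 *	struct pollfd pfd = { .fd = fd, .events = POLLIN };
 *	int n = poll(&pfd, 1, 1000);	// timeout in milliseconds
 *	if (n > 0 && (pfd.revents & POLLIN))
 *		consume_input(fd);
 */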
1535
1536 int
1537 poll_callback(__unused struct kqueue *kq, struct kevent *kevp, void *data)
1538 {
1539 struct poll_continue_args *cont = (struct poll_continue_args *)data;
1540 struct pollfd *fds = CAST_DOWN(struct pollfd *, kevp->udata);
1541 short mask;
1542
1543 /* convert the results back into revents */
1544 if (kevp->flags & EV_EOF)
1545 fds->revents |= POLLHUP;
1546 if (kevp->flags & EV_ERROR)
1547 fds->revents |= POLLERR;
1548
1549 switch (kevp->filter) {
1550 case EVFILT_READ:
1551 if (fds->revents & POLLHUP)
1552 mask = (POLLIN | POLLRDNORM | POLLPRI | POLLRDBAND );
1553 else {
1554 mask = 0;
1555 if (kevp->data != 0)
1556 mask |= (POLLIN | POLLRDNORM );
1557 if (kevp->flags & EV_OOBAND)
1558 mask |= ( POLLPRI | POLLRDBAND );
1559 }
1560 fds->revents |= (fds->events & mask);
1561 break;
1562
1563 case EVFILT_WRITE:
1564 if (!(fds->revents & POLLHUP))
1565 fds->revents |= (fds->events & ( POLLOUT | POLLWRNORM | POLLWRBAND ));
1566 break;
1567
1568 case EVFILT_VNODE:
1569 if (kevp->fflags & NOTE_EXTEND)
1570 fds->revents |= (fds->events & POLLEXTEND);
1571 if (kevp->fflags & NOTE_ATTRIB)
1572 fds->revents |= (fds->events & POLLATTRIB);
1573 if (kevp->fflags & NOTE_LINK)
1574 fds->revents |= (fds->events & POLLNLINK);
1575 if (kevp->fflags & NOTE_WRITE)
1576 fds->revents |= (fds->events & POLLWRITE);
1577 break;
1578 }
1579
1580 if (fds->revents)
1581 cont->pca_rfds++;
1582
1583 return 0;
1584 }
1585
1586 int
1587 seltrue(__unused dev_t dev, __unused int flag, __unused struct proc *p)
1588 {
1589
1590 return (1);
1591 }
1592
1593 static int
1594 selcount(struct proc *p, u_int32_t *ibits, __unused u_int32_t *obits,
1595 int nfd, int *countp, int * kfcountp)
1596 {
1597 struct filedesc *fdp = p->p_fd;
1598 int msk, i, j, fd;
1599 u_int32_t bits;
1600 struct fileproc *fp;
1601 int n = 0;
1602 u_int32_t *iptr;
1603 u_int nw;
1604 int error=0;
1605 int kfc = 0;
1606 int dropcount;
1607 vnode_t vp;
1608
1609 /*
1610 * Problems when rebooting, due to Mac OS X signal problems
1611 * in Beaker1C; verify that p->p_fd is valid
1612 */
1613 if (fdp == NULL) {
1614 *countp = 0;
1615 *kfcountp = 0;
1616 return(EIO);
1617 }
1618 nw = howmany(nfd, NFDBITS);
1619
1620 proc_fdlock(p);
1621 for (msk = 0; msk < 3; msk++) {
1622 iptr = (u_int32_t *)&ibits[msk * nw];
1623 for (i = 0; i < nfd; i += NFDBITS) {
1624 bits = iptr[i/NFDBITS];
1625 while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
1626 bits &= ~(1 << j);
1627 fp = fdp->fd_ofiles[fd];
1628 if (fp == NULL ||
1629 (fdp->fd_ofileflags[fd] & UF_RESERVED)) {
1630 *countp = 0;
1631 *kfcountp = 0;
1632 error = EBADF;
1633 goto bad;
1634 }
1635 fp->f_iocount++;
1636 if ((fp->f_type == DTYPE_VNODE)
1637 && ((vp = (struct vnode *)fp->f_data) != NULLVP)
1638 && (vp->v_type == VCHR) )
1639 kfc++;
1640
1641 n++;
1642 }
1643 }
1644 }
1645 proc_fdunlock(p);
1646
1647 *countp = n;
1648 *kfcountp = kfc;
1649 return (0);
1650 bad:
1651 dropcount = 0;
1652
1653 if (n == 0)
1654 goto out;
1655 /* undo the iocounts */
1656 for (msk = 0; msk < 3; msk++) {
1657 iptr = (u_int32_t *)&ibits[msk * nw];
1658 for (i = 0; i < nfd; i += NFDBITS) {
1659 bits = iptr[i/NFDBITS];
1660 while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
1661 bits &= ~(1 << j);
1662 fp = fdp->fd_ofiles[fd];
1663 if (dropcount >= n)
1664 goto out;
1665 fp->f_iocount--;
1666
1667 if (p->p_fpdrainwait && fp->f_iocount == 0) {
1668 p->p_fpdrainwait = 0;
1669 wakeup(&p->p_fpdrainwait);
1670 }
1671 dropcount++;
1672 }
1673 }
1674 }
1675 out:
1676 proc_fdunlock(p);
1677 return(error);
1678 }
1679
1680 static int
1681 seldrop(struct proc *p, u_int32_t *ibits, int nfd)
1682 {
1683 struct filedesc *fdp = p->p_fd;
1684 int msk, i, j, fd;
1685 u_int32_t bits;
1686 struct fileproc *fp;
1687 int n = 0;
1688 u_int32_t *iptr;
1689 u_int nw;
1690
1691 /*
1692 * Problems when rebooting, due to Mac OS X signal problems
1693 * in Beaker1C; verify that p->p_fd is valid
1694 */
1695 if (fdp == NULL) {
1696 return(EIO);
1697 }
1698
1699 nw = howmany(nfd, NFDBITS);
1700
1701
1702 proc_fdlock(p);
1703 for (msk = 0; msk < 3; msk++) {
1704 iptr = (u_int32_t *)&ibits[msk * nw];
1705 for (i = 0; i < nfd; i += NFDBITS) {
1706 bits = iptr[i/NFDBITS];
1707 while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
1708 bits &= ~(1 << j);
1709 fp = fdp->fd_ofiles[fd];
1710 if (fp == NULL
1711 #if 0
1712 /* if you are here then it is being closed */
1713 || (fdp->fd_ofileflags[fd] & UF_RESERVED)
1714 #endif
1715 ) {
1716 proc_fdunlock(p);
1717 return(EBADF);
1718 }
1719 n++;
1720 fp->f_iocount--;
1721 fp->f_flags &= ~FP_INSELECT;
1722
1723 if (p->p_fpdrainwait && fp->f_iocount == 0) {
1724 p->p_fpdrainwait = 0;
1725 wakeup(&p->p_fpdrainwait);
1726 }
1727 }
1728 }
1729 }
1730 proc_fdunlock(p);
1731 return (0);
1732 }
1733
1734 /*
1735 * Record a select request.
1736 */
1737 void
1738 selrecord(__unused struct proc *selector, struct selinfo *sip, void * p_wql)
1739 {
1740 thread_t cur_act = current_thread();
1741 struct uthread * ut = get_bsdthread_info(cur_act);
1742
1743 /* need to look at collisions */
1744
1745 if ((p_wql == (void *)0) && ((sip->si_flags & SI_INITED) == 0)) {
1746 return;
1747 }
1748
1749 /* do not record if this is the second pass of select */
1750 if (p_wql == (void *)0) {
1751 return;
1752 }
1753
1754 if ((sip->si_flags & SI_INITED) == 0) {
1755 wait_queue_init(&sip->si_wait_queue, SYNC_POLICY_FIFO);
1756 sip->si_flags |= SI_INITED;
1757 sip->si_flags &= ~SI_CLEAR;
1758 }
1759
1760 if (sip->si_flags & SI_RECORDED) {
1761 sip->si_flags |= SI_COLL;
1762 } else
1763 sip->si_flags &= ~SI_COLL;
1764
1765 sip->si_flags |= SI_RECORDED;
1766 if (!wait_queue_member(&sip->si_wait_queue, ut->uu_wqset))
1767 wait_queue_link_noalloc(&sip->si_wait_queue, ut->uu_wqset,
1768 (wait_queue_link_t)p_wql);
1769
1770 return;
1771 }
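/*
 * Typical driver-side pairing for selrecord()/selwakeup() (sketch; the
 * mydev_* names, sc, and data_available() are hypothetical):
 *
 *	int
 *	mydev_select(dev_t dev, int which, void *wql, struct proc *p)
 *	{
 *		if (which == FREAD && data_available(sc))
 *			return (1);		// ready now
 *		selrecord(p, &sc->sc_selinfo, wql);
 *		return (0);			// not ready; recorded
 *	}
 *
 *	// ...and when data arrives:  selwakeup(&sc->sc_selinfo);
 */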
1772
1773 void
1774 selwakeup(struct selinfo *sip)
1775 {
1776
1777 if ((sip->si_flags & SI_INITED) == 0) {
1778 return;
1779 }
1780
1781 if (sip->si_flags & SI_COLL) {
1782 nselcoll++;
1783 sip->si_flags &= ~SI_COLL;
1784 #if 0
1785 /* will not support */
1786 //wakeup((caddr_t)&selwait);
1787 #endif
1788 }
1789
1790 if (sip->si_flags & SI_RECORDED) {
1791 wait_queue_wakeup_all(&sip->si_wait_queue, &selwait, THREAD_AWAKENED);
1792 sip->si_flags &= ~SI_RECORDED;
1793 }
1794
1795 }
1796
1797 void
1798 selthreadclear(struct selinfo *sip)
1799 {
1800
1801 if ((sip->si_flags & SI_INITED) == 0) {
1802 return;
1803 }
1804 if (sip->si_flags & SI_RECORDED) {
1805 selwakeup(sip);
1806 sip->si_flags &= ~(SI_RECORDED | SI_COLL);
1807 }
1808 sip->si_flags |= SI_CLEAR;
1809 wait_queue_unlinkall_nofree(&sip->si_wait_queue);
1810 }
1811
1812
1813
1814
1815 #define DBG_POST 0x10
1816 #define DBG_WATCH 0x11
1817 #define DBG_WAIT 0x12
1818 #define DBG_MOD 0x13
1819 #define DBG_EWAKEUP 0x14
1820 #define DBG_ENQUEUE 0x15
1821 #define DBG_DEQUEUE 0x16
1822
1823 #define DBG_MISC_POST MISCDBG_CODE(DBG_EVENT,DBG_POST)
1824 #define DBG_MISC_WATCH MISCDBG_CODE(DBG_EVENT,DBG_WATCH)
1825 #define DBG_MISC_WAIT MISCDBG_CODE(DBG_EVENT,DBG_WAIT)
1826 #define DBG_MISC_MOD MISCDBG_CODE(DBG_EVENT,DBG_MOD)
1827 #define DBG_MISC_EWAKEUP MISCDBG_CODE(DBG_EVENT,DBG_EWAKEUP)
1828 #define DBG_MISC_ENQUEUE MISCDBG_CODE(DBG_EVENT,DBG_ENQUEUE)
1829 #define DBG_MISC_DEQUEUE MISCDBG_CODE(DBG_EVENT,DBG_DEQUEUE)
1830
1831
1832 #define EVPROCDEQUE(p, evq) do { \
1833 proc_lock(p); \
1834 if (evq->ee_flags & EV_QUEUED) { \
1835 TAILQ_REMOVE(&p->p_evlist, evq, ee_plist); \
1836 evq->ee_flags &= ~EV_QUEUED; \
1837 } \
1838 proc_unlock(p); \
1839 } while (0);
1840
1841
1842 /*
1843 * called upon socket close. dequeue and free all events for
1844 * the socket... the socket must be locked by the caller.
1845 */
1846 void
1847 evsofree(struct socket *sp)
1848 {
1849 struct eventqelt *evq, *next;
1850 proc_t p;
1851
1852 if (sp == NULL)
1853 return;
1854
1855 for (evq = sp->so_evlist.tqh_first; evq != NULL; evq = next) {
1856 next = evq->ee_slist.tqe_next;
1857 p = evq->ee_proc;
1858
1859 if (evq->ee_flags & EV_QUEUED) {
1860 EVPROCDEQUE(p, evq);
1861 }
1862 TAILQ_REMOVE(&sp->so_evlist, evq, ee_slist); // remove from socket q
1863 FREE(evq, M_TEMP);
1864 }
1865 }
1866
1867
1868 /*
1869 * called upon pipe close. dequeue and free all events for
1870 * the pipe... the pipe must be locked by the caller
1871 */
1872 void
1873 evpipefree(struct pipe *cpipe)
1874 {
1875 struct eventqelt *evq, *next;
1876 proc_t p;
1877
1878 for (evq = cpipe->pipe_evlist.tqh_first; evq != NULL; evq = next) {
1879 next = evq->ee_slist.tqe_next;
1880 p = evq->ee_proc;
1881
1882 EVPROCDEQUE(p, evq);
1883
1884 TAILQ_REMOVE(&cpipe->pipe_evlist, evq, ee_slist); // remove from pipe q
1885 FREE(evq, M_TEMP);
1886 }
1887 }
1888
1889
1890 /*
1891 * enqueue this event if it's not already queued. wakeup
1892 * the proc if we do queue this event to it...
1893 * entered with proc lock held... we drop it before
1894 * doing the wakeup and return in that state
1895 */
1896 static void
1897 evprocenque(struct eventqelt *evq)
1898 {
1899 proc_t p;
1900
1901 assert(evq);
1902 p = evq->ee_proc;
1903
1904 KERNEL_DEBUG(DBG_MISC_ENQUEUE|DBG_FUNC_START, (uint32_t)evq, evq->ee_flags, evq->ee_eventmask,0,0);
1905
1906 proc_lock(p);
1907
1908 if (evq->ee_flags & EV_QUEUED) {
1909 proc_unlock(p);
1910
1911 KERNEL_DEBUG(DBG_MISC_ENQUEUE|DBG_FUNC_END, 0,0,0,0,0);
1912 return;
1913 }
1914 evq->ee_flags |= EV_QUEUED;
1915
1916 TAILQ_INSERT_TAIL(&p->p_evlist, evq, ee_plist);
1917
1918 proc_unlock(p);
1919
1920 wakeup(&p->p_evlist);
1921
1922 KERNEL_DEBUG(DBG_MISC_ENQUEUE|DBG_FUNC_END, 0,0,0,0,0);
1923 }
1924
1925
1926 /*
1927 * pipe lock must be taken by the caller
1928 */
1929 void
1930 postpipeevent(struct pipe *pipep, int event)
1931 {
1932 int mask;
1933 struct eventqelt *evq;
1934
1935 if (pipep == NULL)
1936 return;
1937 KERNEL_DEBUG(DBG_MISC_POST|DBG_FUNC_START, event,0,0,1,0);
1938
1939 for (evq = pipep->pipe_evlist.tqh_first;
1940 evq != NULL; evq = evq->ee_slist.tqe_next) {
1941
1942 if (evq->ee_eventmask == 0)
1943 continue;
1944 mask = 0;
1945
1946 switch (event & (EV_RWBYTES | EV_RCLOSED | EV_WCLOSED)) {
1947
1948 case EV_RWBYTES:
1949 if ((evq->ee_eventmask & EV_RE) && pipep->pipe_buffer.cnt) {
1950 mask |= EV_RE;
1951 evq->ee_req.er_rcnt = pipep->pipe_buffer.cnt;
1952 }
1953 if ((evq->ee_eventmask & EV_WR) &&
1954 (pipep->pipe_buffer.size - pipep->pipe_buffer.cnt) >= PIPE_BUF) {
1955
1956 if (pipep->pipe_state & PIPE_EOF) {
1957 mask |= EV_WR|EV_RESET;
1958 break;
1959 }
1960 mask |= EV_WR;
1961 evq->ee_req.er_wcnt = pipep->pipe_buffer.size - pipep->pipe_buffer.cnt;
1962 }
1963 break;
1964
1965 case EV_WCLOSED:
1966 case EV_RCLOSED:
1967 if ((evq->ee_eventmask & EV_RE)) {
1968 mask |= EV_RE|EV_RCLOSED;
1969 }
1970 if ((evq->ee_eventmask & EV_WR)) {
1971 mask |= EV_WR|EV_WCLOSED;
1972 }
1973 break;
1974
1975 default:
1976 return;
1977 }
1978 if (mask) {
1979 /*
1980 * disarm... postevents are nops until this event is 'read' via
1981 * waitevent and then re-armed via modwatch
1982 */
1983 evq->ee_eventmask = 0;
1984
1985 /*
1986 * since events are disarmed until after the waitevent
1987 * the ee_req.er_xxxx fields can't change once we've
1988 * inserted this event into the proc queue...
1989 * therefore, the waitevent will see a 'consistent'
1990 * snapshot of the event, even though it won't hold
1991 * the pipe lock, and we're updating the event outside
1992 * of the proc lock, which it will hold
1993 */
1994 evq->ee_req.er_eventbits |= mask;
1995
1996 KERNEL_DEBUG(DBG_MISC_POST, (uint32_t)evq, evq->ee_req.er_eventbits, mask, 1,0);
1997
1998 evprocenque(evq);
1999 }
2000 }
2001 KERNEL_DEBUG(DBG_MISC_POST|DBG_FUNC_END, 0,0,0,1,0);
2002 }
2003
2004 #if SOCKETS
2005 /*
2006 * given either a sockbuf or a socket run down the
2007 * event list and queue ready events found...
2008 * the socket must be locked by the caller
2009 */
2010 void
2011 postevent(struct socket *sp, struct sockbuf *sb, int event)
2012 {
2013 int mask;
2014 struct eventqelt *evq;
2015 struct tcpcb *tp;
2016
2017 if (sb)
2018 sp = sb->sb_so;
2019 if (sp == NULL)
2020 return;
2021
2022 KERNEL_DEBUG(DBG_MISC_POST|DBG_FUNC_START, (int)sp, event, 0, 0, 0);
2023
2024 for (evq = sp->so_evlist.tqh_first;
2025 evq != NULL; evq = evq->ee_slist.tqe_next) {
2026
2027 if (evq->ee_eventmask == 0)
2028 continue;
2029 mask = 0;
2030
2031 /* ready for reading:
2032 - byte cnt >= receive low water mark
2033 - read-half of conn closed
2034 - conn pending for listening sock
2035 - socket error pending
2036
2037 ready for writing
2038 - byte cnt avail >= send low water mark
2039 - write half of conn closed
2040 - socket error pending
2041 - non-blocking conn completed successfully
2042
2043 exception pending
2044 - out of band data
2045 - sock at out of band mark
2046 */
2047
2048 switch (event & EV_DMASK) {
2049
2050 case EV_OOB:
2051 if ((evq->ee_eventmask & EV_EX)) {
2052 if (sp->so_oobmark || ((sp->so_state & SS_RCVATMARK)))
2053 mask |= EV_EX|EV_OOB;
2054 }
2055 break;
2056
2057 case EV_RWBYTES|EV_OOB:
2058 if ((evq->ee_eventmask & EV_EX)) {
2059 if (sp->so_oobmark || ((sp->so_state & SS_RCVATMARK)))
2060 mask |= EV_EX|EV_OOB;
2061 }
2062 /*
2063 * fall into the next case
2064 */
2065 case EV_RWBYTES:
2066 if ((evq->ee_eventmask & EV_RE) && soreadable(sp)) {
2067 if (sp->so_error) {
2068 if ((sp->so_type == SOCK_STREAM) && ((sp->so_error == ECONNREFUSED) || (sp->so_error == ECONNRESET))) {
2069 if ((sp->so_pcb == 0) || (((struct inpcb *)sp->so_pcb)->inp_state == INPCB_STATE_DEAD) || !(tp = sototcpcb(sp)) ||
2070 (tp->t_state == TCPS_CLOSED)) {
2071 mask |= EV_RE|EV_RESET;
2072 break;
2073 }
2074 }
2075 }
2076 mask |= EV_RE;
2077 evq->ee_req.er_rcnt = sp->so_rcv.sb_cc;
2078
2079 if (sp->so_state & SS_CANTRCVMORE) {
2080 mask |= EV_FIN;
2081 break;
2082 }
2083 }
2084 if ((evq->ee_eventmask & EV_WR) && sowriteable(sp)) {
2085 if (sp->so_error) {
2086 if ((sp->so_type == SOCK_STREAM) && ((sp->so_error == ECONNREFUSED) || (sp->so_error == ECONNRESET))) {
2087 if ((sp->so_pcb == 0) || (((struct inpcb *)sp->so_pcb)->inp_state == INPCB_STATE_DEAD) || !(tp = sototcpcb(sp)) ||
2088 (tp->t_state == TCPS_CLOSED)) {
2089 mask |= EV_WR|EV_RESET;
2090 break;
2091 }
2092 }
2093 }
2094 mask |= EV_WR;
2095 evq->ee_req.er_wcnt = sbspace(&sp->so_snd);
2096 }
2097 break;
2098
2099 case EV_RCONN:
2100 if ((evq->ee_eventmask & EV_RE)) {
2101 mask |= EV_RE|EV_RCONN;
2102 evq->ee_req.er_rcnt = sp->so_qlen + 1; // incl this one
2103 }
2104 break;
2105
2106 case EV_WCONN:
2107 if ((evq->ee_eventmask & EV_WR)) {
2108 mask |= EV_WR|EV_WCONN;
2109 }
2110 break;
2111
2112 case EV_RCLOSED:
2113 if ((evq->ee_eventmask & EV_RE)) {
2114 mask |= EV_RE|EV_RCLOSED;
2115 }
2116 break;
2117
2118 case EV_WCLOSED:
2119 if ((evq->ee_eventmask & EV_WR)) {
2120 mask |= EV_WR|EV_WCLOSED;
2121 }
2122 break;
2123
2124 case EV_FIN:
2125 if (evq->ee_eventmask & EV_RE) {
2126 mask |= EV_RE|EV_FIN;
2127 }
2128 break;
2129
2130 case EV_RESET:
2131 case EV_TIMEOUT:
2132 if (evq->ee_eventmask & EV_RE) {
2133 mask |= EV_RE | event;
2134 }
2135 if (evq->ee_eventmask & EV_WR) {
2136 mask |= EV_WR | event;
2137 }
2138 break;
2139
2140 default:
2141 KERNEL_DEBUG(DBG_MISC_POST|DBG_FUNC_END, (int)sp, -1, 0, 0, 0);
2142 return;
2143 } /* switch */
2144
2145 KERNEL_DEBUG(DBG_MISC_POST, (int)evq, evq->ee_eventmask, evq->ee_req.er_eventbits, mask, 0);
2146
2147 if (mask) {
2148 /*
2149 * disarm... postevents are nops until this event is 'read' via
2150 * waitevent and then re-armed via modwatch
2151 */
2152 evq->ee_eventmask = 0;
2153
2154 /*
2155 * since events are disarmed until after the waitevent,
2156 * the ee_req.er_xxxx fields can't change once we've
2157 * inserted this event into the proc queue...
2158 * and since waitevent can't see this event until we
2159 * enqueue it, waitevent will see a 'consistent'
2160 * snapshot of the event, even though it won't hold
2161 * the socket lock and we're updating the event without
2162 * holding the proc lock (which waitevent will hold)
2163 */
2164 evq->ee_req.er_eventbits |= mask;
2165
2166 evprocenque(evq);
2167 }
2168 }
2169 KERNEL_DEBUG(DBG_MISC_POST|DBG_FUNC_END, (int)sp, 0, 0, 0, 0);
2170 }
2171 #endif /* SOCKETS */
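
/*
 * a sketch of a typical postevent() call site (illustrative only, not
 * a verbatim quote from the socket layer): a protocol that has just
 * appended data to so_rcv, and that already holds the socket lock as
 * required above, would post the new bytes with:
 *
 *	sbappend(&so->so_rcv, m);
 *	postevent(so, &so->so_rcv, EV_RWBYTES);
 *
 * passing sb == 0 and the socket directly (as watchevent() does below)
 * is equivalent, since postevent() derives sp from sb->sb_so whenever
 * sb is non-NULL
 */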
2172
2173
2174 /*
2175 * watchevent system call. user passes us an event to watch
2176 * for. we malloc an event object, initialize it, and queue
2177 * it to the open socket or pipe. when the event occurs, postevent()
2178 * will enqueue it back to our proc where we can retrieve it
2179 * via waitevent().
2180 *
2181 * note that only one watch per file per proc is allowed;
2182 * a duplicate watch fails with EINVAL
2182 *
2183 * Returns:
2184 * ENOMEM No memory for operation
2185 * copyin:EFAULT
2186 */
2187 int
2188 watchevent(proc_t p, struct watchevent_args *uap, __unused int *retval)
2189 {
2190 struct eventqelt *evq = (struct eventqelt *)0;
2191 struct eventqelt *np = NULL;
2192 struct eventreq64 *erp;
2193 struct fileproc *fp = NULL;
2194 int error;
2195
2196 KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_START, 0,0,0,0,0);
2197
2198 // get a qelt and fill with user's req
2199 MALLOC(evq, struct eventqelt *, sizeof(struct eventqelt), M_TEMP, M_WAITOK);
2200
2201 if (evq == NULL)
2202 return (ENOMEM);
2203 erp = &evq->ee_req;
2204
2205 // get user's request pkt
2206
2207 if (IS_64BIT_PROCESS(p)) {
2208 error = copyin(uap->u_req, (caddr_t)erp, sizeof(struct eventreq64));
2209 } else {
2210 struct eventreq32 er32;
2211
2212 error = copyin(uap->u_req, (caddr_t)&er32, sizeof(struct eventreq32));
2213 if (error == 0) {
2214 /*
2215 * the user only passes in the
2216 * er_type, er_handle and er_data...
2217 * the other fields are initialized
2218 * below, so don't bother to copy
2219 */
2220 erp->er_type = er32.er_type;
2221 erp->er_handle = er32.er_handle;
2222 erp->er_data = (user_addr_t)er32.er_data;
2223 }
2224 }
2225 if (error) {
2226 FREE(evq, M_TEMP);
2227 KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_END, error,0,0,0,0);
2228
2229 return(error);
2230 }
2231 KERNEL_DEBUG(DBG_MISC_WATCH, erp->er_handle,uap->u_eventmask,(uint32_t)evq,0,0);
2232
2233 // validate, freeing qelt if errors
2234 error = 0;
2235 proc_fdlock(p);
2236
2237 if (erp->er_type != EV_FD) {
2238 error = EINVAL;
2239 } else if ((error = fp_lookup(p, erp->er_handle, &fp, 1)) != 0) {
2240 error = EBADF;
2241 #if SOCKETS
2242 } else if (fp->f_type == DTYPE_SOCKET) {
2243 socket_lock((struct socket *)fp->f_data, 1);
2244 np = ((struct socket *)fp->f_data)->so_evlist.tqh_first;
2245 #endif /* SOCKETS */
2246 } else if (fp->f_type == DTYPE_PIPE) {
2247 PIPE_LOCK((struct pipe *)fp->f_data);
2248 np = ((struct pipe *)fp->f_data)->pipe_evlist.tqh_first;
2249 } else {
2250 fp_drop(p, erp->er_handle, fp, 1);
2251 error = EINVAL;
2252 }
2253 proc_fdunlock(p);
2254
2255 if (error) {
2256 FREE(evq, M_TEMP);
2257
2258 KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_END, error,0,0,0,0);
2259 return(error);
2260 }
2261
2262 /*
2263 * only allow one watch per file per proc
2264 */
2265 for ( ; np != NULL; np = np->ee_slist.tqe_next) {
2266 if (np->ee_proc == p) {
2267 #if SOCKETS
2268 if (fp->f_type == DTYPE_SOCKET)
2269 socket_unlock((struct socket *)fp->f_data, 1);
2270 else
2271 #endif /* SOCKETS */
2272 PIPE_UNLOCK((struct pipe *)fp->f_data);
2273 fp_drop(p, erp->er_handle, fp, 0);
2274 FREE(evq, M_TEMP);
2275
2276 KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_END, EINVAL,0,0,0,0);
2277 return(EINVAL);
2278 }
2279 }
2280 erp->er_ecnt = erp->er_rcnt = erp->er_wcnt = erp->er_eventbits = 0;
2281 evq->ee_proc = p;
2282 evq->ee_eventmask = uap->u_eventmask & EV_MASK;
2283 evq->ee_flags = 0;
2284
2285 #if SOCKETS
2286 if (fp->f_type == DTYPE_SOCKET) {
2287 TAILQ_INSERT_TAIL(&((struct socket *)fp->f_data)->so_evlist, evq, ee_slist);
2288 postevent((struct socket *)fp->f_data, 0, EV_RWBYTES); // catch existing events
2289
2290 socket_unlock((struct socket *)fp->f_data, 1);
2291 } else
2292 #endif /* SOCKETS */
2293 {
2294 TAILQ_INSERT_TAIL(&((struct pipe *)fp->f_data)->pipe_evlist, evq, ee_slist);
2295 postpipeevent((struct pipe *)fp->f_data, EV_RWBYTES);
2296
2297 PIPE_UNLOCK((struct pipe *)fp->f_data);
2298 }
2299 fp_drop_event(p, erp->er_handle, fp);
2300
2301 KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_END, 0,0,0,0,0);
2302 return(0);
2303 }
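
/*
 * usage sketch (hypothetical user-space caller; assumes the watchevent
 * syscall stub and struct eventreq from <sys/ev.h> are visible, and is
 * not part of this file):
 *
 *	struct eventreq er;
 *
 *	bzero(&er, sizeof (er));
 *	er.er_type = EV_FD;		// only EV_FD is accepted above
 *	er.er_handle = sock_fd;		// must name a socket or a pipe
 *	er.er_data = (void *)token;	// opaque, returned by waitevent
 *
 *	if (watchevent(&er, EV_RE | EV_WR) == -1)
 *		err(1, "watchevent");	// e.g. EINVAL for a second watch
 *					// on the same fd by this proc
 */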
2304
2305
2306
2307 /*
2308 * waitevent system call.
2309 * grabs the next waiting event for this proc and returns
2310 * it. if no events are queued, the user can request to sleep
2311 * (with or without a timeout) or to poll; poll mode is selected by
2312 * ((tv != NULL && interval == 0) || tv == -1)
2313 */
2314 int
2315 waitevent(proc_t p, struct waitevent_args *uap, int *retval)
2316 {
2317 int error = 0;
2318 struct eventqelt *evq;
2319 struct eventreq64 *erp;
2320 uint64_t abstime, interval;
2321 boolean_t fast_poll = FALSE;
2322 union {
2323 struct eventreq64 er64;
2324 struct eventreq32 er32;
2325 } uer;
2326
2327 interval = 0;
2328
2329 if (uap->tv) {
2330 struct timeval atv;
2331 /*
2332 * check for fast poll method
2333 */
2334 if (IS_64BIT_PROCESS(p)) {
2335 if (uap->tv == (user_addr_t)-1)
2336 fast_poll = TRUE;
2337 } else if (uap->tv == (user_addr_t)((uint32_t)-1))
2338 fast_poll = TRUE;
2339
2340 if (fast_poll == TRUE) {
2341 if (p->p_evlist.tqh_first == NULL) {
2342 KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_NONE, -1,0,0,0,0);
2343 /*
2344 * poll failed
2345 */
2346 *retval = 1;
2347 return (0);
2348 }
2349 proc_lock(p);
2350 goto retry;
2351 }
2352 error = copyin(uap->tv, (caddr_t)&atv, sizeof (atv));
2353
2354 if (error)
2355 return(error);
2356 if (itimerfix(&atv)) {
2357 error = EINVAL;
2358 return(error);
2359 }
2360 interval = tvtoabstime(&atv);
2361 }
2362 KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_START, 0,0,0,0,0);
2363
2364 proc_lock(p);
2365 retry:
2366 if ((evq = p->p_evlist.tqh_first) != NULL) {
2367 /*
2368 * found one... make a local copy while it's still on the queue
2369 * to prevent it from changing while in the midst of copying
2370 * don't want to hold the proc lock across a copyout because
2371 * it might block on a page fault at the target in user space
2372 */
2373 erp = &evq->ee_req;
2374
2375 if (IS_64BIT_PROCESS(p))
2376 bcopy((caddr_t)erp, (caddr_t)&uer.er64, sizeof (struct eventreq64));
2377 else {
2378 uer.er32.er_type = erp->er_type;
2379 uer.er32.er_handle = erp->er_handle;
2380 uer.er32.er_data = (uint32_t)erp->er_data;
2381 uer.er32.er_ecnt = erp->er_ecnt;
2382 uer.er32.er_rcnt = erp->er_rcnt;
2383 uer.er32.er_wcnt = erp->er_wcnt;
2384 uer.er32.er_eventbits = erp->er_eventbits;
2385 }
2386 TAILQ_REMOVE(&p->p_evlist, evq, ee_plist);
2387
2388 evq->ee_flags &= ~EV_QUEUED;
2389
2390 proc_unlock(p);
2391
2392 if (IS_64BIT_PROCESS(p))
2393 error = copyout((caddr_t)&uer.er64, uap->u_req, sizeof(struct eventreq64));
2394 else
2395 error = copyout((caddr_t)&uer.er32, uap->u_req, sizeof(struct eventreq32));
2396
2397 KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_END, error,
2398 evq->ee_req.er_handle,evq->ee_req.er_eventbits,(uint32_t)evq,0);
2399 return (error);
2400 }
2401 else {
2402 if (uap->tv && interval == 0) {
2403 proc_unlock(p);
2404 *retval = 1; // poll failed
2405
2406 KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_END, error,0,0,0,0);
2407 return (error);
2408 }
2409 if (interval != 0)
2410 clock_absolutetime_interval_to_deadline(interval, &abstime);
2411 else
2412 abstime = 0;
2413
2414 KERNEL_DEBUG(DBG_MISC_WAIT, 1,(uint32_t)&p->p_evlist,0,0,0);
2415
2416 error = msleep1(&p->p_evlist, &p->p_mlock, (PSOCK | PCATCH), "waitevent", abstime);
2417
2418 KERNEL_DEBUG(DBG_MISC_WAIT, 2,(uint32_t)&p->p_evlist,0,0,0);
2419
2420 if (error == 0)
2421 goto retry;
2422 if (error == ERESTART)
2423 error = EINTR;
2424 if (error == EWOULDBLOCK) {
2425 *retval = 1;
2426 error = 0;
2427 }
2428 }
2429 proc_unlock(p);
2430
2431 KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_END, 0,0,0,0,0);
2432 return (error);
2433 }
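
/*
 * usage sketch (hypothetical caller, same assumptions as the
 * watchevent example above):
 *
 *	struct eventreq er;
 *	struct timeval tv = { 2, 0 };
 *	int ret;
 *
 *	// sleep for up to 2 seconds; the stub returns 0 with er filled
 *	// in when an event fires, or 1 when the wait times out
 *	ret = waitevent(&er, &tv);
 *
 *	// fast poll: a tv of (uintptr_t)-1 takes the early-exit path
 *	// above without copying in a timeval; a zeroed tv also polls,
 *	// but via the copyin/itimerfix path
 *	ret = waitevent(&er, (struct timeval *)-1);
 */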
2434
2435
2436 /*
2437 * modwatch system call. user passes in the event to modify.
2438 * if we find it, we reset the event bits and queue/dequeue the
2439 * event as needed.
2440 */
2441 int
2442 modwatch(proc_t p, struct modwatch_args *uap, __unused int *retval)
2443 {
2444 struct eventreq64 er;
2445 struct eventreq64 *erp = &er;
2446 struct eventqelt *evq = NULL; /* protected by error return */
2447 int error;
2448 struct fileproc *fp;
2449 int flag;
2450
2451 KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_START, 0,0,0,0,0);
2452
2453 /*
2454 * get user's request pkt
2455 * just need the er_type and er_handle which sit above the
2456 * problematic er_data (32/64 issue)... so only copy in
2457 * those 2 fields
2458 */
2459 if ((error = copyin(uap->u_req, (caddr_t)erp, sizeof(er.er_type) + sizeof(er.er_handle)))) {
2460 KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_END, error,0,0,0,0);
2461 return(error);
2462 }
2463 proc_fdlock(p);
2464
2465 if (erp->er_type != EV_FD) {
2466 error = EINVAL;
2467 } else if ((error = fp_lookup(p, erp->er_handle, &fp, 1)) != 0) {
2468 error = EBADF;
2469 #if SOCKETS
2470 } else if (fp->f_type == DTYPE_SOCKET) {
2471 socket_lock((struct socket *)fp->f_data, 1);
2472 evq = ((struct socket *)fp->f_data)->so_evlist.tqh_first;
2473 #endif /* SOCKETS */
2474 } else if (fp->f_type == DTYPE_PIPE) {
2475 PIPE_LOCK((struct pipe *)fp->f_data);
2476 evq = ((struct pipe *)fp->f_data)->pipe_evlist.tqh_first;
2477 } else {
2478 fp_drop(p, erp->er_handle, fp, 1);
2479 error = EINVAL;
2480 }
2481
2482 if (error) {
2483 proc_fdunlock(p);
2484 KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_END, error,0,0,0,0);
2485 return(error);
2486 }
2487
2488 if ((uap->u_eventmask == EV_RM) && (fp->f_flags & FP_WAITEVENT)) {
2489 fp->f_flags &= ~FP_WAITEVENT;
2490 }
2491 proc_fdunlock(p);
2492
2493 // locate event if possible
2494 for ( ; evq != NULL; evq = evq->ee_slist.tqe_next) {
2495 if (evq->ee_proc == p)
2496 break;
2497 }
2498 if (evq == NULL) {
2499 #if SOCKETS
2500 if (fp->f_type == DTYPE_SOCKET)
2501 socket_unlock((struct socket *)fp->f_data, 1);
2502 else
2503 #endif /* SOCKETS */
2504 PIPE_UNLOCK((struct pipe *)fp->f_data);
2505 fp_drop(p, erp->er_handle, fp, 0);
2506 KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_END, EINVAL,0,0,0,0);
2507 return(EINVAL);
2508 }
2509 KERNEL_DEBUG(DBG_MISC_MOD, erp->er_handle,uap->u_eventmask,(uint32_t)evq,0,0);
2510
2511 if (uap->u_eventmask == EV_RM) {
2512 EVPROCDEQUE(p, evq);
2513
2514 #if SOCKETS
2515 if (fp->f_type == DTYPE_SOCKET) {
2516 TAILQ_REMOVE(&((struct socket *)fp->f_data)->so_evlist, evq, ee_slist);
2517 socket_unlock((struct socket *)fp->f_data, 1);
2518 } else
2519 #endif /* SOCKETS */
2520 {
2521 TAILQ_REMOVE(&((struct pipe *)fp->f_data)->pipe_evlist, evq, ee_slist);
2522 PIPE_UNLOCK((struct pipe *)fp->f_data);
2523 }
2524 fp_drop(p, erp->er_handle, fp, 0);
2525 FREE(evq, M_TEMP);
2526 KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_END, 0,0,0,0,0);
2527 return(0);
2528 }
2529 switch (uap->u_eventmask & EV_MASK) {
2530
2531 case 0:
2532 flag = 0;
2533 break;
2534
2535 case EV_RE:
2536 case EV_WR:
2537 case EV_RE|EV_WR:
2538 flag = EV_RWBYTES;
2539 break;
2540
2541 case EV_EX:
2542 flag = EV_OOB;
2543 break;
2544
2545 case EV_EX|EV_RE:
2546 case EV_EX|EV_WR:
2547 case EV_EX|EV_RE|EV_WR:
2548 flag = EV_OOB|EV_RWBYTES;
2549 break;
2550
2551 default:
2552 #if SOCKETS
2553 if (fp->f_type == DTYPE_SOCKET)
2554 socket_unlock((struct socket *)fp->f_data, 1);
2555 else
2556 #endif /* SOCKETS */
2557 PIPE_UNLOCK((struct pipe *)fp->f_data);
2558 fp_drop(p, erp->er_handle, fp, 0);
2559 KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_END, EINVAL,0,0,0,0);
2560 return(EINVAL);
2561 }
2562 /*
2563 * since we're holding the socket/pipe lock, the event
2564 * cannot go from the unqueued state to the queued state;
2565 * however, it can go from the queued state to the unqueued state
2566 * since that direction is protected by the proc_lock...
2567 * so do a quick check for EV_QUEUED w/o holding the proc lock
2568 * since by far the common case will be NOT EV_QUEUED, this saves
2569 * us taking the proc_lock the majority of the time
2570 */
2571 if (evq->ee_flags & EV_QUEUED) {
2572 /*
2573 * EVPROCDEQUE will recheck the state after it grabs the proc_lock
2574 */
2575 EVPROCDEQUE(p, evq);
2576 }
2577 /*
2578 * while the event is off the proc queue and
2579 * we're holding the socket/pipe lock
2580 * it's safe to update these fields...
2581 */
2582 evq->ee_req.er_eventbits = 0;
2583 evq->ee_eventmask = uap->u_eventmask & EV_MASK;
2584
2585 #if SOCKETS
2586 if (fp->f_type == DTYPE_SOCKET) {
2587 postevent((struct socket *)fp->f_data, 0, flag);
2588 socket_unlock((struct socket *)fp->f_data, 1);
2589 } else
2590 #endif /* SOCKETS */
2591 {
2592 postpipeevent((struct pipe *)fp->f_data, flag);
2593 PIPE_UNLOCK((struct pipe *)fp->f_data);
2594 }
2595 fp_drop(p, erp->er_handle, fp, 0);
2596 KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_END, evq->ee_req.er_handle,evq->ee_eventmask,(uint32_t)fp->f_data,flag,0);
2597 return(0);
2598 }
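
/*
 * usage sketch (hypothetical caller): after waitevent() has delivered
 * an event, the watch is disarmed; re-arm it for reads only, or tear
 * it down entirely:
 *
 *	er.er_handle = sock_fd;
 *	modwatch(&er, EV_RE);	// re-arm; also re-posts any pending
 *				// EV_RWBYTES state so nothing is lost
 *	...
 *	modwatch(&er, EV_RM);	// dequeue and free the eventqelt
 */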
2599
2600 /* this routine is called from the close of an fd with proc_fdlock held (dropped and re-taken internally) */
2601 int
2602 waitevent_close(struct proc *p, struct fileproc *fp)
2603 {
2604 struct eventqelt *evq;
2605
2606
2607 fp->f_flags &= ~FP_WAITEVENT;
2608
2609 #if SOCKETS
2610 if (fp->f_type == DTYPE_SOCKET) {
2611 socket_lock((struct socket *)fp->f_data, 1);
2612 evq = ((struct socket *)fp->f_data)->so_evlist.tqh_first;
2613 } else
2614 #endif /* SOCKETS */
2615 if (fp->f_type == DTYPE_PIPE) {
2616 PIPE_LOCK((struct pipe *)fp->f_data);
2617 evq = ((struct pipe *)fp->f_data)->pipe_evlist.tqh_first;
2618 }
2619 else {
2620 return(EINVAL);
2621 }
2622 proc_fdunlock(p);
2623
2624
2625 // locate event if possible
2626 for ( ; evq != NULL; evq = evq->ee_slist.tqe_next) {
2627 if (evq->ee_proc == p)
2628 break;
2629 }
2630 if (evq == NULL) {
2631 #if SOCKETS
2632 if (fp->f_type == DTYPE_SOCKET)
2633 socket_unlock((struct socket *)fp->f_data, 1);
2634 else
2635 #endif /* SOCKETS */
2636 PIPE_UNLOCK((struct pipe *)fp->f_data);
2637
2638 proc_fdlock(p);
2639
2640 return(EINVAL);
2641 }
2642 EVPROCDEQUE(p, evq);
2643
2644 #if SOCKETS
2645 if (fp->f_type == DTYPE_SOCKET) {
2646 TAILQ_REMOVE(&((struct socket *)fp->f_data)->so_evlist, evq, ee_slist);
2647 socket_unlock((struct socket *)fp->f_data, 1);
2648 } else
2649 #endif /* SOCKETS */
2650 {
2651 TAILQ_REMOVE(&((struct pipe *)fp->f_data)->pipe_evlist, evq, ee_slist);
2652 PIPE_UNLOCK((struct pipe *)fp->f_data);
2653 }
2654 FREE(evq, M_TEMP);
2655
2656 proc_fdlock(p);
2657
2658 return(0);
2659 }
2660
2661
2662 /*
2663 * gethostuuid
2664 *
2665 * Description: Get the host UUID from IOKit and return it to user space.
2666 *
2667 * Parameters: uuid_buf Pointer to buffer to receive UUID
2668 * timeout Timespec for timeout
2669 *
2670 * Returns: 0 Success
2671 * EWOULDBLOCK Timeout is too short
2672 * copyout:EFAULT Bad user buffer
2673 *
2674 * Notes: A timeout seems redundant, since if it's tolerable to not
2675 * have a system UUID in hand, then why ask for one?
2676 */
2677 int
2678 gethostuuid(struct proc *p, struct gethostuuid_args *uap, __unused register_t *retval)
2679 {
2680 kern_return_t kret;
2681 int error;
2682 mach_timespec_t mach_ts; /* for IOKit call */
2683 __darwin_uuid_t uuid_kern; /* for IOKit call */
2684
2685 /* Convert the 32/64 bit timespec into a mach_timespec_t */
2686 if ( proc_is64bit(p) ) {
2687 struct user_timespec ts;
2688 error = copyin(uap->timeoutp, &ts, sizeof(ts));
2689 if (error)
2690 return (error);
2691 mach_ts.tv_sec = ts.tv_sec;
2692 mach_ts.tv_nsec = ts.tv_nsec;
2693 } else {
2694 struct timespec ts;
2695 error = copyin(uap->timeoutp, &ts, sizeof(ts) );
2696 if (error)
2697 return (error);
2698 mach_ts.tv_sec = ts.tv_sec;
2699 mach_ts.tv_nsec = ts.tv_nsec;
2700 }
2701
2702 /* Call IOKit with the stack buffer to get the UUID */
2703 kret = IOBSDGetPlatformUUID(uuid_kern, mach_ts);
2704
2705 /*
2706 * If we get it, copy out the data to the user buffer; note that a
2707 * uuid_t is an array of characters, so this is size invariant for
2708 * 32 vs. 64 bit.
2709 */
2710 if (kret == KERN_SUCCESS) {
2711 error = copyout(uuid_kern, uap->uuid_buf, sizeof(uuid_kern));
2712 } else {
2713 error = EWOULDBLOCK;
2714 }
2715
2716 return (error);
2717 }
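
/*
 * usage sketch (user space): gethostuuid(2) is reachable through the
 * gethostuuid() wrapper declared in <unistd.h> on Mac OS X:
 *
 *	#include <stdio.h>
 *	#include <unistd.h>
 *	#include <uuid/uuid.h>
 *
 *	uuid_t uuid;
 *	uuid_string_t str;
 *	struct timespec ts = { 5, 0 };	// give IOKit up to 5 seconds
 *
 *	if (gethostuuid(uuid, &ts) == 0) {
 *		uuid_unparse(uuid, str);
 *		printf("host uuid: %s\n", str);
 *	}
 */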