apple/xnu (tag xnu-7195.50.7.100.1) - bsd/kern/sys_generic.c
1 /*
2 * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
29 /*
30 * Copyright (c) 1982, 1986, 1989, 1993
31 * The Regents of the University of California. All rights reserved.
32 * (c) UNIX System Laboratories, Inc.
33 * All or some portions of this file are derived from material licensed
34 * to the University of California by American Telephone and Telegraph
35 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
36 * the permission of UNIX System Laboratories, Inc.
37 *
38 * Redistribution and use in source and binary forms, with or without
39 * modification, are permitted provided that the following conditions
40 * are met:
41 * 1. Redistributions of source code must retain the above copyright
42 * notice, this list of conditions and the following disclaimer.
43 * 2. Redistributions in binary form must reproduce the above copyright
44 * notice, this list of conditions and the following disclaimer in the
45 * documentation and/or other materials provided with the distribution.
46 * 3. All advertising materials mentioning features or use of this software
47 * must display the following acknowledgement:
48 * This product includes software developed by the University of
49 * California, Berkeley and its contributors.
50 * 4. Neither the name of the University nor the names of its contributors
51 * may be used to endorse or promote products derived from this software
52 * without specific prior written permission.
53 *
54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 * SUCH DAMAGE.
65 *
66 * @(#)sys_generic.c 8.9 (Berkeley) 2/14/95
67 */
68 /*
69 * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce
70 * support for mandatory and extensible security protections. This notice
71 * is included in support of clause 2.2 (b) of the Apple Public License,
72 * Version 2.0.
73 */
74
75 #include <sys/param.h>
76 #include <sys/systm.h>
77 #include <sys/filedesc.h>
78 #include <sys/ioctl.h>
79 #include <sys/file_internal.h>
80 #include <sys/proc_internal.h>
81 #include <sys/socketvar.h>
82 #include <sys/uio_internal.h>
83 #include <sys/kernel.h>
84 #include <sys/guarded.h>
85 #include <sys/stat.h>
86 #include <sys/malloc.h>
87 #include <sys/sysproto.h>
88
89 #include <sys/mount_internal.h>
90 #include <sys/protosw.h>
91 #include <sys/ev.h>
92 #include <sys/user.h>
93 #include <sys/kdebug.h>
94 #include <sys/poll.h>
95 #include <sys/event.h>
96 #include <sys/eventvar.h>
97 #include <sys/proc.h>
98 #include <sys/kauth.h>
99
100 #include <machine/smp.h>
101 #include <mach/mach_types.h>
102 #include <kern/kern_types.h>
103 #include <kern/assert.h>
104 #include <kern/kalloc.h>
105 #include <kern/thread.h>
106 #include <kern/clock.h>
107 #include <kern/ledger.h>
108 #include <kern/task.h>
109 #include <kern/telemetry.h>
110 #include <kern/waitq.h>
111 #include <kern/sched_prim.h>
112 #include <kern/mpsc_queue.h>
113 #include <kern/debug.h>
114
115 #include <sys/mbuf.h>
116 #include <sys/domain.h>
117 #include <sys/socket.h>
118 #include <sys/socketvar.h>
119 #include <sys/errno.h>
120 #include <sys/syscall.h>
121 #include <sys/pipe.h>
122
123 #include <security/audit/audit.h>
124
125 #include <net/if.h>
126 #include <net/route.h>
127
128 #include <netinet/in.h>
129 #include <netinet/in_systm.h>
130 #include <netinet/ip.h>
131 #include <netinet/in_pcb.h>
132 #include <netinet/ip_var.h>
133 #include <netinet/ip6.h>
134 #include <netinet/tcp.h>
135 #include <netinet/tcp_fsm.h>
136 #include <netinet/tcp_seq.h>
137 #include <netinet/tcp_timer.h>
138 #include <netinet/tcp_var.h>
139 #include <netinet/tcpip.h>
140 #include <netinet/tcp_debug.h>
141 /* for wait queue based select */
142 #include <kern/waitq.h>
143 #include <sys/vnode_internal.h>
144 /* for remote time api */
145 #include <kern/remote_time.h>
146 #include <os/log.h>
147 #include <sys/log_data.h>
148
149 #if CONFIG_MACF
150 #include <security/mac_framework.h>
151 #endif
152
153 /* for entitlement check */
154 #include <IOKit/IOBSD.h>
155
156 /* XXX should be in a header file somewhere */
157 extern kern_return_t IOBSDGetPlatformUUID(__darwin_uuid_t uuid, mach_timespec_t timeoutp);
158
159 int rd_uio(struct proc *p, int fdes, uio_t uio, int is_preadv, user_ssize_t *retval);
160 int wr_uio(struct proc *p, int fdes, uio_t uio, int is_pwritev, user_ssize_t *retval);
161 int do_uiowrite(struct proc *p, struct fileproc *fp, uio_t uio, int flags, user_ssize_t *retval);
162
163 __private_extern__ int dofileread(vfs_context_t ctx, struct fileproc *fp,
164 user_addr_t bufp, user_size_t nbyte,
165 off_t offset, int flags, user_ssize_t *retval);
166 __private_extern__ int dofilewrite(vfs_context_t ctx, struct fileproc *fp,
167 user_addr_t bufp, user_size_t nbyte,
168 off_t offset, int flags, user_ssize_t *retval);
169 static int preparefileread(struct proc *p, struct fileproc **fp_ret, int fd, int check_for_vnode);
170
171 /* Conflict wait queue for when selects collide (opaque type) */
172 struct waitq select_conflict_queue;
173
174 /*
175 * Init routine called from bsd_init.c
176 */
177 void select_waitq_init(void);
178 void
179 select_waitq_init(void)
180 {
181 waitq_init(&select_conflict_queue, SYNC_POLICY_FIFO);
182 }
183
184 #define f_flag fp_glob->fg_flag
185 #define f_type fp_glob->fg_ops->fo_type
186 #define f_cred fp_glob->fg_cred
187 #define f_ops fp_glob->fg_ops
188 #define f_data fp_glob->fg_data
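/*
 * Editor's note (illustrative, not part of the original source): these
 * shorthands resolve the legacy fp->f_xxx spellings used below through the
 * shared fileglob, e.g. fp->f_flag expands to fp->fp_glob->fg_flag and
 * fp->f_cred to fp->fp_glob->fg_cred.
 */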
189
190 /*
191 * Read system call.
192 *
193 * Returns: 0 Success
194 * preparefileread:EBADF
195 * preparefileread:ESPIPE
196 * preparefileread:ENXIO
197 * preparefileread:EBADF
198 * dofileread:???
199 */
200 int
201 read(struct proc *p, struct read_args *uap, user_ssize_t *retval)
202 {
203 __pthread_testcancel(1);
204 return read_nocancel(p, (struct read_nocancel_args *)uap, retval);
205 }
206
207 int
208 read_nocancel(struct proc *p, struct read_nocancel_args *uap, user_ssize_t *retval)
209 {
210 struct fileproc *fp;
211 int error;
212 int fd = uap->fd;
213 struct vfs_context context;
214
215 if ((error = preparefileread(p, &fp, fd, 0))) {
216 return error;
217 }
218
219 context = *(vfs_context_current());
220 context.vc_ucred = fp->fp_glob->fg_cred;
221
222 error = dofileread(&context, fp, uap->cbuf, uap->nbyte,
223 (off_t)-1, 0, retval);
224
225 fp_drop(p, fd, fp, 0);
226
227 return error;
228 }
229
230 /*
231 * Pread system call
232 *
233 * Returns: 0 Success
234 * preparefileread:EBADF
235 * preparefileread:ESPIPE
236 * preparefileread:ENXIO
237 * preparefileread:EBADF
238 * dofileread:???
239 */
240 int
241 pread(struct proc *p, struct pread_args *uap, user_ssize_t *retval)
242 {
243 __pthread_testcancel(1);
244 return pread_nocancel(p, (struct pread_nocancel_args *)uap, retval);
245 }
246
247 int
248 pread_nocancel(struct proc *p, struct pread_nocancel_args *uap, user_ssize_t *retval)
249 {
250 struct fileproc *fp = NULL; /* fp set by preparefileread() */
251 int fd = uap->fd;
252 int error;
253 struct vfs_context context;
254
255 if ((error = preparefileread(p, &fp, fd, 1))) {
256 goto out;
257 }
258
259 context = *(vfs_context_current());
260 context.vc_ucred = fp->fp_glob->fg_cred;
261
262 error = dofileread(&context, fp, uap->buf, uap->nbyte,
263 uap->offset, FOF_OFFSET, retval);
264
265 fp_drop(p, fd, fp, 0);
266
267 KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_pread) | DBG_FUNC_NONE),
268 uap->fd, uap->nbyte, (unsigned int)((uap->offset >> 32)), (unsigned int)(uap->offset), 0);
269
270 out:
271 return error;
272 }
273
274 /*
275 * Code common for read and pread
276 */
277
278 /*
279 * Returns: 0 Success
280 * EBADF
281 * ESPIPE
282 * ENXIO
283 * fp_lookup:EBADF
284 */
285 static int
286 preparefileread(struct proc *p, struct fileproc **fp_ret, int fd, int check_for_pread)
287 {
288 vnode_t vp;
289 int error;
290 struct fileproc *fp;
291
292 AUDIT_ARG(fd, fd);
293
294 proc_fdlock_spin(p);
295
296 error = fp_lookup(p, fd, &fp, 1);
297
298 if (error) {
299 proc_fdunlock(p);
300 return error;
301 }
302 if ((fp->f_flag & FREAD) == 0) {
303 error = EBADF;
304 goto out;
305 }
306 if (check_for_pread && (fp->f_type != DTYPE_VNODE)) {
307 error = ESPIPE;
308 goto out;
309 }
310 if (fp->f_type == DTYPE_VNODE) {
311 vp = (struct vnode *)fp->fp_glob->fg_data;
312
313 if (check_for_pread && (vnode_isfifo(vp))) {
314 error = ESPIPE;
315 goto out;
316 }
317 if (check_for_pread && (vp->v_flag & VISTTY)) {
318 error = ENXIO;
319 goto out;
320 }
321 }
322
323 *fp_ret = fp;
324
325 proc_fdunlock(p);
326 return 0;
327
328 out:
329 fp_drop(p, fd, fp, 1);
330 proc_fdunlock(p);
331 return error;
332 }
333
334
335 /*
336 * Returns: 0 Success
337 * EINVAL
338 * fo_read:???
339 */
340 __private_extern__ int
341 dofileread(vfs_context_t ctx, struct fileproc *fp,
342 user_addr_t bufp, user_size_t nbyte, off_t offset, int flags,
343 user_ssize_t *retval)
344 {
345 uio_t auio;
346 user_ssize_t bytecnt;
347 int error = 0;
348 char uio_buf[UIO_SIZEOF(1)];
349
350 if (nbyte > INT_MAX) {
351 return EINVAL;
352 }
353
354 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
355 auio = uio_createwithbuffer(1, offset, UIO_USERSPACE64, UIO_READ,
356 &uio_buf[0], sizeof(uio_buf));
357 } else {
358 auio = uio_createwithbuffer(1, offset, UIO_USERSPACE32, UIO_READ,
359 &uio_buf[0], sizeof(uio_buf));
360 }
361 if (uio_addiov(auio, bufp, nbyte) != 0) {
362 *retval = 0;
363 return EINVAL;
364 }
365
366 bytecnt = nbyte;
367
368 if ((error = fo_read(fp, auio, flags, ctx))) {
369 if (uio_resid(auio) != bytecnt && (error == ERESTART ||
370 error == EINTR || error == EWOULDBLOCK)) {
371 error = 0;
372 }
373 }
374 bytecnt -= uio_resid(auio);
375
376 *retval = bytecnt;
377
378 return error;
379 }
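/*
 * Editor's sketch (not part of the original source): dofileread() builds a
 * single-iovec uio on the stack via uio_createwithbuffer(), so the common
 * case needs no heap allocation. A kernel caller reading into its own
 * buffer could follow the same pattern; kbuf and klen below are assumed,
 * hypothetical names.
 */
#if 0
	char  uio_buf[UIO_SIZEOF(1)];
	uio_t auio = uio_createwithbuffer(1, /* offset */ 0, UIO_SYSSPACE,
	    UIO_READ, &uio_buf[0], sizeof(uio_buf));
	if (uio_addiov(auio, (user_addr_t)(uintptr_t)kbuf, klen) == 0) {
		error = fo_read(fp, auio, 0, ctx);
	}
#endif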
380
381 /*
382 * Vector read.
383 *
384 * Returns: 0 Success
385 * EINVAL
386 * ENOMEM
387 * preparefileread:EBADF
388 * preparefileread:ESPIPE
389 * preparefileread:ENXIO
390 * preparefileread:EBADF
391 * copyin:EFAULT
392 * rd_uio:???
393 */
394 static int
395 readv_preadv_uio(struct proc *p, int fdes,
396 user_addr_t user_iovp, int iovcnt, off_t offset, int is_preadv,
397 user_ssize_t *retval)
398 {
399 uio_t auio = NULL;
400 int error;
401 struct user_iovec *iovp;
402
403 /* Verify range before calling uio_create() */
404 if (iovcnt <= 0 || iovcnt > UIO_MAXIOV) {
405 return EINVAL;
406 }
407
408 /* allocate a uio large enough to hold the number of iovecs passed */
409 auio = uio_create(iovcnt, offset,
410 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
411 UIO_READ);
412
413 /* get location of iovecs within the uio. then copyin the iovecs from
414 * user space.
415 */
416 iovp = uio_iovsaddr(auio);
417 if (iovp == NULL) {
418 error = ENOMEM;
419 goto ExitThisRoutine;
420 }
421 error = copyin_user_iovec_array(user_iovp,
422 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
423 iovcnt, iovp);
424 if (error) {
425 goto ExitThisRoutine;
426 }
427
428 /* finalize uio_t for use and do the IO
429 */
430 error = uio_calculateresid(auio);
431 if (error) {
432 goto ExitThisRoutine;
433 }
434 error = rd_uio(p, fdes, auio, is_preadv, retval);
435
436 ExitThisRoutine:
437 if (auio != NULL) {
438 uio_free(auio);
439 }
440 return error;
441 }
442
443 /*
444 * Scatter read system call.
445 */
446 int
447 readv(struct proc *p, struct readv_args *uap, user_ssize_t *retval)
448 {
449 __pthread_testcancel(1);
450 return readv_nocancel(p, (struct readv_nocancel_args *)uap, retval);
451 }
452
453 int
454 readv_nocancel(struct proc *p, struct readv_nocancel_args *uap, user_ssize_t *retval)
455 {
456 return readv_preadv_uio(p, uap->fd, uap->iovp, uap->iovcnt, 0, 0, retval);
457 }
458
459 /*
460 * Preadv system call
461 */
462 int
463 sys_preadv(struct proc *p, struct preadv_args *uap, user_ssize_t *retval)
464 {
465 __pthread_testcancel(1);
466 return sys_preadv_nocancel(p, (struct preadv_nocancel_args *)uap, retval);
467 }
468
469 int
470 sys_preadv_nocancel(struct proc *p, struct preadv_nocancel_args *uap, user_ssize_t *retval)
471 {
472 return readv_preadv_uio(p, uap->fd, uap->iovp, uap->iovcnt, uap->offset, 1, retval);
473 }
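/*
 * Editor's sketch (illustrative, not part of the original source): from user
 * space the vector-read paths above are reached through the libc wrappers;
 * fd, hdr and body below are assumed, hypothetical names.
 */
#if 0
	struct iovec iov[2] = {
		{ .iov_base = hdr,  .iov_len = sizeof(hdr)  },
		{ .iov_base = body, .iov_len = sizeof(body) },
	};
	ssize_t n = preadv(fd, iov, 2, (off_t)4096);    /* -> sys_preadv() */
#endif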
474
475 /*
476 * Write system call
477 *
478 * Returns: 0 Success
479 * EBADF
480 * fp_lookup:EBADF
481 * dofilewrite:???
482 */
483 int
484 write(struct proc *p, struct write_args *uap, user_ssize_t *retval)
485 {
486 __pthread_testcancel(1);
487 return write_nocancel(p, (struct write_nocancel_args *)uap, retval);
488 }
489
490 int
491 write_nocancel(struct proc *p, struct write_nocancel_args *uap, user_ssize_t *retval)
492 {
493 struct fileproc *fp;
494 int error;
495 int fd = uap->fd;
496
497 AUDIT_ARG(fd, fd);
498
499 error = fp_lookup(p, fd, &fp, 0);
500 if (error) {
501 return error;
502 }
503 if ((fp->f_flag & FWRITE) == 0) {
504 error = EBADF;
505 } else if (FP_ISGUARDED(fp, GUARD_WRITE)) {
506 proc_fdlock(p);
507 error = fp_guard_exception(p, fd, fp, kGUARD_EXC_WRITE);
508 proc_fdunlock(p);
509 } else {
510 struct vfs_context context = *(vfs_context_current());
511 context.vc_ucred = fp->fp_glob->fg_cred;
512
513 error = dofilewrite(&context, fp, uap->cbuf, uap->nbyte,
514 (off_t)-1, 0, retval);
515 }
516 fp_drop(p, fd, fp, 0);
517 return error;
518 }
519
520 /*
521 * pwrite system call
522 *
523 * Returns: 0 Success
524 * EBADF
525 * ESPIPE
526 * ENXIO
527 * EINVAL
528 * fp_lookup:EBADF
529 * dofilewrite:???
530 */
531 int
532 pwrite(struct proc *p, struct pwrite_args *uap, user_ssize_t *retval)
533 {
534 __pthread_testcancel(1);
535 return pwrite_nocancel(p, (struct pwrite_nocancel_args *)uap, retval);
536 }
537
538 int
539 pwrite_nocancel(struct proc *p, struct pwrite_nocancel_args *uap, user_ssize_t *retval)
540 {
541 struct fileproc *fp;
542 int error;
543 int fd = uap->fd;
544 vnode_t vp = (vnode_t)0;
545
546 AUDIT_ARG(fd, fd);
547
548 error = fp_get_ftype(p, fd, DTYPE_VNODE, ESPIPE, &fp);
549 if (error) {
550 return error;
551 }
552
553 if ((fp->f_flag & FWRITE) == 0) {
554 error = EBADF;
555 } else if (FP_ISGUARDED(fp, GUARD_WRITE)) {
556 proc_fdlock(p);
557 error = fp_guard_exception(p, fd, fp, kGUARD_EXC_WRITE);
558 proc_fdunlock(p);
559 } else {
560 struct vfs_context context = *vfs_context_current();
561 context.vc_ucred = fp->fp_glob->fg_cred;
562
563 vp = (vnode_t)fp->fp_glob->fg_data;
564 if (vnode_isfifo(vp)) {
565 error = ESPIPE;
566 goto errout;
567 }
568 if ((vp->v_flag & VISTTY)) {
569 error = ENXIO;
570 goto errout;
571 }
572 if (uap->offset == (off_t)-1) {
573 error = EINVAL;
574 goto errout;
575 }
576
577 error = dofilewrite(&context, fp, uap->buf, uap->nbyte,
578 uap->offset, FOF_OFFSET, retval);
579 }
580 errout:
581 fp_drop(p, fd, fp, 0);
582
583 KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_pwrite) | DBG_FUNC_NONE),
584 uap->fd, uap->nbyte, (unsigned int)((uap->offset >> 32)), (unsigned int)(uap->offset), 0);
585
586 return error;
587 }
588
589 /*
590 * Returns: 0 Success
591 * EINVAL
592 * <fo_write>:EPIPE
593 * <fo_write>:??? [indirect through struct fileops]
594 */
595 __private_extern__ int
596 dofilewrite(vfs_context_t ctx, struct fileproc *fp,
597 user_addr_t bufp, user_size_t nbyte, off_t offset, int flags,
598 user_ssize_t *retval)
599 {
600 uio_t auio;
601 int error = 0;
602 user_ssize_t bytecnt;
603 char uio_buf[UIO_SIZEOF(1)];
604
605 if (nbyte > INT_MAX) {
606 *retval = 0;
607 return EINVAL;
608 }
609
610 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
611 auio = uio_createwithbuffer(1, offset, UIO_USERSPACE64, UIO_WRITE,
612 &uio_buf[0], sizeof(uio_buf));
613 } else {
614 auio = uio_createwithbuffer(1, offset, UIO_USERSPACE32, UIO_WRITE,
615 &uio_buf[0], sizeof(uio_buf));
616 }
617 if (uio_addiov(auio, bufp, nbyte) != 0) {
618 *retval = 0;
619 return EINVAL;
620 }
621
622 bytecnt = nbyte;
623 if ((error = fo_write(fp, auio, flags, ctx))) {
624 if (uio_resid(auio) != bytecnt && (error == ERESTART ||
625 error == EINTR || error == EWOULDBLOCK)) {
626 error = 0;
627 }
628 /* The socket layer handles SIGPIPE */
629 if (error == EPIPE && fp->f_type != DTYPE_SOCKET &&
630 (fp->fp_glob->fg_lflags & FG_NOSIGPIPE) == 0) {
631 /* XXX Raise the signal on the thread? */
632 psignal(vfs_context_proc(ctx), SIGPIPE);
633 }
634 }
635 bytecnt -= uio_resid(auio);
636 if (bytecnt) {
637 os_atomic_or(&fp->fp_glob->fg_flag, FWASWRITTEN, relaxed);
638 }
639 *retval = bytecnt;
640
641 return error;
642 }
643
644 /*
645 * Returns: 0 Success
646 * EBADF
647 * ESPIPE
648 * ENXIO
649 * fp_lookup:EBADF
650 * fp_guard_exception:???
651 */
652 static int
653 preparefilewrite(struct proc *p, struct fileproc **fp_ret, int fd, int check_for_pwrite)
654 {
655 vnode_t vp;
656 int error;
657 struct fileproc *fp;
658
659 AUDIT_ARG(fd, fd);
660
661 proc_fdlock_spin(p);
662
663 error = fp_lookup(p, fd, &fp, 1);
664
665 if (error) {
666 proc_fdunlock(p);
667 return error;
668 }
669 if ((fp->f_flag & FWRITE) == 0) {
670 error = EBADF;
671 goto ExitThisRoutine;
672 }
673 if (FP_ISGUARDED(fp, GUARD_WRITE)) {
674 error = fp_guard_exception(p, fd, fp, kGUARD_EXC_WRITE);
675 goto ExitThisRoutine;
676 }
677 if (check_for_pwrite) {
678 if (fp->f_type != DTYPE_VNODE) {
679 error = ESPIPE;
680 goto ExitThisRoutine;
681 }
682
683 vp = (vnode_t)fp->fp_glob->fg_data;
684 if (vnode_isfifo(vp)) {
685 error = ESPIPE;
686 goto ExitThisRoutine;
687 }
688 if ((vp->v_flag & VISTTY)) {
689 error = ENXIO;
690 goto ExitThisRoutine;
691 }
692 }
693
694 *fp_ret = fp;
695
696 proc_fdunlock(p);
697 return 0;
698
699 ExitThisRoutine:
700 fp_drop(p, fd, fp, 1);
701 proc_fdunlock(p);
702 return error;
703 }
704
705 static int
706 writev_prwritev_uio(struct proc *p, int fd,
707 user_addr_t user_iovp, int iovcnt, off_t offset, int is_pwritev,
708 user_ssize_t *retval)
709 {
710 uio_t auio = NULL;
711 int error;
712 struct user_iovec *iovp;
713
714 /* Verify range before calling uio_create() */
715 if (iovcnt <= 0 || iovcnt > UIO_MAXIOV || offset < 0) {
716 return EINVAL;
717 }
718
719 /* allocate a uio large enough to hold the number of iovecs passed */
720 auio = uio_create(iovcnt, offset,
721 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
722 UIO_WRITE);
723
724 /* get location of iovecs within the uio. then copyin the iovecs from
725 * user space.
726 */
727 iovp = uio_iovsaddr(auio);
728 if (iovp == NULL) {
729 error = ENOMEM;
730 goto ExitThisRoutine;
731 }
732 error = copyin_user_iovec_array(user_iovp,
733 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
734 iovcnt, iovp);
735 if (error) {
736 goto ExitThisRoutine;
737 }
738
739 /* finalize uio_t for use and do the IO
740 */
741 error = uio_calculateresid(auio);
742 if (error) {
743 goto ExitThisRoutine;
744 }
745
746 error = wr_uio(p, fd, auio, is_pwritev, retval);
747
748 ExitThisRoutine:
749 if (auio != NULL) {
750 uio_free(auio);
751 }
752 return error;
753 }
754
755 /*
756 * Gather write system call
757 */
758 int
759 writev(struct proc *p, struct writev_args *uap, user_ssize_t *retval)
760 {
761 __pthread_testcancel(1);
762 return writev_nocancel(p, (struct writev_nocancel_args *)uap, retval);
763 }
764
765 int
766 writev_nocancel(struct proc *p, struct writev_nocancel_args *uap, user_ssize_t *retval)
767 {
768 return writev_prwritev_uio(p, uap->fd, uap->iovp, uap->iovcnt, 0, 0, retval);
769 }
770
771 /*
772 * Pwritev system call
773 */
774 int
775 sys_pwritev(struct proc *p, struct pwritev_args *uap, user_ssize_t *retval)
776 {
777 __pthread_testcancel(1);
778 return sys_pwritev_nocancel(p, (struct pwritev_nocancel_args *)uap, retval);
779 }
780
781 int
782 sys_pwritev_nocancel(struct proc *p, struct pwritev_nocancel_args *uap, user_ssize_t *retval)
783 {
784 return writev_prwritev_uio(p, uap->fd, uap->iovp, uap->iovcnt, uap->offset, 1, retval);
785 }
786
787 /*
788 * Returns: 0 Success
789 * preparefilewrite:EBADF
790 * preparefilewrite:ESPIPE
791 * preparefilewrite:ENXIO
792 * preparefilewrite:???
793 * fo_write:???
794 */
795 int
796 wr_uio(struct proc *p, int fd, uio_t uio, int is_pwritev, user_ssize_t *retval)
797 {
798 struct fileproc *fp;
799 int error;
800 int flags;
801
802 if ((error = preparefilewrite(p, &fp, fd, is_pwritev))) {
803 return error;
804 }
805
806 flags = is_pwritev ? FOF_OFFSET : 0;
807 error = do_uiowrite(p, fp, uio, flags, retval);
808
809 fp_drop(p, fd, fp, 0);
810
811 return error;
812 }
813
814 int
815 do_uiowrite(struct proc *p, struct fileproc *fp, uio_t uio, int flags, user_ssize_t *retval)
816 {
817 int error;
818 user_ssize_t count;
819 struct vfs_context context = *vfs_context_current();
820
821 count = uio_resid(uio);
822
823 context.vc_ucred = fp->f_cred;
824 error = fo_write(fp, uio, flags, &context);
825 if (error) {
826 if (uio_resid(uio) != count && (error == ERESTART ||
827 error == EINTR || error == EWOULDBLOCK)) {
828 error = 0;
829 }
830 /* The socket layer handles SIGPIPE */
831 if (error == EPIPE && fp->f_type != DTYPE_SOCKET &&
832 (fp->fp_glob->fg_lflags & FG_NOSIGPIPE) == 0) {
833 psignal(p, SIGPIPE);
834 }
835 }
836 count -= uio_resid(uio);
837 if (count) {
838 os_atomic_or(&fp->fp_glob->fg_flag, FWASWRITTEN, relaxed);
839 }
840 *retval = count;
841
842 return error;
843 }
844
845 /*
846 * Returns: 0 Success
847 * preparefileread:EBADF
848 * preparefileread:ESPIPE
849 * preparefileread:ENXIO
850 * fo_read:???
851 */
852 int
853 rd_uio(struct proc *p, int fdes, uio_t uio, int is_preadv, user_ssize_t *retval)
854 {
855 struct fileproc *fp;
856 int error;
857 user_ssize_t count;
858 struct vfs_context context = *vfs_context_current();
859
860 if ((error = preparefileread(p, &fp, fdes, is_preadv))) {
861 return error;
862 }
863
864 count = uio_resid(uio);
865
866 context.vc_ucred = fp->f_cred;
867
868 int flags = is_preadv ? FOF_OFFSET : 0;
869 error = fo_read(fp, uio, flags, &context);
870
871 if (error) {
872 if (uio_resid(uio) != count && (error == ERESTART ||
873 error == EINTR || error == EWOULDBLOCK)) {
874 error = 0;
875 }
876 }
877 *retval = count - uio_resid(uio);
878
879 fp_drop(p, fdes, fp, 0);
880
881 return error;
882 }
883
884 /*
885 * Ioctl system call
886 *
887 * Returns: 0 Success
888 * EBADF
889 * ENOTTY
890 * ENOMEM
891 * ESRCH
892 * copyin:EFAULT
893 * copyout:EFAULT
894 * fp_lookup:EBADF Bad file descriptor
895 * fo_ioctl:???
896 */
897 int
898 ioctl(struct proc *p, struct ioctl_args *uap, __unused int32_t *retval)
899 {
900 struct fileproc *fp = NULL;
901 int error = 0;
902 u_int size = 0;
903 caddr_t datap = NULL, memp = NULL;
904 boolean_t is64bit = FALSE;
905 int tmp = 0;
906 #define STK_PARAMS 128
907 char stkbuf[STK_PARAMS] = {};
908 int fd = uap->fd;
909 u_long com = uap->com;
910 struct vfs_context context = *vfs_context_current();
911
912 AUDIT_ARG(fd, uap->fd);
913 AUDIT_ARG(addr, uap->data);
914
915 is64bit = proc_is64bit(p);
916 #if CONFIG_AUDIT
917 if (is64bit) {
918 AUDIT_ARG(value64, com);
919 } else {
920 AUDIT_ARG(cmd, CAST_DOWN_EXPLICIT(int, com));
921 }
922 #endif /* CONFIG_AUDIT */
923
924 /*
925 * Interpret high order word to find amount of data to be
926 * copied to/from the user's address space.
927 */
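/*
 * Editor's note (worked example, not part of the original source): the
 * parameter size lives in the high word of the command. For instance
 * FIONBIO is _IOW('f', 126, int), so IOCPARM_LEN(FIONBIO) is sizeof(int)
 * and IOC_IN is set, which is why an int is copied in from uap->data below.
 */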
928 size = IOCPARM_LEN(com);
929 if (size > IOCPARM_MAX) {
930 return ENOTTY;
931 }
932 if (size > sizeof(stkbuf)) {
933 memp = (caddr_t)kheap_alloc(KHEAP_TEMP, size, Z_WAITOK);
934 if (memp == 0) {
935 return ENOMEM;
936 }
937 datap = memp;
938 } else {
939 datap = &stkbuf[0];
940 }
941 if (com & IOC_IN) {
942 if (size) {
943 error = copyin(uap->data, datap, size);
944 if (error) {
945 goto out_nofp;
946 }
947 } else {
948 /* XXX - IOC_IN and no size? we should probably return an error here!! */
949 if (is64bit) {
950 *(user_addr_t *)datap = uap->data;
951 } else {
952 *(uint32_t *)datap = (uint32_t)uap->data;
953 }
954 }
955 } else if ((com & IOC_OUT) && size) {
956 /*
957 * Zero the buffer so the user always
958 * gets back something deterministic.
959 */
960 bzero(datap, size);
961 } else if (com & IOC_VOID) {
962 /* XXX - this is odd since IOC_VOID means no parameters */
963 if (is64bit) {
964 *(user_addr_t *)datap = uap->data;
965 } else {
966 *(uint32_t *)datap = (uint32_t)uap->data;
967 }
968 }
969
970 proc_fdlock(p);
971 error = fp_lookup(p, fd, &fp, 1);
972 if (error) {
973 proc_fdunlock(p);
974 goto out_nofp;
975 }
976
977 AUDIT_ARG(file, p, fp);
978
979 if ((fp->f_flag & (FREAD | FWRITE)) == 0) {
980 error = EBADF;
981 goto out;
982 }
983
984 context.vc_ucred = fp->fp_glob->fg_cred;
985
986 #if CONFIG_MACF
987 error = mac_file_check_ioctl(context.vc_ucred, fp->fp_glob, com);
988 if (error) {
989 goto out;
990 }
991 #endif
992
993 switch (com) {
994 case FIONCLEX:
995 *fdflags(p, fd) &= ~UF_EXCLOSE;
996 break;
997
998 case FIOCLEX:
999 *fdflags(p, fd) |= UF_EXCLOSE;
1000 break;
1001
1002 case FIONBIO:
1003 // FIXME (rdar://54898652)
1004 //
1005 // this code is broken if fcntl(F_SETFL) and ioctl() are
1006 // called concurrently for the same fileglob.
1007 if ((tmp = *(int *)datap)) {
1008 os_atomic_or(&fp->f_flag, FNONBLOCK, relaxed);
1009 } else {
1010 os_atomic_andnot(&fp->f_flag, FNONBLOCK, relaxed);
1011 }
1012 error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, &context);
1013 break;
1014
1015 case FIOASYNC:
1016 // FIXME (rdar://54898652)
1017 //
1018 // this code is broken if fcntl(F_SETFL) and ioctl() are
1019 // called concurrently for the same fileglob.
1020 if ((tmp = *(int *)datap)) {
1021 os_atomic_or(&fp->f_flag, FASYNC, relaxed);
1022 } else {
1023 os_atomic_andnot(&fp->f_flag, FASYNC, relaxed);
1024 }
1025 error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, &context);
1026 break;
1027
1028 case FIOSETOWN:
1029 tmp = *(int *)datap;
1030 if (fp->f_type == DTYPE_SOCKET) {
1031 ((struct socket *)fp->f_data)->so_pgid = tmp;
1032 break;
1033 }
1034 if (fp->f_type == DTYPE_PIPE) {
1035 error = fo_ioctl(fp, TIOCSPGRP, (caddr_t)&tmp, &context);
1036 break;
1037 }
1038 if (tmp <= 0) {
1039 tmp = -tmp;
1040 } else {
1041 struct proc *p1 = proc_find(tmp);
1042 if (p1 == 0) {
1043 error = ESRCH;
1044 break;
1045 }
1046 tmp = p1->p_pgrpid;
1047 proc_rele(p1);
1048 }
1049 error = fo_ioctl(fp, TIOCSPGRP, (caddr_t)&tmp, &context);
1050 break;
1051
1052 case FIOGETOWN:
1053 if (fp->f_type == DTYPE_SOCKET) {
1054 *(int *)datap = ((struct socket *)fp->f_data)->so_pgid;
1055 break;
1056 }
1057 error = fo_ioctl(fp, TIOCGPGRP, datap, &context);
1058 *(int *)datap = -*(int *)datap;
1059 break;
1060
1061 default:
1062 error = fo_ioctl(fp, com, datap, &context);
1063 /*
1064 * Copy any data to user, size was
1065 * already set and checked above.
1066 */
1067 if (error == 0 && (com & IOC_OUT) && size) {
1068 error = copyout(datap, uap->data, (u_int)size);
1069 }
1070 break;
1071 }
1072 out:
1073 fp_drop(p, fd, fp, 1);
1074 proc_fdunlock(p);
1075
1076 out_nofp:
1077 if (memp) {
1078 kheap_free(KHEAP_TEMP, memp, size);
1079 }
1080 return error;
1081 }
1082
1083 int selwait, nselcoll;
1084 #define SEL_FIRSTPASS 1
1085 #define SEL_SECONDPASS 2
1086 extern int selcontinue(int error);
1087 extern int selprocess(int error, int sel_pass);
1088 static int selscan(struct proc *p, struct _select * sel, struct _select_data * seldata,
1089 int nfd, int32_t *retval, int sel_pass, struct waitq_set *wqset);
1090 static int selcount(struct proc *p, u_int32_t *ibits, int nfd, int *count);
1091 static int seldrop_locked(struct proc *p, u_int32_t *ibits, int nfd, int lim, int *need_wakeup);
1092 static int seldrop(struct proc *p, u_int32_t *ibits, int nfd, int lim);
1093 static int select_internal(struct proc *p, struct select_nocancel_args *uap, uint64_t timeout, int32_t *retval);
1094
1095 /*
1096 * Select system call.
1097 *
1098 * Returns: 0 Success
1099 * EINVAL Invalid argument
1100 * EAGAIN Nonconformant error if allocation fails
1101 */
1102 int
1103 select(struct proc *p, struct select_args *uap, int32_t *retval)
1104 {
1105 __pthread_testcancel(1);
1106 return select_nocancel(p, (struct select_nocancel_args *)uap, retval);
1107 }
1108
1109 int
1110 select_nocancel(struct proc *p, struct select_nocancel_args *uap, int32_t *retval)
1111 {
1112 uint64_t timeout = 0;
1113
1114 if (uap->tv) {
1115 int err;
1116 struct timeval atv;
1117 if (IS_64BIT_PROCESS(p)) {
1118 struct user64_timeval atv64;
1119 err = copyin(uap->tv, (caddr_t)&atv64, sizeof(atv64));
1120 /* Loses resolution - assume timeout < 68 years */
1121 atv.tv_sec = (__darwin_time_t)atv64.tv_sec;
1122 atv.tv_usec = atv64.tv_usec;
1123 } else {
1124 struct user32_timeval atv32;
1125 err = copyin(uap->tv, (caddr_t)&atv32, sizeof(atv32));
1126 atv.tv_sec = atv32.tv_sec;
1127 atv.tv_usec = atv32.tv_usec;
1128 }
1129 if (err) {
1130 return err;
1131 }
1132
1133 if (itimerfix(&atv)) {
1134 err = EINVAL;
1135 return err;
1136 }
1137
1138 clock_absolutetime_interval_to_deadline(tvtoabstime(&atv), &timeout);
1139 }
1140
1141 return select_internal(p, uap, timeout, retval);
1142 }
1143
1144 int
1145 pselect(struct proc *p, struct pselect_args *uap, int32_t *retval)
1146 {
1147 __pthread_testcancel(1);
1148 return pselect_nocancel(p, (struct pselect_nocancel_args *)uap, retval);
1149 }
1150
1151 int
1152 pselect_nocancel(struct proc *p, struct pselect_nocancel_args *uap, int32_t *retval)
1153 {
1154 int err;
1155 struct uthread *ut;
1156 uint64_t timeout = 0;
1157
1158 if (uap->ts) {
1159 struct timespec ts;
1160
1161 if (IS_64BIT_PROCESS(p)) {
1162 struct user64_timespec ts64;
1163 err = copyin(uap->ts, (caddr_t)&ts64, sizeof(ts64));
1164 ts.tv_sec = (__darwin_time_t)ts64.tv_sec;
1165 ts.tv_nsec = (long)ts64.tv_nsec;
1166 } else {
1167 struct user32_timespec ts32;
1168 err = copyin(uap->ts, (caddr_t)&ts32, sizeof(ts32));
1169 ts.tv_sec = ts32.tv_sec;
1170 ts.tv_nsec = ts32.tv_nsec;
1171 }
1172 if (err) {
1173 return err;
1174 }
1175
1176 if (!timespec_is_valid(&ts)) {
1177 return EINVAL;
1178 }
1179 clock_absolutetime_interval_to_deadline(tstoabstime(&ts), &timeout);
1180 }
1181
1182 ut = get_bsdthread_info(current_thread());
1183
1184 if (uap->mask != USER_ADDR_NULL) {
1185 /* save current mask, then copyin and set new mask */
1186 sigset_t newset;
1187 err = copyin(uap->mask, &newset, sizeof(sigset_t));
1188 if (err) {
1189 return err;
1190 }
1191 ut->uu_oldmask = ut->uu_sigmask;
1192 ut->uu_flag |= UT_SAS_OLDMASK;
1193 ut->uu_sigmask = (newset & ~sigcantmask);
1194 }
1195
1196 err = select_internal(p, (struct select_nocancel_args *)uap, timeout, retval);
1197
1198 if (err != EINTR && ut->uu_flag & UT_SAS_OLDMASK) {
1199 /*
1200 * Restore old mask (direct return case). NOTE: EINTR can also be returned
1201 * if the thread is cancelled. In that case, we don't reset the signal
1202 * mask to its original value (which usually happens in the signal
1203 * delivery path). This behavior is permitted by POSIX.
1204 */
1205 ut->uu_sigmask = ut->uu_oldmask;
1206 ut->uu_oldmask = 0;
1207 ut->uu_flag &= ~UT_SAS_OLDMASK;
1208 }
1209
1210 return err;
1211 }
1212
1213 /*
1214 * Generic implementation of {,p}select. Care: we type-pun uap across the two
1215 * syscalls, which differ slightly. The first 4 arguments (nfds and the fd sets)
1216 * are identical. The 5th (timeout) argument points to different types, so we
1217 * unpack in the syscall-specific code, but the generic code still does a null
1218 * check on this argument to determine if a timeout was specified.
1219 */
1220 static int
1221 select_internal(struct proc *p, struct select_nocancel_args *uap, uint64_t timeout, int32_t *retval)
1222 {
1223 int error = 0;
1224 u_int ni, nw;
1225 thread_t th_act;
1226 struct uthread *uth;
1227 struct _select *sel;
1228 struct _select_data *seldata;
1229 int needzerofill = 1;
1230 int count = 0;
1231 size_t sz = 0;
1232
1233 th_act = current_thread();
1234 uth = get_bsdthread_info(th_act);
1235 sel = &uth->uu_select;
1236 seldata = &uth->uu_save.uus_select_data;
1237 *retval = 0;
1238
1239 seldata->args = uap;
1240 seldata->retval = retval;
1241 seldata->wqp = NULL;
1242 seldata->count = 0;
1243
1244 if (uap->nd < 0) {
1245 return EINVAL;
1246 }
1247
1248 /* select on thread of process that already called proc_exit() */
1249 if (p->p_fd == NULL) {
1250 return EBADF;
1251 }
1252
1253 if (uap->nd > p->p_fd->fd_nfiles) {
1254 uap->nd = p->p_fd->fd_nfiles; /* forgiving; slightly wrong */
1255 }
1256 nw = howmany(uap->nd, NFDBITS);
1257 ni = nw * sizeof(fd_mask);
1258
1259 /*
1260 * if the previously allocated space for the bits is smaller than
1261 * what is requested or no space has yet been allocated for this
1262 * thread, allocate enough space now.
1263 *
1264 * Note: If this allocation fails, select() will return EAGAIN; this
1265 * is the same thing poll() returns in a no-memory situation, but
1266 * it is not a POSIX-compliant error code for select().
1267 */
1268 if (sel->nbytes < (3 * ni)) {
1269 int nbytes = 3 * ni;
1270
1271 /* Free previous allocation, if any */
1272 if (sel->ibits != NULL) {
1273 FREE(sel->ibits, M_TEMP);
1274 }
1275 if (sel->obits != NULL) {
1276 FREE(sel->obits, M_TEMP);
1277 /* NULL out; subsequent ibits allocation may fail */
1278 sel->obits = NULL;
1279 }
1280
1281 MALLOC(sel->ibits, u_int32_t *, nbytes, M_TEMP, M_WAITOK | M_ZERO);
1282 if (sel->ibits == NULL) {
1283 return EAGAIN;
1284 }
1285 MALLOC(sel->obits, u_int32_t *, nbytes, M_TEMP, M_WAITOK | M_ZERO);
1286 if (sel->obits == NULL) {
1287 FREE(sel->ibits, M_TEMP);
1288 sel->ibits = NULL;
1289 return EAGAIN;
1290 }
1291 sel->nbytes = nbytes;
1292 needzerofill = 0;
1293 }
1294
1295 if (needzerofill) {
1296 bzero((caddr_t)sel->ibits, sel->nbytes);
1297 bzero((caddr_t)sel->obits, sel->nbytes);
1298 }
1299
1300 /*
1301 * get the bits from the user address space
1302 */
1303 #define getbits(name, x) \
1304 do { \
1305 if (uap->name && (error = copyin(uap->name, \
1306 (caddr_t)&sel->ibits[(x) * nw], ni))) \
1307 goto continuation; \
1308 } while (0)
1309
1310 getbits(in, 0);
1311 getbits(ou, 1);
1312 getbits(ex, 2);
1313 #undef getbits
1314
1315 seldata->abstime = timeout;
1316
1317 if ((error = selcount(p, sel->ibits, uap->nd, &count))) {
1318 goto continuation;
1319 }
1320
1321 /*
1322 * We need an array of waitq pointers. This is due to the new way
1323 * in which waitqs are linked to sets. When a thread selects on a
1324 * file descriptor, a waitq (embedded in a selinfo structure) is
1325 * added to the thread's local waitq set. There is no longer any
1326 * way to directly iterate over all members of a given waitq set.
1327 * The process of linking a waitq into a set may allocate a link
1328 * table object. Because we can't iterate over all the waitqs to
1329 * which our thread waitq set belongs, we need a way of removing
1330 * this link object!
1331 *
1332 * Thus we need a buffer which will hold one waitq pointer
1333 * per FD being selected. During the tear-down phase we can use
1334 * these pointers to dis-associate the underlying selinfo's waitq
1335 * from our thread's waitq set.
1336 *
1337 * Because we also need to allocate a waitq set for this thread,
1338 * we use a bare buffer pointer to hold all the memory. Note that
1339 * this memory is cached in the thread pointer and not reaped until
1340 * the thread exits. This is generally OK because threads that
1341 * call select tend to keep calling select repeatedly.
1342 */
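/*
 * Editor's sketch (illustrative, not part of the original source) of the
 * per-thread buffer laid out below:
 *
 *   uth->uu_wqset --> [ struct waitq_set (ALIGN'ed) ][ uint64_t wqp[count] ]
 *
 * seldata->wqp points at the trailing wqp[] array, one prepost-id slot per
 * selected descriptor (see sellinkfp()/selunlinkfp()).
 */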
1343 sz = ALIGN(sizeof(struct waitq_set)) + (count * sizeof(uint64_t));
1344 if (sz > uth->uu_wqstate_sz) {
1345 /* (re)allocate a buffer to hold waitq pointers */
1346 if (uth->uu_wqset) {
1347 if (waitq_set_is_valid(uth->uu_wqset)) {
1348 waitq_set_deinit(uth->uu_wqset);
1349 }
1350 FREE(uth->uu_wqset, M_SELECT);
1351 } else if (uth->uu_wqstate_sz && !uth->uu_wqset) {
1352 panic("select: thread structure corrupt! "
1353 "uu_wqstate_sz:%ld, wqstate_buf == NULL",
1354 uth->uu_wqstate_sz);
1355 }
1356 uth->uu_wqstate_sz = sz;
1357 MALLOC(uth->uu_wqset, struct waitq_set *, sz, M_SELECT, M_WAITOK);
1358 if (!uth->uu_wqset) {
1359 panic("can't allocate %ld bytes for wqstate buffer",
1360 uth->uu_wqstate_sz);
1361 }
1362 waitq_set_init(uth->uu_wqset,
1363 SYNC_POLICY_FIFO | SYNC_POLICY_PREPOST, NULL, NULL);
1364 }
1365
1366 if (!waitq_set_is_valid(uth->uu_wqset)) {
1367 waitq_set_init(uth->uu_wqset,
1368 SYNC_POLICY_FIFO | SYNC_POLICY_PREPOST, NULL, NULL);
1369 }
1370
1371 /* the last chunk of our buffer is an array of waitq pointers */
1372 seldata->wqp = (uint64_t *)((char *)(uth->uu_wqset) + ALIGN(sizeof(struct waitq_set)));
1373 bzero(seldata->wqp, sz - ALIGN(sizeof(struct waitq_set)));
1374
1375 seldata->count = count;
1376
1377 continuation:
1378
1379 if (error) {
1380 /*
1381 * We have already cleaned up any state we established,
1382 * either locally or as a result of selcount(). We don't
1383 * need to wait_subqueue_unlink_all(), since we haven't set
1384 * anything at this point.
1385 */
1386 return error;
1387 }
1388
1389 return selprocess(0, SEL_FIRSTPASS);
1390 }
1391
1392 int
1393 selcontinue(int error)
1394 {
1395 return selprocess(error, SEL_SECONDPASS);
1396 }
1397
1398
1399 /*
1400 * selprocess
1401 *
1402 * Parameters: error The error code from our caller
1403 * sel_pass The pass we are on
1404 */
1405 int
1406 selprocess(int error, int sel_pass)
1407 {
1408 int ncoll;
1409 u_int ni, nw;
1410 thread_t th_act;
1411 struct uthread *uth;
1412 struct proc *p;
1413 struct select_nocancel_args *uap;
1414 int *retval;
1415 struct _select *sel;
1416 struct _select_data *seldata;
1417 int unwind = 1;
1418 int prepost = 0;
1419 int somewakeup = 0;
1420 int doretry = 0;
1421 wait_result_t wait_result;
1422
1423 p = current_proc();
1424 th_act = current_thread();
1425 uth = get_bsdthread_info(th_act);
1426 sel = &uth->uu_select;
1427 seldata = &uth->uu_save.uus_select_data;
1428 uap = seldata->args;
1429 retval = seldata->retval;
1430
1431 if ((error != 0) && (sel_pass == SEL_FIRSTPASS)) {
1432 unwind = 0;
1433 }
1434 if (seldata->count == 0) {
1435 unwind = 0;
1436 }
1437 retry:
1438 if (error != 0) {
1439 goto done;
1440 }
1441
1442 ncoll = nselcoll;
1443 OSBitOrAtomic(P_SELECT, &p->p_flag);
1444
1445 /* skip scans if the select is just for timeouts */
1446 if (seldata->count) {
1447 error = selscan(p, sel, seldata, uap->nd, retval, sel_pass, uth->uu_wqset);
1448 if (error || *retval) {
1449 goto done;
1450 }
1451 if (prepost || somewakeup) {
1452 /*
1453 * if we were woken by a prepost or partial wakeup, we may
1454 * discover that someone else already read the data;
1455 * go to select again if time permits
1456 */
1457 prepost = 0;
1458 somewakeup = 0;
1459 doretry = 1;
1460 }
1461 }
1462
1463 if (uap->tv) {
1464 uint64_t now;
1465
1466 clock_get_uptime(&now);
1467 if (now >= seldata->abstime) {
1468 goto done;
1469 }
1470 }
1471
1472 if (doretry) {
1473 /* cleanup obits and try again */
1474 doretry = 0;
1475 sel_pass = SEL_FIRSTPASS;
1476 goto retry;
1477 }
1478
1479 /*
1480 * To effect a poll, the timeout argument should be
1481 * non-nil, pointing to a zero-valued timeval structure.
1482 */
1483 if (uap->tv && seldata->abstime == 0) {
1484 goto done;
1485 }
1486
1487 /* No spurious wakeups due to collisions; no need to check for them */
1488 if ((sel_pass == SEL_SECONDPASS) || ((p->p_flag & P_SELECT) == 0)) {
1489 sel_pass = SEL_FIRSTPASS;
1490 goto retry;
1491 }
1492
1493 OSBitAndAtomic(~((uint32_t)P_SELECT), &p->p_flag);
1494
1495 /* if the select is just for a timeout, skip this check */
1496 if (seldata->count && (sel_pass == SEL_SECONDPASS)) {
1497 panic("selprocess: 2nd pass assertwaiting");
1498 }
1499
1500 /* waitq_set has waitqueue as first element */
1501 wait_result = waitq_assert_wait64_leeway((struct waitq *)uth->uu_wqset,
1502 NO_EVENT64, THREAD_ABORTSAFE,
1503 TIMEOUT_URGENCY_USER_NORMAL,
1504 seldata->abstime,
1505 TIMEOUT_NO_LEEWAY);
1506 if (wait_result != THREAD_AWAKENED) {
1507 /* there are no preposted events */
1508 error = tsleep1(NULL, PSOCK | PCATCH,
1509 "select", 0, selcontinue);
1510 } else {
1511 prepost = 1;
1512 error = 0;
1513 }
1514
1515 if (error == 0) {
1516 sel_pass = SEL_SECONDPASS;
1517 if (!prepost) {
1518 somewakeup = 1;
1519 }
1520 goto retry;
1521 }
1522 done:
1523 if (unwind) {
1524 seldrop(p, sel->ibits, uap->nd, seldata->count);
1525 waitq_set_deinit(uth->uu_wqset);
1526 /*
1527 * zero out the waitq pointer array to avoid use-after free
1528 * errors in the selcount error path (seldrop_locked) if/when
1529 * the thread re-calls select().
1530 */
1531 bzero((void *)uth->uu_wqset, uth->uu_wqstate_sz);
1532 }
1533 OSBitAndAtomic(~((uint32_t)P_SELECT), &p->p_flag);
1534 /* select is not restarted after signals... */
1535 if (error == ERESTART) {
1536 error = EINTR;
1537 }
1538 if (error == EWOULDBLOCK) {
1539 error = 0;
1540 }
1541 nw = howmany(uap->nd, NFDBITS);
1542 ni = nw * sizeof(fd_mask);
1543
1544 #define putbits(name, x) \
1545 do { \
1546 if (uap->name && (error2 = \
1547 copyout((caddr_t)&sel->obits[(x) * nw], uap->name, ni))) \
1548 error = error2; \
1549 } while (0)
1550
1551 if (error == 0) {
1552 int error2;
1553
1554 putbits(in, 0);
1555 putbits(ou, 1);
1556 putbits(ex, 2);
1557 #undef putbits
1558 }
1559
1560 if (error != EINTR && sel_pass == SEL_SECONDPASS && uth->uu_flag & UT_SAS_OLDMASK) {
1561 /* restore signal mask - continuation case */
1562 uth->uu_sigmask = uth->uu_oldmask;
1563 uth->uu_oldmask = 0;
1564 uth->uu_flag &= ~UT_SAS_OLDMASK;
1565 }
1566
1567 return error;
1568 }
1569
1570
1571 /**
1572 * remove the fileproc's underlying waitq from the supplied waitq set;
1573 * clear FP_INSELECT when appropriate
1574 *
1575 * Parameters:
1576 * fp File proc that is potentially currently in select
1577 * wqset Waitq set to which the fileproc may belong
1578 * (usually this is the thread's private waitq set)
1579 * Conditions:
1580 * proc_fdlock is held
1581 */
1582 static void
1583 selunlinkfp(struct fileproc *fp, uint64_t wqp_id, struct waitq_set *wqset)
1584 {
1585 int valid_set = waitq_set_is_valid(wqset);
1586 int valid_q = !!wqp_id;
1587
1588 /*
1589 * This could be called (from selcount error path) before we setup
1590 * the thread's wqset. Check the wqset passed in, and only unlink if
1591 * the set is valid.
1592 */
1593
1594 /* unlink the underlying waitq from the input set (thread waitq set) */
1595 if (valid_q && valid_set) {
1596 waitq_unlink_by_prepost_id(wqp_id, wqset);
1597 }
1598
1599 /* allow passing an invalid fp for seldrop unwind */
1600 if (!(fp->fp_flags & (FP_INSELECT | FP_SELCONFLICT))) {
1601 return;
1602 }
1603
1604 /*
1605 * We can always remove the conflict queue from our thread's set: this
1606 * will not affect other threads that potentially need to be awoken on
1607 * the conflict queue during a fileproc_drain - those sets will still
1608 * be linked with the global conflict queue, and the last waiter
1609 * on the fp clears the CONFLICT marker.
1610 */
1611 if (valid_set && (fp->fp_flags & FP_SELCONFLICT)) {
1612 waitq_unlink(&select_conflict_queue, wqset);
1613 }
1614
1615 /* jca: TODO:
1616 * This isn't quite right - we don't actually know if this
1617 * fileproc is in another select or not! Here we just assume
1618 * that if we were the first thread to select on the FD, then
1619 * we'll be the one to clear this flag...
1620 */
1621 if (valid_set && fp->fp_wset == (void *)wqset) {
1622 fp->fp_flags &= ~FP_INSELECT;
1623 fp->fp_wset = NULL;
1624 }
1625 }
1626
1627 /**
1628 * connect a fileproc to the given wqset, potentially bridging to a waitq
1629 * pointed to indirectly by wq_data
1630 *
1631 * Parameters:
1632 * fp File proc potentially currently in select
1633 * wq_data Pointer to a pointer to a waitq (could be NULL)
1634 * wqset Waitq set to which the fileproc should now belong
1635 * (usually this is the thread's private waitq set)
1636 *
1637 * Conditions:
1638 * proc_fdlock is held
1639 */
1640 static uint64_t
1641 sellinkfp(struct fileproc *fp, void **wq_data, struct waitq_set *wqset)
1642 {
1643 struct waitq *f_wq = NULL;
1644
1645 if ((fp->fp_flags & FP_INSELECT) != FP_INSELECT) {
1646 if (wq_data) {
1647 panic("non-null data:%p on fp:%p not in select?!"
1648 "(wqset:%p)", wq_data, fp, wqset);
1649 }
1650 return 0;
1651 }
1652
1653 if ((fp->fp_flags & FP_SELCONFLICT) == FP_SELCONFLICT) {
1654 waitq_link(&select_conflict_queue, wqset, WAITQ_SHOULD_LOCK, NULL);
1655 }
1656
1657 /*
1658 * The wq_data parameter has potentially been set by selrecord called
1659 * from a subsystem's fo_select() function. If the subsystem does not
1660 * call selrecord, then wq_data will be NULL.
1661 *
1662 * Use memcpy to get the value into a proper pointer because
1663 * wq_data most likely points to a stack variable that could be
1664 * unaligned on 32-bit systems.
1665 */
1666 if (wq_data) {
1667 memcpy(&f_wq, wq_data, sizeof(f_wq));
1668 if (!waitq_is_valid(f_wq)) {
1669 f_wq = NULL;
1670 }
1671 }
1672
1673 /* record the first thread's wqset in the fileproc structure */
1674 if (!fp->fp_wset) {
1675 fp->fp_wset = (void *)wqset;
1676 }
1677
1678 /* handles NULL f_wq */
1679 return waitq_get_prepost_id(f_wq);
1680 }
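/*
 * Editor's summary (not part of the original source) of the waitq
 * bookkeeping across one select() call:
 *
 *   selcount()           take an fp_iocount reference per selected fd
 *   selscan(FIRSTPASS)   fo_select()/selrecord(); sellinkfp() links each
 *                        selinfo waitq (or the conflict queue) into the
 *                        thread's waitq set and records its prepost id
 *   assert_wait / sleep  block until a prepost, signal, or timeout
 *   selscan(SECONDPASS)  re-poll the fds; selunlinkfp() unlinks the waitqs
 *   seldrop()            drop the fp_iocount references
 */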
1681
1682
1683 /*
1684 * selscan
1685 *
1686 * Parameters: p Process performing the select
1687 * sel The per-thread select context structure
1688 * nfd The number of file descriptors to scan
1689 * retval The per thread system call return area
1690 * sel_pass Which pass this is; allowed values are
1691 * SEL_FIRSTPASS and SEL_SECONDPASS
1692 * wqset The per thread wait queue set
1693 *
1694 * Returns: 0 Success
1695 * EIO Invalid p->p_fd field XXX Obsolete?
1696 * EBADF One of the files in the bit vector is
1697 * invalid.
1698 */
1699 static int
1700 selscan(struct proc *p, struct _select *sel, struct _select_data * seldata,
1701 int nfd, int32_t *retval, int sel_pass, struct waitq_set *wqset)
1702 {
1703 struct filedesc *fdp = p->p_fd;
1704 int msk, i, j, fd;
1705 u_int32_t bits;
1706 struct fileproc *fp;
1707 int n = 0; /* count of bits */
1708 int nc = 0; /* bit vector offset (nc'th bit) */
1709 static int flag[3] = { FREAD, FWRITE, 0 };
1710 u_int32_t *iptr, *optr;
1711 u_int nw;
1712 u_int32_t *ibits, *obits;
1713 uint64_t reserved_link, *rl_ptr = NULL;
1714 int count;
1715 struct vfs_context context = *vfs_context_current();
1716
1717 /*
1718 * Problems were seen at reboot due to Mac OS X signal handling issues
1719 * in Beaker1C; verify that p->p_fd is valid
1720 */
1721 if (fdp == NULL) {
1722 *retval = 0;
1723 return EIO;
1724 }
1725 ibits = sel->ibits;
1726 obits = sel->obits;
1727
1728 nw = howmany(nfd, NFDBITS);
1729
1730 count = seldata->count;
1731
1732 nc = 0;
1733 if (!count) {
1734 *retval = 0;
1735 return 0;
1736 }
1737
1738 proc_fdlock(p);
1739 for (msk = 0; msk < 3; msk++) {
1740 iptr = (u_int32_t *)&ibits[msk * nw];
1741 optr = (u_int32_t *)&obits[msk * nw];
1742
1743 for (i = 0; i < nfd; i += NFDBITS) {
1744 bits = iptr[i / NFDBITS];
1745
1746 while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
1747 bits &= ~(1U << j);
1748
1749 fp = fp_get_noref_locked(p, fd);
1750 if (fp == NULL) {
1751 /*
1752 * If we abort because of a bad
1753 * fd, let the caller unwind...
1754 */
1755 proc_fdunlock(p);
1756 return EBADF;
1757 }
1758 if (sel_pass == SEL_SECONDPASS) {
1759 reserved_link = 0;
1760 rl_ptr = NULL;
1761 selunlinkfp(fp, seldata->wqp[nc], wqset);
1762 } else {
1763 reserved_link = waitq_link_reserve((struct waitq *)wqset);
1764 rl_ptr = &reserved_link;
1765 if (fp->fp_flags & FP_INSELECT) {
1766 /* someone is already in select on this fp */
1767 fp->fp_flags |= FP_SELCONFLICT;
1768 } else {
1769 fp->fp_flags |= FP_INSELECT;
1770 }
1771
1772 waitq_set_lazy_init_link(wqset);
1773 }
1774
1775 context.vc_ucred = fp->f_cred;
1776
1777 /*
1778 * stash this value b/c fo_select may replace
1779 * reserved_link with a pointer to a waitq object
1780 */
1781 uint64_t rsvd = reserved_link;
1782
1783 /* The select; set the bit, if true */
1784 if (fp->f_ops && fp->f_type
1785 && fo_select(fp, flag[msk], rl_ptr, &context)) {
1786 optr[fd / NFDBITS] |= (1U << (fd % NFDBITS));
1787 n++;
1788 }
1789 if (sel_pass == SEL_FIRSTPASS) {
1790 waitq_link_release(rsvd);
1791 /*
1792 * If the fp's supporting selinfo structure was linked
1793 * to this thread's waitq set, then 'reserved_link'
1794 * will have been updated by selrecord to be a pointer
1795 * to the selinfo's waitq.
1796 */
1797 if (reserved_link == rsvd) {
1798 rl_ptr = NULL; /* fo_select never called selrecord() */
1799 }
1800 /*
1801 * Hook up the thread's waitq set either to
1802 * the fileproc structure, or to the global
1803 * conflict queue: but only on the first
1804 * select pass.
1805 */
1806 seldata->wqp[nc] = sellinkfp(fp, (void **)rl_ptr, wqset);
1807 }
1808 nc++;
1809 }
1810 }
1811 }
1812 proc_fdunlock(p);
1813
1814 *retval = n;
1815 return 0;
1816 }
1817
1818 static int poll_callback(struct kevent_qos_s *, kevent_ctx_t);
1819
1820 int
1821 poll(struct proc *p, struct poll_args *uap, int32_t *retval)
1822 {
1823 __pthread_testcancel(1);
1824 return poll_nocancel(p, (struct poll_nocancel_args *)uap, retval);
1825 }
1826
1827
1828 int
1829 poll_nocancel(struct proc *p, struct poll_nocancel_args *uap, int32_t *retval)
1830 {
1831 struct pollfd *fds = NULL;
1832 struct kqueue *kq = NULL;
1833 int ncoll, error = 0;
1834 u_int nfds = uap->nfds;
1835 u_int rfds = 0;
1836 rlim_t nofile = proc_limitgetcur(p, RLIMIT_NOFILE, TRUE);
1837
1838 /*
1839 * This is kinda bogus. We have fd limits, but that is not
1840 * really related to the size of the pollfd array. Make sure
1841 * we let the process use at least FD_SETSIZE entries and at
1842 * least enough for the current limits. We want to be reasonably
1843 * safe, but not overly restrictive.
1844 */
1845 if (nfds > OPEN_MAX ||
1846 (nfds > nofile && (proc_suser(p) || nfds > FD_SETSIZE))) {
1847 return EINVAL;
1848 }
1849
1850 kq = kqueue_alloc(p);
1851 if (kq == NULL) {
1852 return EAGAIN;
1853 }
1854
1855 if (nfds) {
1856 size_t ni = nfds * sizeof(struct pollfd);
1857 MALLOC(fds, struct pollfd *, ni, M_TEMP, M_WAITOK);
1858 if (NULL == fds) {
1859 error = EAGAIN;
1860 goto out;
1861 }
1862
1863 error = copyin(uap->fds, fds, nfds * sizeof(struct pollfd));
1864 if (error) {
1865 goto out;
1866 }
1867 }
1868
1869 /* JMM - all this P_SELECT stuff is bogus */
1870 ncoll = nselcoll;
1871 OSBitOrAtomic(P_SELECT, &p->p_flag);
1872 for (u_int i = 0; i < nfds; i++) {
1873 short events = fds[i].events;
1874 __assert_only int rc;
1875
1876 /* per spec, ignore fd values below zero */
1877 if (fds[i].fd < 0) {
1878 fds[i].revents = 0;
1879 continue;
1880 }
1881
1882 /* convert the poll event into a kqueue kevent */
1883 struct kevent_qos_s kev = {
1884 .ident = fds[i].fd,
1885 .flags = EV_ADD | EV_ONESHOT | EV_POLL,
1886 .udata = CAST_USER_ADDR_T(&fds[i])
1887 };
1888
1889 /* Handle input events */
1890 if (events & (POLLIN | POLLRDNORM | POLLPRI | POLLRDBAND | POLLHUP)) {
1891 kev.filter = EVFILT_READ;
1892 if (events & (POLLPRI | POLLRDBAND)) {
1893 kev.flags |= EV_OOBAND;
1894 }
1895 rc = kevent_register(kq, &kev, NULL);
1896 assert((rc & FILTER_REGISTER_WAIT) == 0);
1897 }
1898
1899 /* Handle output events */
1900 if ((kev.flags & EV_ERROR) == 0 &&
1901 (events & (POLLOUT | POLLWRNORM | POLLWRBAND))) {
1902 kev.filter = EVFILT_WRITE;
1903 rc = kevent_register(kq, &kev, NULL);
1904 assert((rc & FILTER_REGISTER_WAIT) == 0);
1905 }
1906
1907 /* Handle BSD extension vnode events */
1908 if ((kev.flags & EV_ERROR) == 0 &&
1909 (events & (POLLEXTEND | POLLATTRIB | POLLNLINK | POLLWRITE))) {
1910 kev.filter = EVFILT_VNODE;
1911 kev.fflags = 0;
1912 if (events & POLLEXTEND) {
1913 kev.fflags |= NOTE_EXTEND;
1914 }
1915 if (events & POLLATTRIB) {
1916 kev.fflags |= NOTE_ATTRIB;
1917 }
1918 if (events & POLLNLINK) {
1919 kev.fflags |= NOTE_LINK;
1920 }
1921 if (events & POLLWRITE) {
1922 kev.fflags |= NOTE_WRITE;
1923 }
1924 rc = kevent_register(kq, &kev, NULL);
1925 assert((rc & FILTER_REGISTER_WAIT) == 0);
1926 }
1927
1928 if (kev.flags & EV_ERROR) {
1929 fds[i].revents = POLLNVAL;
1930 rfds++;
1931 } else {
1932 fds[i].revents = 0;
1933 }
1934 }
1935
1936 /*
1937 * Did we have any trouble registering?
1938 * If user space passed 0 FDs, then respect any timeout value passed.
1939 * This is an extremely inefficient sleep. If user space passed one or
1940 * more FDs, and we had trouble registering _all_ of them, then bail
1941 * out. If a subset of the provided FDs failed to register, then we
1942 * will still call the kqueue_scan function.
1943 */
1944 if (nfds && (rfds == nfds)) {
1945 goto done;
1946 }
1947
1948 /* scan for, and possibly wait for, the kevents to trigger */
1949 kevent_ctx_t kectx = kevent_get_context(current_thread());
1950 *kectx = (struct kevent_ctx_s){
1951 .kec_process_noutputs = rfds,
1952 .kec_process_flags = KEVENT_FLAG_POLL,
1953 .kec_deadline = 0, /* wait forever */
1954 };
1955
1956 /*
1957 * If any events have trouble registering, an event has fired and we
1958 * shouldn't wait for events in kqueue_scan.
1959 */
1960 if (rfds) {
1961 kectx->kec_process_flags |= KEVENT_FLAG_IMMEDIATE;
1962 } else if (uap->timeout != -1) {
1963 clock_interval_to_deadline(uap->timeout, NSEC_PER_MSEC,
1964 &kectx->kec_deadline);
1965 }
1966
1967 error = kqueue_scan(kq, kectx->kec_process_flags, kectx, poll_callback);
1968 rfds = kectx->kec_process_noutputs;
1969
1970 done:
1971 OSBitAndAtomic(~((uint32_t)P_SELECT), &p->p_flag);
1972 /* poll is not restarted after signals... */
1973 if (error == ERESTART) {
1974 error = EINTR;
1975 }
1976 if (error == 0) {
1977 error = copyout(fds, uap->fds, nfds * sizeof(struct pollfd));
1978 *retval = rfds;
1979 }
1980
1981 out:
1982 if (NULL != fds) {
1983 FREE(fds, M_TEMP);
1984 }
1985
1986 kqueue_dealloc(kq);
1987 return error;
1988 }
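/*
 * Editor's note (illustrative, not part of the original source): the poll
 * emulation above maps each pollfd onto oneshot kevents roughly as:
 *
 *   POLLIN | POLLRDNORM | POLLPRI | POLLRDBAND | POLLHUP  -> EVFILT_READ
 *                                  (POLLPRI | POLLRDBAND  -> EV_OOBAND)
 *   POLLOUT | POLLWRNORM | POLLWRBAND                     -> EVFILT_WRITE
 *   POLLEXTEND | POLLATTRIB | POLLNLINK | POLLWRITE       -> EVFILT_VNODE
 *
 * poll_callback() below translates fired kevents back into revents.
 */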
1989
1990 static int
1991 poll_callback(struct kevent_qos_s *kevp, kevent_ctx_t kectx)
1992 {
1993 struct pollfd *fds = CAST_DOWN(struct pollfd *, kevp->udata);
1994 short prev_revents = fds->revents;
1995 short mask = 0;
1996
1997 /* convert the results back into revents */
1998 if (kevp->flags & EV_EOF) {
1999 fds->revents |= POLLHUP;
2000 }
2001 if (kevp->flags & EV_ERROR) {
2002 fds->revents |= POLLERR;
2003 }
2004
2005 switch (kevp->filter) {
2006 case EVFILT_READ:
2007 if (fds->revents & POLLHUP) {
2008 mask = (POLLIN | POLLRDNORM | POLLPRI | POLLRDBAND);
2009 } else {
2010 mask = (POLLIN | POLLRDNORM);
2011 if (kevp->flags & EV_OOBAND) {
2012 mask |= (POLLPRI | POLLRDBAND);
2013 }
2014 }
2015 fds->revents |= (fds->events & mask);
2016 break;
2017
2018 case EVFILT_WRITE:
2019 if (!(fds->revents & POLLHUP)) {
2020 fds->revents |= (fds->events & (POLLOUT | POLLWRNORM | POLLWRBAND));
2021 }
2022 break;
2023
2024 case EVFILT_VNODE:
2025 if (kevp->fflags & NOTE_EXTEND) {
2026 fds->revents |= (fds->events & POLLEXTEND);
2027 }
2028 if (kevp->fflags & NOTE_ATTRIB) {
2029 fds->revents |= (fds->events & POLLATTRIB);
2030 }
2031 if (kevp->fflags & NOTE_LINK) {
2032 fds->revents |= (fds->events & POLLNLINK);
2033 }
2034 if (kevp->fflags & NOTE_WRITE) {
2035 fds->revents |= (fds->events & POLLWRITE);
2036 }
2037 break;
2038 }
2039
2040 if (fds->revents != 0 && prev_revents == 0) {
2041 kectx->kec_process_noutputs++;
2042 }
2043
2044 return 0;
2045 }
2046
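/*
 * seltrue
 *
 * Default select/poll routine for devices that are always ready:
 * unconditionally reports the descriptor as selectable.
 */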
2047 int
2048 seltrue(__unused dev_t dev, __unused int flag, __unused struct proc *p)
2049 {
2050 return 1;
2051 }
2052
2053 /*
2054 * selcount
2055 *
2056 * Count the number of bits set in the input bit vector, and establish an
2057 * outstanding fp->fp_iocount for each of the descriptors which will be in
2058 * use in the select operation.
2059 *
2060 * Parameters: p The process doing the select
2061 * ibits The input bit vector
2062 * nfd The number of fd's in the vector
2063 * countp Pointer to where to store the bit count
2064 *
2065 * Returns: 0 Success
2066 * EIO Bad per process open file table
2067 * EBADF One of the bits in the input bit vector
2068 * references an invalid fd
2069 *
2070 * Implicit: *countp (modified) Count of fd's
2071 *
2072 * Notes: This function is the first pass under the proc_fdlock() that
2073 * permits us to recognize invalid descriptors in the bit vector;
2074 * they may, however, not remain valid through the drop and
2075 * later reacquisition of the proc_fdlock().
2076 */
2077 static int
2078 selcount(struct proc *p, u_int32_t *ibits, int nfd, int *countp)
2079 {
2080 struct filedesc *fdp = p->p_fd;
2081 int msk, i, j, fd;
2082 u_int32_t bits;
2083 struct fileproc *fp;
2084 int n = 0;
2085 u_int32_t *iptr;
2086 u_int nw;
2087 int error = 0;
2088 int need_wakeup = 0;
2089
2090 /*
2091 * Problems during reboot due to Mac OS X signal issues
2092 * in Beaker1C; verify that p->p_fd is valid.
2093 */
2094 if (fdp == NULL) {
2095 *countp = 0;
2096 return EIO;
2097 }
2098 nw = howmany(nfd, NFDBITS);
2099
2100 proc_fdlock(p);
2101 for (msk = 0; msk < 3; msk++) {
2102 iptr = (u_int32_t *)&ibits[msk * nw];
2103 for (i = 0; i < nfd; i += NFDBITS) {
2104 bits = iptr[i / NFDBITS];
2105 while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
2106 bits &= ~(1U << j);
2107
2108 fp = fp_get_noref_locked(p, fd);
2109 if (fp == NULL) {
2110 *countp = 0;
2111 error = EBADF;
2112 goto bad;
2113 }
2114 os_ref_retain_locked(&fp->fp_iocount);
2115 n++;
2116 }
2117 }
2118 }
2119 proc_fdunlock(p);
2120
2121 *countp = n;
2122 return 0;
2123
2124 bad:
2125 if (n == 0) {
2126 goto out;
2127 }
2128 /* Ignore error return; it's already EBADF */
2129 (void)seldrop_locked(p, ibits, nfd, n, &need_wakeup);
2130
2131 out:
2132 proc_fdunlock(p);
2133 if (need_wakeup) {
2134 wakeup(&p->p_fpdrainwait);
2135 }
2136 return error;
2137 }
2138
2139
2140 /*
2141 * seldrop_locked
2142 *
2143 * Drop outstanding wait queue references set up during selscan(); drop the
2144 * outstanding per fileproc fp_iocount picked up during the selcount().
2145 *
2146 * Parameters: p Process performing the select
2147 * ibits Input bit vector of fd's
2148 * nfd Number of fd's
2149 * lim Limit to number of vector entries to
2150 * consider, or -1 for "all"
2151 * inselect True if
2152 * need_wakeup Pointer to flag to set to do a wakeup
2153 * if fp_iocount on any descriptor goes to 0
2154 *
2155 * Returns: 0 Success
2156 * EBADF One or more fds in the bit vector
2157 * were invalid, but the rest
2158 * were successfully dropped
2159 *
2160 * Notes: An fd may become bad while the proc_fdlock() is not held,
2161 * if a multithreaded application closes the fd out from under
2162 * the in progress select. In this case, we still have to
2163 * clean up after the set up on the remaining fds.
2164 */
2165 static int
2166 seldrop_locked(struct proc *p, u_int32_t *ibits, int nfd, int lim, int *need_wakeup)
2167 {
2168 struct filedesc *fdp = p->p_fd;
2169 int msk, i, j, nc, fd;
2170 u_int32_t bits;
2171 struct fileproc *fp;
2172 u_int32_t *iptr;
2173 u_int nw;
2174 int error = 0;
2175 uthread_t uth = get_bsdthread_info(current_thread());
2176 struct _select_data *seldata;
2177
2178 *need_wakeup = 0;
2179
2180 /*
2181 * Problems during reboot due to Mac OS X signal issues
2182 * in Beaker1C; verify that p->p_fd is valid.
2183 */
2184 if (fdp == NULL) {
2185 return EIO;
2186 }
2187
2188 nw = howmany(nfd, NFDBITS);
2189 seldata = &uth->uu_save.uus_select_data;
2190
2191 nc = 0;
2192 for (msk = 0; msk < 3; msk++) {
2193 iptr = (u_int32_t *)&ibits[msk * nw];
2194 for (i = 0; i < nfd; i += NFDBITS) {
2195 bits = iptr[i / NFDBITS];
2196 while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
2197 bits &= ~(1U << j);
2198 /*
2199 * If we've already dropped as many as were
2200 * counted/scanned, then we are done.
2201 */
2202 if (nc >= lim) {
2203 goto done;
2204 }
2205
2206 /*
2207 * We took an I/O reference in selcount,
2208 * so the fp can't possibly be NULL.
2209 */
2210 fp = fp_get_noref_locked_with_iocount(p, fd);
2211 selunlinkfp(fp,
2212 seldata->wqp ? seldata->wqp[nc] : 0,
2213 uth->uu_wqset);
2214
2215 nc++;
2216
2217 const os_ref_count_t refc = os_ref_release_locked(&fp->fp_iocount);
2218 if (0 == refc) {
2219 panic("fp_iocount overdecrement!");
2220 }
2221
2222 if (1 == refc) {
2223 /*
2224 * The last iocount is responsible for clearing
2225 * the FP_SELCONFLICT flag - even if we didn't set it -
2226 * and is also responsible for waking up anyone
2227 * waiting on iocounts to drain.
2228 */
2229 if (fp->fp_flags & FP_SELCONFLICT) {
2230 fp->fp_flags &= ~FP_SELCONFLICT;
2231 }
2232 if (p->p_fpdrainwait) {
2233 p->p_fpdrainwait = 0;
2234 *need_wakeup = 1;
2235 }
2236 }
2237 }
2238 }
2239 }
2240 done:
2241 return error;
2242 }
2243
2244
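/*
 * seldrop
 *
 * Wrapper around seldrop_locked() that takes and drops the proc_fdlock(),
 * then performs the deferred wakeup of p_fpdrainwait waiters if one is needed.
 */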
2245 static int
2246 seldrop(struct proc *p, u_int32_t *ibits, int nfd, int lim)
2247 {
2248 int error;
2249 int need_wakeup = 0;
2250
2251 proc_fdlock(p);
2252 error = seldrop_locked(p, ibits, nfd, lim, &need_wakeup);
2253 proc_fdunlock(p);
2254 if (need_wakeup) {
2255 wakeup(&p->p_fpdrainwait);
2256 }
2257 return error;
2258 }
2259
2260 /*
2261 * Record a select request.
2262 */
2263 void
2264 selrecord(__unused struct proc *selector, struct selinfo *sip, void *s_data)
2265 {
2266 thread_t cur_act = current_thread();
2267 struct uthread * ut = get_bsdthread_info(cur_act);
2268 /* on input, s_data points to the 64-bit ID of a reserved link object */
2269 uint64_t *reserved_link = (uint64_t *)s_data;
2270
2271 /* need to look at collisions */
2272
2273 /* do not record if this is the second pass of select */
2274 if (!s_data) {
2275 return;
2276 }
2277
2278 if ((sip->si_flags & SI_INITED) == 0) {
2279 waitq_init(&sip->si_waitq, SYNC_POLICY_FIFO);
2280 sip->si_flags |= SI_INITED;
2281 sip->si_flags &= ~SI_CLEAR;
2282 }
2283
2284 if (sip->si_flags & SI_RECORDED) {
2285 sip->si_flags |= SI_COLL;
2286 } else {
2287 sip->si_flags &= ~SI_COLL;
2288 }
2289
2290 sip->si_flags |= SI_RECORDED;
2291 /* note: this checks for pre-existing linkage */
2292 waitq_link(&sip->si_waitq, ut->uu_wqset,
2293 WAITQ_SHOULD_LOCK, reserved_link);
2294
2295 /*
2296 * Always consume the reserved link.
2297 * We can always call waitq_link_release() safely because if
2298 * waitq_link is successful, it consumes the link and resets the
2299 * value to 0, in which case our call to release becomes a no-op.
2300 * If waitq_link fails, then the following release call will actually
2301 * release the reserved link object.
2302 */
2303 waitq_link_release(*reserved_link);
2304 *reserved_link = 0;
2305
2306 /*
2307 * Use the s_data pointer as an output parameter as well.
2308 * This avoids changing the prototype for this function which is
2309 * used by many kexts. We need to surface the waitq object
2310 * associated with the selinfo we just added to the thread's select
2311 * set. New waitq sets do not have back-pointers to set members, so
2312 * the only way to clear out set linkage objects is to go from the
2313 * waitq to the set. We use a memcpy because s_data could be
2314 * pointing to an unaligned value on the stack
2315 * (especially on 32-bit systems)
2316 */
2317 void *wqptr = (void *)&sip->si_waitq;
2318 memcpy((void *)s_data, (void *)&wqptr, sizeof(void *));
2319
2320 return;
2321 }
2322
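/*
 * selwakeup
 *
 * Wake every thread whose select/poll request was recorded against this
 * selinfo; collisions (SI_COLL) are counted but no longer receive a
 * separate global wakeup.
 */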
2323 void
2324 selwakeup(struct selinfo *sip)
2325 {
2326 if ((sip->si_flags & SI_INITED) == 0) {
2327 return;
2328 }
2329
2330 if (sip->si_flags & SI_COLL) {
2331 nselcoll++;
2332 sip->si_flags &= ~SI_COLL;
2333 #if 0
2334 /* will not support */
2335 //wakeup((caddr_t)&selwait);
2336 #endif
2337 }
2338
2339 if (sip->si_flags & SI_RECORDED) {
2340 waitq_wakeup64_all(&sip->si_waitq, NO_EVENT64,
2341 THREAD_AWAKENED, WAITQ_ALL_PRIORITIES);
2342 sip->si_flags &= ~SI_RECORDED;
2343 }
2344 }
2345
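/*
 * selthreadclear
 *
 * Typically called when a driver's selinfo is being torn down: wake any
 * recorded waiters, mark the selinfo cleared, and deinitialize its waitq.
 */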
2346 void
2347 selthreadclear(struct selinfo *sip)
2348 {
2349 struct waitq *wq;
2350
2351 if ((sip->si_flags & SI_INITED) == 0) {
2352 return;
2353 }
2354 if (sip->si_flags & SI_RECORDED) {
2355 selwakeup(sip);
2356 sip->si_flags &= ~(SI_RECORDED | SI_COLL);
2357 }
2358 sip->si_flags |= SI_CLEAR;
2359 sip->si_flags &= ~SI_INITED;
2360
2361 wq = &sip->si_waitq;
2362
2363 /*
2364 * Higher level logic may have a handle on this waitq's prepost ID,
2365 * but that's OK because the waitq_deinit will remove/invalidate the
2366 * prepost object (as well as mark the waitq invalid). This de-couples
2367 * us from any callers that may have a handle to this waitq via the
2368 * prepost ID.
2369 */
2370 waitq_deinit(wq);
2371 }
2372
2373
2374 /*
2375 * gethostuuid
2376 *
2377 * Description: Get the host UUID from IOKit and return it to user space.
2378 *
2379 * Parameters: uuid_buf Pointer to buffer to receive UUID
2380 * timeout Timespec for the timeout
2381 *
2382 * Returns: 0 Success
2383 * EWOULDBLOCK Timeout is too short
2384 * copyout:EFAULT Bad user buffer
2385 * mac_system_check_info:EPERM Client not allowed to perform this operation
2386 *
2387 * Notes: A timeout seems redundant, since if it's tolerable to not
2388 * have a system UUID in hand, then why ask for one?
2389 */
2390 int
2391 gethostuuid(struct proc *p, struct gethostuuid_args *uap, __unused int32_t *retval)
2392 {
2393 kern_return_t kret;
2394 int error;
2395 mach_timespec_t mach_ts; /* for IOKit call */
2396 __darwin_uuid_t uuid_kern = {}; /* for IOKit call */
2397
2398 /* Check entitlement */
2399 if (!IOTaskHasEntitlement(current_task(), "com.apple.private.getprivatesysid")) {
2400 #if !defined(XNU_TARGET_OS_OSX)
2401 #if CONFIG_MACF
2402 if ((error = mac_system_check_info(kauth_cred_get(), "hw.uuid")) != 0) {
2403 /* EPERM invokes userspace upcall if present */
2404 return error;
2405 }
2406 #endif
2407 #endif
2408 }
2409
2410 /* Convert the 32/64 bit timespec into a mach_timespec_t */
2411 if (proc_is64bit(p)) {
2412 struct user64_timespec ts;
2413 error = copyin(uap->timeoutp, &ts, sizeof(ts));
2414 if (error) {
2415 return error;
2416 }
2417 mach_ts.tv_sec = (unsigned int)ts.tv_sec;
2418 mach_ts.tv_nsec = (clock_res_t)ts.tv_nsec;
2419 } else {
2420 struct user32_timespec ts;
2421 error = copyin(uap->timeoutp, &ts, sizeof(ts));
2422 if (error) {
2423 return error;
2424 }
2425 mach_ts.tv_sec = ts.tv_sec;
2426 mach_ts.tv_nsec = ts.tv_nsec;
2427 }
2428
2429 /* Call IOKit with the stack buffer to get the UUID */
2430 kret = IOBSDGetPlatformUUID(uuid_kern, mach_ts);
2431
2432 /*
2433 * If we get it, copy out the data to the user buffer; note that a
2434 * uuid_t is an array of characters, so this is size invariant for
2435 * 32 vs. 64 bit.
2436 */
2437 if (kret == KERN_SUCCESS) {
2438 error = copyout(uuid_kern, uap->uuid_buf, sizeof(uuid_kern));
2439 } else {
2440 error = EWOULDBLOCK;
2441 }
2442
2443 return error;
2444 }
2445
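/*
 * Rough userspace sketch (not part of this file) exercising the syscall
 * above, assuming the Libsyscall gethostuuid() wrapper is available and
 * that the entitlement/MAC policy permits the call on the target platform:
 *
 *     #include <uuid/uuid.h>
 *     #include <time.h>
 *     #include <stdio.h>
 *
 *     extern int gethostuuid(uuid_t, const struct timespec *);
 *
 *     int main(void)
 *     {
 *             uuid_t uu;
 *             uuid_string_t str;
 *             struct timespec ts = { .tv_sec = 5, .tv_nsec = 0 };
 *
 *             if (gethostuuid(uu, &ts) != 0) {
 *                     perror("gethostuuid");
 *                     return 1;
 *             }
 *             uuid_unparse(uu, str);
 *             printf("host UUID: %s\n", str);
 *             return 0;
 *     }
 */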
2446 /*
2447 * ledger
2448 *
2449 * Description: Omnibus system call for ledger operations
2450 */
2451 int
2452 ledger(struct proc *p, struct ledger_args *args, __unused int32_t *retval)
2453 {
2454 #if !CONFIG_MACF
2455 #pragma unused(p)
2456 #endif
2457 int rval, pid, len, error;
2458 #ifdef LEDGER_DEBUG
2459 struct ledger_limit_args lla;
2460 #endif
2461 task_t task;
2462 proc_t proc;
2463
2464 /* Finish copying in the necessary args before taking the proc lock */
2465 error = 0;
2466 len = 0;
2467 if (args->cmd == LEDGER_ENTRY_INFO) {
2468 error = copyin(args->arg3, (char *)&len, sizeof(len));
2469 } else if (args->cmd == LEDGER_TEMPLATE_INFO) {
2470 error = copyin(args->arg2, (char *)&len, sizeof(len));
2471 } else if (args->cmd == LEDGER_LIMIT)
2472 #ifdef LEDGER_DEBUG
2473 { error = copyin(args->arg2, (char *)&lla, sizeof(lla));}
2474 #else
2475 { return EINVAL; }
2476 #endif
2477 else if ((args->cmd < 0) || (args->cmd > LEDGER_MAX_CMD)) {
2478 return EINVAL;
2479 }
2480
2481 if (error) {
2482 return error;
2483 }
2484 if (len < 0) {
2485 return EINVAL;
2486 }
2487
2488 rval = 0;
2489 if (args->cmd != LEDGER_TEMPLATE_INFO) {
2490 pid = (int)args->arg1;
2491 proc = proc_find(pid);
2492 if (proc == NULL) {
2493 return ESRCH;
2494 }
2495
2496 #if CONFIG_MACF
2497 error = mac_proc_check_ledger(p, proc, args->cmd);
2498 if (error) {
2499 proc_rele(proc);
2500 return error;
2501 }
2502 #endif
2503
2504 task = proc->task;
2505 }
2506
2507 switch (args->cmd) {
2508 #ifdef LEDGER_DEBUG
2509 case LEDGER_LIMIT: {
2510 if (!kauth_cred_issuser(kauth_cred_get())) {
2511 rval = EPERM;
2512 }
2513 if (rval == 0) { rval = ledger_limit(task, &lla); } /* preserve EPERM from the issuser check */
2514 proc_rele(proc);
2515 break;
2516 }
2517 #endif
2518 case LEDGER_INFO: {
2519 struct ledger_info info = {};
2520
2521 rval = ledger_info(task, &info);
2522 proc_rele(proc);
2523 if (rval == 0) {
2524 rval = copyout(&info, args->arg2,
2525 sizeof(info));
2526 }
2527 break;
2528 }
2529
2530 case LEDGER_ENTRY_INFO: {
2531 void *buf;
2532 int sz;
2533
2534 rval = ledger_get_task_entry_info_multiple(task, &buf, &len);
2535 proc_rele(proc);
2536 if ((rval == 0) && (len >= 0)) {
2537 sz = len * sizeof(struct ledger_entry_info);
2538 rval = copyout(buf, args->arg2, sz);
2539 kheap_free(KHEAP_DATA_BUFFERS, buf, sz);
2540 }
2541 if (rval == 0) {
2542 rval = copyout(&len, args->arg3, sizeof(len));
2543 }
2544 break;
2545 }
2546
2547 case LEDGER_TEMPLATE_INFO: {
2548 void *buf;
2549 int sz;
2550
2551 rval = ledger_template_info(&buf, &len);
2552 if ((rval == 0) && (len >= 0)) {
2553 sz = len * sizeof(struct ledger_template_info);
2554 rval = copyout(buf, args->arg1, sz);
2555 kheap_free(KHEAP_DATA_BUFFERS, buf, sz);
2556 }
2557 if (rval == 0) {
2558 rval = copyout(&len, args->arg2, sizeof(len));
2559 }
2560 break;
2561 }
2562
2563 default:
2564 panic("ledger syscall logic error -- command type %d", args->cmd);
2565 proc_rele(proc);
2566 rval = EINVAL;
2567 }
2568
2569 return rval;
2570 }
2571
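/*
 * telemetry
 *
 * Omnibus syscall for telemetry control: timer-event and PMI sampling setup
 * (when CONFIG_TELEMETRY is built in) and voucher-name binding for the
 * calling thread.
 */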
2572 int
2573 telemetry(__unused struct proc *p, struct telemetry_args *args, __unused int32_t *retval)
2574 {
2575 int error = 0;
2576
2577 switch (args->cmd) {
2578 #if CONFIG_TELEMETRY
2579 case TELEMETRY_CMD_TIMER_EVENT:
2580 error = telemetry_timer_event(args->deadline, args->interval, args->leeway);
2581 break;
2582 case TELEMETRY_CMD_PMI_SETUP:
2583 error = telemetry_pmi_setup((enum telemetry_pmi)args->deadline, args->interval);
2584 break;
2585 #endif /* CONFIG_TELEMETRY */
2586 case TELEMETRY_CMD_VOUCHER_NAME:
2587 if (thread_set_voucher_name((mach_port_name_t)args->deadline)) {
2588 error = EINVAL;
2589 }
2590 break;
2591
2592 default:
2593 error = EINVAL;
2594 break;
2595 }
2596
2597 return error;
2598 }
2599
2600 /*
2601 * Logging
2602 *
2603 * Description: syscall to access kernel logging from userspace
2604 *
2605 * Args:
2606 * tag - used for syncing with userspace on the version.
2607 * flags - flags used by the syscall.
2608 * buffer - userspace address of string to copy.
2609 * size - size of buffer.
2610 */
2611 int
2612 log_data(__unused struct proc *p, struct log_data_args *args, int *retval)
2613 {
2614 unsigned int tag = args->tag;
2615 unsigned int flags = args->flags;
2616 user_addr_t buffer = args->buffer;
2617 unsigned int size = args->size;
2618 int ret = 0;
2619 char *log_msg = NULL;
2620 int error;
2621 *retval = 0;
2622
2623 /*
2624 * The tag synchronizes the syscall version with userspace.
2625 * Tag == 0 => flags == OS_LOG_TYPE
2626 */
2627 if (tag != 0) {
2628 return EINVAL;
2629 }
2630
2631 /*
2632 * OS_LOG_TYPE are defined in libkern/os/log.h
2633 * In userspace they are defined in libtrace/os/log.h
2634 */
2635 if (flags != OS_LOG_TYPE_DEFAULT &&
2636 flags != OS_LOG_TYPE_INFO &&
2637 flags != OS_LOG_TYPE_DEBUG &&
2638 flags != OS_LOG_TYPE_ERROR &&
2639 flags != OS_LOG_TYPE_FAULT) {
2640 return EINVAL;
2641 }
2642
2643 if (size == 0) {
2644 return EINVAL;
2645 }
2646
2647 /* truncate to OS_LOG_DATA_MAX_SIZE */
2648 if (size > OS_LOG_DATA_MAX_SIZE) {
2649 printf("%s: WARNING msg is going to be truncated from %u to %u\n",
2650 __func__, size, OS_LOG_DATA_MAX_SIZE);
2651 size = OS_LOG_DATA_MAX_SIZE;
2652 }
2653
2654 log_msg = kheap_alloc(KHEAP_TEMP, size, Z_WAITOK);
2655 if (!log_msg) {
2656 return ENOMEM;
2657 }
2658
2659 error = copyin(buffer, log_msg, size);
2660 if (error) {
2661 ret = EFAULT;
2662 goto out;
2663 }
2664 log_msg[size - 1] = '\0';
2665
2666 /*
2667 * This will log to dmesg and logd.
2668 * The call will fail if the current
2669 * process is not a driverKit process.
2670 */
2671 os_log_driverKit(&ret, OS_LOG_DEFAULT, (os_log_type_t)flags, "%s", log_msg);
2672
2673 out:
2674 if (log_msg != NULL) {
2675 kheap_free(KHEAP_TEMP, log_msg, size);
2676 }
2677
2678 return ret;
2679 }
2680
2681 #if DEVELOPMENT || DEBUG
2682 #if CONFIG_WAITQ_DEBUG
2683 static uint64_t g_wqset_num = 0;
2684 struct g_wqset {
2685 queue_chain_t link;
2686 struct waitq_set *wqset;
2687 };
2688
2689 static queue_head_t g_wqset_list;
2690 static struct waitq_set *g_waitq_set = NULL;
2691
2692 static inline struct waitq_set *
2693 sysctl_get_wqset(int idx)
2694 {
2695 struct g_wqset *gwqs;
2696
2697 if (!g_wqset_num) {
2698 queue_init(&g_wqset_list);
2699 }
2700
2701 /* don't bother with locks: this is test-only code! */
2702 qe_foreach_element(gwqs, &g_wqset_list, link) {
2703 if ((int)(wqset_id(gwqs->wqset) & 0xffffffff) == idx) {
2704 return gwqs->wqset;
2705 }
2706 }
2707
2708 /* allocate a new one */
2709 ++g_wqset_num;
2710 gwqs = (struct g_wqset *)kalloc(sizeof(*gwqs));
2711 assert(gwqs != NULL);
2712
2713 gwqs->wqset = waitq_set_alloc(SYNC_POLICY_FIFO | SYNC_POLICY_PREPOST, NULL);
2714 enqueue_tail(&g_wqset_list, &gwqs->link);
2715 printf("[WQ]: created new waitq set 0x%llx\n", wqset_id(gwqs->wqset));
2716
2717 return gwqs->wqset;
2718 }
2719
2720 #define MAX_GLOBAL_TEST_QUEUES 64
2721 static int g_wq_init = 0;
2722 static struct waitq g_wq[MAX_GLOBAL_TEST_QUEUES];
2723
2724 static inline struct waitq *
2725 global_test_waitq(int idx)
2726 {
2727 if (idx < 0) {
2728 return NULL;
2729 }
2730
2731 if (!g_wq_init) {
2732 g_wq_init = 1;
2733 for (int i = 0; i < MAX_GLOBAL_TEST_QUEUES; i++) {
2734 waitq_init(&g_wq[i], SYNC_POLICY_FIFO);
2735 }
2736 }
2737
2738 return &g_wq[idx % MAX_GLOBAL_TEST_QUEUES];
2739 }
2740
2741 static int sysctl_waitq_wakeup_one SYSCTL_HANDLER_ARGS
2742 {
2743 #pragma unused(oidp, arg1, arg2)
2744 int error;
2745 int index;
2746 struct waitq *waitq;
2747 kern_return_t kr;
2748 int64_t event64 = 0;
2749
2750 error = SYSCTL_IN(req, &event64, sizeof(event64));
2751 if (error) {
2752 return error;
2753 }
2754
2755 if (!req->newptr) {
2756 return SYSCTL_OUT(req, &event64, sizeof(event64));
2757 }
2758
2759 if (event64 < 0) {
2760 index = (int)((-event64) & 0xffffffff);
2761 waitq = wqset_waitq(sysctl_get_wqset(index));
2762 index = -index;
2763 } else {
2764 index = (int)event64;
2765 waitq = global_test_waitq(index);
2766 }
2767
2768 event64 = 0;
2769
2770 printf("[WQ]: Waking one thread on waitq [%d] event:0x%llx\n",
2771 index, event64);
2772 kr = waitq_wakeup64_one(waitq, (event64_t)event64, THREAD_AWAKENED,
2773 WAITQ_ALL_PRIORITIES);
2774 printf("[WQ]: \tkr=%d\n", kr);
2775
2776 return SYSCTL_OUT(req, &kr, sizeof(kr));
2777 }
2778 SYSCTL_PROC(_kern, OID_AUTO, waitq_wakeup_one, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
2779 0, 0, sysctl_waitq_wakeup_one, "Q", "wakeup one thread waiting on given event");
2780
2781
2782 static int sysctl_waitq_wakeup_all SYSCTL_HANDLER_ARGS
2783 {
2784 #pragma unused(oidp, arg1, arg2)
2785 int error;
2786 int index;
2787 struct waitq *waitq;
2788 kern_return_t kr;
2789 int64_t event64 = 0;
2790
2791 error = SYSCTL_IN(req, &event64, sizeof(event64));
2792 if (error) {
2793 return error;
2794 }
2795
2796 if (!req->newptr) {
2797 return SYSCTL_OUT(req, &event64, sizeof(event64));
2798 }
2799
2800 if (event64 < 0) {
2801 index = (int)((-event64) & 0xffffffff);
2802 waitq = wqset_waitq(sysctl_get_wqset(index));
2803 index = -index;
2804 } else {
2805 index = (int)event64;
2806 waitq = global_test_waitq(index);
2807 }
2808
2809 event64 = 0;
2810
2811 printf("[WQ]: Waking all threads on waitq [%d] event:0x%llx\n",
2812 index, event64);
2813 kr = waitq_wakeup64_all(waitq, (event64_t)event64,
2814 THREAD_AWAKENED, WAITQ_ALL_PRIORITIES);
2815 printf("[WQ]: \tkr=%d\n", kr);
2816
2817 return SYSCTL_OUT(req, &kr, sizeof(kr));
2818 }
2819 SYSCTL_PROC(_kern, OID_AUTO, waitq_wakeup_all, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
2820 0, 0, sysctl_waitq_wakeup_all, "Q", "wakeup all threads waiting on given event");
2821
2822
2823 static int sysctl_waitq_wait SYSCTL_HANDLER_ARGS
2824 {
2825 #pragma unused(oidp, arg1, arg2)
2826 int error;
2827 int index;
2828 struct waitq *waitq;
2829 kern_return_t kr;
2830 int64_t event64 = 0;
2831
2832 error = SYSCTL_IN(req, &event64, sizeof(event64));
2833 if (error) {
2834 return error;
2835 }
2836
2837 if (!req->newptr) {
2838 return SYSCTL_OUT(req, &event64, sizeof(event64));
2839 }
2840
2841 if (event64 < 0) {
2842 index = (int)((-event64) & 0xffffffff);
2843 waitq = wqset_waitq(sysctl_get_wqset(index));
2844 index = -index;
2845 } else {
2846 index = (int)event64;
2847 waitq = global_test_waitq(index);
2848 }
2849
2850 event64 = 0;
2851
2852 printf("[WQ]: Current thread waiting on waitq [%d] event:0x%llx\n",
2853 index, event64);
2854 kr = waitq_assert_wait64(waitq, (event64_t)event64, THREAD_INTERRUPTIBLE, 0);
2855 if (kr == THREAD_WAITING) {
2856 thread_block(THREAD_CONTINUE_NULL);
2857 }
2858 printf("[WQ]: \tWoke Up: kr=%d\n", kr);
2859
2860 return SYSCTL_OUT(req, &kr, sizeof(kr));
2861 }
2862 SYSCTL_PROC(_kern, OID_AUTO, waitq_wait, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
2863 0, 0, sysctl_waitq_wait, "Q", "start waiting on given event");
2864
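/*
 * Example (DEVELOPMENT/DEBUG kernels with CONFIG_WAITQ_DEBUG only), assuming
 * the sysctl(8) names registered above:
 *
 *     sysctl -w kern.waitq_wait=7          # block this shell on test waitq 7
 *     sysctl -w kern.waitq_wakeup_one=7    # from another shell: wake one waiter
 *
 * Negative values address the waitq embedded in the test waitq set with that
 * index (creating it if needed) rather than a global test waitq.
 */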
2865
2866 static int sysctl_wqset_select SYSCTL_HANDLER_ARGS
2867 {
2868 #pragma unused(oidp, arg1, arg2)
2869 int error;
2870 struct waitq_set *wqset;
2871 uint64_t event64 = 0;
2872
2873 error = SYSCTL_IN(req, &event64, sizeof(event64));
2874 if (error) {
2875 return error;
2876 }
2877
2878 if (!req->newptr) {
2879 goto out;
2880 }
2881
2882 wqset = sysctl_get_wqset((int)(event64 & 0xffffffff));
2883 g_waitq_set = wqset;
2884
2885 event64 = wqset_id(wqset);
2886 printf("[WQ]: selected wqset 0x%llx\n", event64);
2887
2888 out:
2889 if (g_waitq_set) {
2890 event64 = wqset_id(g_waitq_set);
2891 } else {
2892 event64 = (uint64_t)(-1);
2893 }
2894
2895 return SYSCTL_OUT(req, &event64, sizeof(event64));
2896 }
2897 SYSCTL_PROC(_kern, OID_AUTO, wqset_select, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
2898 0, 0, sysctl_wqset_select, "Q", "select/create a global waitq set");
2899
2900
2901 static int sysctl_waitq_link SYSCTL_HANDLER_ARGS
2902 {
2903 #pragma unused(oidp, arg1, arg2)
2904 int error;
2905 int index;
2906 struct waitq *waitq;
2907 struct waitq_set *wqset;
2908 kern_return_t kr = KERN_SUCCESS; /* reported as-is on the early-out path below */
2909 uint64_t reserved_link = 0;
2910 int64_t event64 = 0;
2911
2912 error = SYSCTL_IN(req, &event64, sizeof(event64));
2913 if (error) {
2914 return error;
2915 }
2916
2917 if (!req->newptr) {
2918 return SYSCTL_OUT(req, &event64, sizeof(event64));
2919 }
2920
2921 if (!g_waitq_set) {
2922 g_waitq_set = sysctl_get_wqset(1);
2923 }
2924 wqset = g_waitq_set;
2925
2926 if (event64 < 0) {
2927 struct waitq_set *tmp;
2928 index = (int)((-event64) & 0xffffffff);
2929 tmp = sysctl_get_wqset(index);
2930 if (tmp == wqset) {
2931 goto out;
2932 }
2933 waitq = wqset_waitq(tmp);
2934 index = -index;
2935 } else {
2936 index = (int)event64;
2937 waitq = global_test_waitq(index);
2938 }
2939
2940 printf("[WQ]: linking waitq [%d] to global wqset (0x%llx)\n",
2941 index, wqset_id(wqset));
2942 reserved_link = waitq_link_reserve(waitq);
2943 kr = waitq_link(waitq, wqset, WAITQ_SHOULD_LOCK, &reserved_link);
2944 waitq_link_release(reserved_link);
2945
2946 printf("[WQ]: \tkr=%d\n", kr);
2947
2948 out:
2949 return SYSCTL_OUT(req, &kr, sizeof(kr));
2950 }
2951 SYSCTL_PROC(_kern, OID_AUTO, waitq_link, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
2952 0, 0, sysctl_waitq_link, "Q", "link global waitq to test waitq set");
2953
2954
2955 static int sysctl_waitq_unlink SYSCTL_HANDLER_ARGS
2956 {
2957 #pragma unused(oidp, arg1, arg2)
2958 int error;
2959 int index;
2960 struct waitq *waitq;
2961 struct waitq_set *wqset;
2962 kern_return_t kr;
2963 uint64_t event64 = 0;
2964
2965 error = SYSCTL_IN(req, &event64, sizeof(event64));
2966 if (error) {
2967 return error;
2968 }
2969
2970 if (!req->newptr) {
2971 return SYSCTL_OUT(req, &event64, sizeof(event64));
2972 }
2973
2974 if (!g_waitq_set) {
2975 g_waitq_set = sysctl_get_wqset(1);
2976 }
2977 wqset = g_waitq_set;
2978
2979 index = (int)event64;
2980 waitq = global_test_waitq(index);
2981
2982 printf("[WQ]: unlinking waitq [%d] from global wqset (0x%llx)\n",
2983 index, wqset_id(wqset));
2984
2985 kr = waitq_unlink(waitq, wqset);
2986 printf("[WQ]: \tkr=%d\n", kr);
2987
2988 return SYSCTL_OUT(req, &kr, sizeof(kr));
2989 }
2990 SYSCTL_PROC(_kern, OID_AUTO, waitq_unlink, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
2991 0, 0, sysctl_waitq_unlink, "Q", "unlink global waitq from test waitq set");
2992
2993
2994 static int sysctl_waitq_clear_prepost SYSCTL_HANDLER_ARGS
2995 {
2996 #pragma unused(oidp, arg1, arg2)
2997 struct waitq *waitq;
2998 uint64_t event64 = 0;
2999 int error, index;
3000
3001 error = SYSCTL_IN(req, &event64, sizeof(event64));
3002 if (error) {
3003 return error;
3004 }
3005
3006 if (!req->newptr) {
3007 return SYSCTL_OUT(req, &event64, sizeof(event64));
3008 }
3009
3010 index = (int)event64;
3011 waitq = global_test_waitq(index);
3012
3013 printf("[WQ]: clearing prepost on waitq [%d]\n", index);
3014 waitq_clear_prepost(waitq);
3015
3016 return SYSCTL_OUT(req, &event64, sizeof(event64));
3017 }
3018 SYSCTL_PROC(_kern, OID_AUTO, waitq_clear_prepost, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
3019 0, 0, sysctl_waitq_clear_prepost, "Q", "clear prepost on given waitq");
3020
3021
3022 static int sysctl_wqset_unlink_all SYSCTL_HANDLER_ARGS
3023 {
3024 #pragma unused(oidp, arg1, arg2)
3025 int error;
3026 struct waitq_set *wqset;
3027 kern_return_t kr;
3028 uint64_t event64 = 0;
3029
3030 error = SYSCTL_IN(req, &event64, sizeof(event64));
3031 if (error) {
3032 return error;
3033 }
3034
3035 if (!req->newptr) {
3036 return SYSCTL_OUT(req, &event64, sizeof(event64));
3037 }
3038
3039 if (!g_waitq_set) {
3040 g_waitq_set = sysctl_get_wqset(1);
3041 }
3042 wqset = g_waitq_set;
3043
3044 printf("[WQ]: unlinking all queues from global wqset (0x%llx)\n",
3045 wqset_id(wqset));
3046
3047 kr = waitq_set_unlink_all(wqset);
3048 printf("[WQ]: \tkr=%d\n", kr);
3049
3050 return SYSCTL_OUT(req, &kr, sizeof(kr));
3051 }
3052 SYSCTL_PROC(_kern, OID_AUTO, wqset_unlink_all, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
3053 0, 0, sysctl_wqset_unlink_all, "Q", "unlink all queues from test waitq set");
3054
3055
3056 static int sysctl_wqset_clear_preposts SYSCTL_HANDLER_ARGS
3057 {
3058 #pragma unused(oidp, arg1, arg2)
3059 struct waitq_set *wqset = NULL;
3060 uint64_t event64 = 0;
3061 int error, index;
3062
3063 error = SYSCTL_IN(req, &event64, sizeof(event64));
3064 if (error) {
3065 return error;
3066 }
3067
3068 if (!req->newptr) {
3069 goto out;
3070 }
3071
3072 index = (int)((event64) & 0xffffffff);
3073 wqset = sysctl_get_wqset(index);
3074 assert(wqset != NULL);
3075
3076 printf("[WQ]: clearing preposts on wqset 0x%llx\n", wqset_id(wqset));
3077 waitq_set_clear_preposts(wqset);
3078
3079 out:
3080 if (wqset) {
3081 event64 = wqset_id(wqset);
3082 } else {
3083 event64 = (uint64_t)(-1);
3084 }
3085
3086 return SYSCTL_OUT(req, &event64, sizeof(event64));
3087 }
3088 SYSCTL_PROC(_kern, OID_AUTO, wqset_clear_preposts, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
3089 0, 0, sysctl_wqset_clear_preposts, "Q", "clear preposts on given waitq set");
3090
3091 #endif /* CONFIG_WAITQ_DEBUG */
3092
3093 static int
3094 sysctl_waitq_set_nelem SYSCTL_HANDLER_ARGS
3095 {
3096 #pragma unused(oidp, arg1, arg2)
3097 int nelem;
3098
3099 /* Read only */
3100 if (req->newptr != USER_ADDR_NULL) {
3101 return EPERM;
3102 }
3103
3104 nelem = sysctl_helper_waitq_set_nelem();
3105
3106 return SYSCTL_OUT(req, &nelem, sizeof(nelem));
3107 }
3108
3109 SYSCTL_PROC(_kern, OID_AUTO, n_ltable_entries, CTLFLAG_RD | CTLFLAG_LOCKED,
3110 0, 0, sysctl_waitq_set_nelem, "I", "ltable elements currently used");
3111
3112
3113 static int
3114 sysctl_mpsc_test_pingpong SYSCTL_HANDLER_ARGS
3115 {
3116 #pragma unused(oidp, arg1, arg2)
3117 uint64_t value = 0;
3118 int error;
3119
3120 error = SYSCTL_IN(req, &value, sizeof(value));
3121 if (error) {
3122 return error;
3123 }
3124
3125 if (error == 0 && req->newptr) {
3126 error = mpsc_test_pingpong(value, &value);
3127 if (error == 0) {
3128 error = SYSCTL_OUT(req, &value, sizeof(value));
3129 }
3130 }
3131
3132 return error;
3133 }
3134 SYSCTL_PROC(_kern, OID_AUTO, mpsc_test_pingpong, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
3135 0, 0, sysctl_mpsc_test_pingpong, "Q", "MPSC tests: pingpong");
3136
3137 #endif /* DEVELOPMENT || DEBUG */
3138
3139 /*Remote Time api*/
3140 SYSCTL_NODE(_machdep, OID_AUTO, remotetime, CTLFLAG_RD | CTLFLAG_LOCKED, 0, "Remote time api");
3141
3142 #if DEVELOPMENT || DEBUG
3143 #if CONFIG_MACH_BRIDGE_SEND_TIME
3144 extern _Atomic uint32_t bt_init_flag;
3145 extern uint32_t mach_bridge_timer_enable(uint32_t, int);
3146
3147 SYSCTL_INT(_machdep_remotetime, OID_AUTO, bridge_timer_init_flag,
3148 CTLFLAG_RD | CTLFLAG_LOCKED, &bt_init_flag, 0, "");
3149
3150 static int sysctl_mach_bridge_timer_enable SYSCTL_HANDLER_ARGS
3151 {
3152 #pragma unused(oidp, arg1, arg2)
3153 uint32_t value = 0;
3154 int error = 0;
3155 /* User is querying buffer size */
3156 if (req->oldptr == USER_ADDR_NULL && req->newptr == USER_ADDR_NULL) {
3157 req->oldidx = sizeof(value);
3158 return 0;
3159 }
3160 if (os_atomic_load(&bt_init_flag, acquire)) {
3161 if (req->newptr) {
3162 int new_value = 0;
3163 error = SYSCTL_IN(req, &new_value, sizeof(new_value));
3164 if (error) {
3165 return error;
3166 }
3167 if (new_value == 0 || new_value == 1) {
3168 value = mach_bridge_timer_enable(new_value, 1);
3169 } else {
3170 return EPERM;
3171 }
3172 } else {
3173 value = mach_bridge_timer_enable(0, 0);
3174 }
3175 }
3176 error = SYSCTL_OUT(req, &value, sizeof(value));
3177 return error;
3178 }
3179
3180 SYSCTL_PROC(_machdep_remotetime, OID_AUTO, bridge_timer_enable,
3181 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
3182 0, 0, sysctl_mach_bridge_timer_enable, "I", "");
3183
3184 #endif /* CONFIG_MACH_BRIDGE_SEND_TIME */
3185
3186 static int sysctl_mach_bridge_remote_time SYSCTL_HANDLER_ARGS
3187 {
3188 #pragma unused(oidp, arg1, arg2)
3189 uint64_t ltime = 0, rtime = 0;
3190 if (req->oldptr == USER_ADDR_NULL) {
3191 req->oldidx = sizeof(rtime);
3192 return 0;
3193 }
3194 if (req->newptr) {
3195 int error = SYSCTL_IN(req, &ltime, sizeof(ltime));
3196 if (error) {
3197 return error;
3198 }
3199 }
3200 rtime = mach_bridge_remote_time(ltime);
3201 return SYSCTL_OUT(req, &rtime, sizeof(rtime));
3202 }
3203 SYSCTL_PROC(_machdep_remotetime, OID_AUTO, mach_bridge_remote_time,
3204 CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
3205 0, 0, sysctl_mach_bridge_remote_time, "Q", "");
3206
3207 #endif /* DEVELOPMENT || DEBUG */
3208
3209 #if CONFIG_MACH_BRIDGE_RECV_TIME
3210 extern struct bt_params bt_params_get_latest(void);
3211
3212 static int sysctl_mach_bridge_conversion_params SYSCTL_HANDLER_ARGS
3213 {
3214 #pragma unused(oidp, arg1, arg2)
3215 struct bt_params params = {};
3216 if (req->oldptr == USER_ADDR_NULL) {
3217 req->oldidx = sizeof(struct bt_params);
3218 return 0;
3219 }
3220 if (req->newptr) {
3221 return EPERM;
3222 }
3223 params = bt_params_get_latest();
3224 return SYSCTL_OUT(req, &params, MIN(sizeof(params), req->oldlen));
3225 }
3226
3227 SYSCTL_PROC(_machdep_remotetime, OID_AUTO, conversion_params,
3228 CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED, 0,
3229 0, sysctl_mach_bridge_conversion_params, "S,bt_params", "");
3230
3231 #endif /* CONFIG_MACH_BRIDGE_RECV_TIME */
3232
3233 #if DEVELOPMENT || DEBUG
3234 #if __AMP__
3235 #include <pexpert/pexpert.h>
3236 extern int32_t sysctl_get_bound_cpuid(void);
3237 extern void sysctl_thread_bind_cpuid(int32_t cpuid);
3238 static int
3239 sysctl_kern_sched_thread_bind_cpu SYSCTL_HANDLER_ARGS
3240 {
3241 #pragma unused(oidp, arg1, arg2)
3242
3243 if (!PE_parse_boot_argn("enable_skstb", NULL, 0)) {
3244 return ENOENT;
3245 }
3246
3247 int32_t cpuid = sysctl_get_bound_cpuid();
3248
3249 int32_t new_value;
3250 int changed;
3251 int error = sysctl_io_number(req, cpuid, sizeof cpuid, &new_value, &changed);
3252 if (error) {
3253 return error;
3254 }
3255
3256 if (changed) {
3257 sysctl_thread_bind_cpuid(new_value);
3258 }
3259
3260 return error;
3261 }
3262
3263 SYSCTL_PROC(_kern, OID_AUTO, sched_thread_bind_cpu, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
3264 0, 0, sysctl_kern_sched_thread_bind_cpu, "I", "");
3265
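/*
 * Example (DEVELOPMENT/DEBUG __AMP__ kernels booted with the enable_skstb
 * boot-arg), assuming the sysctl name registered above:
 *
 *     sysctl -w kern.sched_thread_bind_cpu=2   # bind the calling thread to CPU 2
 *     sysctl kern.sched_thread_bind_cpu        # read back the bound cpuid
 */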
3266 extern char sysctl_get_bound_cluster_type(void);
3267 extern void sysctl_thread_bind_cluster_type(char cluster_type);
3268 static int
3269 sysctl_kern_sched_thread_bind_cluster_type SYSCTL_HANDLER_ARGS
3270 {
3271 #pragma unused(oidp, arg1, arg2)
3272 char buff[4];
3273
3274 if (!PE_parse_boot_argn("enable_skstb", NULL, 0)) {
3275 return ENOENT;
3276 }
3277
3278 int error = SYSCTL_IN(req, buff, 1);
3279 if (error) {
3280 return error;
3281 }
3282 char cluster_type = buff[0];
3283
3284 if (!req->newptr) {
3285 goto out;
3286 }
3287
3288 sysctl_thread_bind_cluster_type(cluster_type);
3289 out:
3290 cluster_type = sysctl_get_bound_cluster_type();
3291 buff[0] = cluster_type;
3292
3293 return SYSCTL_OUT(req, buff, 1);
3294 }
3295
3296 SYSCTL_PROC(_kern, OID_AUTO, sched_thread_bind_cluster_type, CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_LOCKED,
3297 0, 0, sysctl_kern_sched_thread_bind_cluster_type, "A", "");
3298
3299 extern char sysctl_get_task_cluster_type(void);
3300 extern void sysctl_task_set_cluster_type(char cluster_type);
3301 static int
3302 sysctl_kern_sched_task_set_cluster_type SYSCTL_HANDLER_ARGS
3303 {
3304 #pragma unused(oidp, arg1, arg2)
3305 char buff[4];
3306
3307 if (!PE_parse_boot_argn("enable_skstsct", NULL, 0)) {
3308 return ENOENT;
3309 }
3310
3311 int error = SYSCTL_IN(req, buff, 1);
3312 if (error) {
3313 return error;
3314 }
3315 char cluster_type = buff[0];
3316
3317 if (!req->newptr) {
3318 goto out;
3319 }
3320
3321 sysctl_task_set_cluster_type(cluster_type);
3322 out:
3323 cluster_type = sysctl_get_task_cluster_type();
3324 buff[0] = cluster_type;
3325
3326 return SYSCTL_OUT(req, buff, 1);
3327 }
3328
3329 SYSCTL_PROC(_kern, OID_AUTO, sched_task_set_cluster_type, CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_LOCKED,
3330 0, 0, sysctl_kern_sched_task_set_cluster_type, "A", "");
3331
3332 #if CONFIG_SCHED_EDGE
3333
3334 /*
3335 * Edge Scheduler Sysctls
3336 *
3337 * The Edge scheduler uses edge configurations to decide feasibility of
3338 * migrating threads across clusters. The sysctls allow dynamic configuration
3339 * of the edge properties and edge weights. This configuration is typically
3340 * updated via callouts from CLPC.
3341 *
3342 * <Edge Multi-cluster Support Needed>
3343 */
3344 extern sched_clutch_edge sched_edge_config_e_to_p;
3345 extern sched_clutch_edge sched_edge_config_p_to_e;
3346 extern kern_return_t sched_edge_sysctl_configure_e_to_p(uint64_t);
3347 extern kern_return_t sched_edge_sysctl_configure_p_to_e(uint64_t);
3348 extern sched_clutch_edge sched_edge_e_to_p(void);
3349 extern sched_clutch_edge sched_edge_p_to_e(void);
3350
3351 static int sysctl_sched_edge_config_e_to_p SYSCTL_HANDLER_ARGS
3352 {
3353 #pragma unused(oidp, arg1, arg2)
3354 int error;
3355 kern_return_t kr;
3356 int64_t edge_config = 0;
3357
3358 error = SYSCTL_IN(req, &edge_config, sizeof(edge_config));
3359 if (error) {
3360 return error;
3361 }
3362
3363 if (!req->newptr) {
3364 edge_config = sched_edge_e_to_p().sce_edge_packed;
3365 return SYSCTL_OUT(req, &edge_config, sizeof(edge_config));
3366 }
3367
3368 kr = sched_edge_sysctl_configure_e_to_p(edge_config);
3369 return SYSCTL_OUT(req, &kr, sizeof(kr));
3370 }
3371 SYSCTL_PROC(_kern, OID_AUTO, sched_edge_config_e_to_p, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
3372 0, 0, sysctl_sched_edge_config_e_to_p, "Q", "Edge Scheduler Config for E-to-P cluster");
3373
3374 static int sysctl_sched_edge_config_p_to_e SYSCTL_HANDLER_ARGS
3375 {
3376 #pragma unused(oidp, arg1, arg2)
3377 int error;
3378 kern_return_t kr;
3379 int64_t edge_config = 0;
3380
3381 error = SYSCTL_IN(req, &edge_config, sizeof(edge_config));
3382 if (error) {
3383 return error;
3384 }
3385
3386 if (!req->newptr) {
3387 edge_config = sched_edge_p_to_e().sce_edge_packed;
3388 return SYSCTL_OUT(req, &edge_config, sizeof(edge_config));
3389 }
3390
3391 kr = sched_edge_sysctl_configure_p_to_e(edge_config);
3392 return SYSCTL_OUT(req, &kr, sizeof(kr));
3393 }
3394 SYSCTL_PROC(_kern, OID_AUTO, sched_edge_config_p_to_e, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
3395 0, 0, sysctl_sched_edge_config_p_to_e, "Q", "Edge Scheduler Config for P-to-E cluster");
3396
3397 extern int sched_edge_restrict_ut;
3398 SYSCTL_INT(_kern, OID_AUTO, sched_edge_restrict_ut, CTLFLAG_RW | CTLFLAG_LOCKED, &sched_edge_restrict_ut, 0, "Edge Scheduler Restrict UT Threads");
3399 extern int sched_edge_restrict_bg;
3400 SYSCTL_INT(_kern, OID_AUTO, sched_edge_restrict_bg, CTLFLAG_RW | CTLFLAG_LOCKED, &sched_edge_restrict_bg, 0, "Edge Scheduler Restrict BG Threads");
3401 extern int sched_edge_migrate_ipi_immediate;
3402 SYSCTL_INT(_kern, OID_AUTO, sched_edge_migrate_ipi_immediate, CTLFLAG_RW | CTLFLAG_LOCKED, &sched_edge_migrate_ipi_immediate, 0, "Edge Scheduler uses immediate IPIs for migration event based on execution latency");
3403
3404 #endif /* CONFIG_SCHED_EDGE */
3405
3406 #endif /* __AMP__ */
3407 #endif /* DEVELOPMENT || DEBUG */
3408
3409 extern uint32_t task_exc_guard_default;
3410
3411 SYSCTL_INT(_kern, OID_AUTO, task_exc_guard_default,
3412 CTLFLAG_RD | CTLFLAG_LOCKED, &task_exc_guard_default, 0, "");
3413
3414
3415 static int
3416 sysctl_kern_tcsm_available SYSCTL_HANDLER_ARGS
3417 {
3418 #pragma unused(oidp, arg1, arg2)
3419 uint32_t value = machine_csv(CPUVN_CI) ? 1 : 0;
3420
3421 if (req->newptr) {
3422 return EINVAL;
3423 }
3424
3425 return SYSCTL_OUT(req, &value, sizeof(value));
3426 }
3427 SYSCTL_PROC(_kern, OID_AUTO, tcsm_available,
3428 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED | CTLFLAG_MASKED | CTLFLAG_ANYBODY,
3429 0, 0, sysctl_kern_tcsm_available, "I", "");
3430
3431
3432 static int
3433 sysctl_kern_tcsm_enable SYSCTL_HANDLER_ARGS
3434 {
3435 #pragma unused(oidp, arg1, arg2)
3436 uint32_t soflags = 0;
3437 uint32_t old_value = thread_get_no_smt() ? 1 : 0;
3438
3439 int error = SYSCTL_IN(req, &soflags, sizeof(soflags));
3440 if (error) {
3441 return error;
3442 }
3443
3444 if (soflags && machine_csv(CPUVN_CI)) {
3445 thread_set_no_smt(true);
3446 machine_tecs(current_thread());
3447 }
3448
3449 return SYSCTL_OUT(req, &old_value, sizeof(old_value));
3450 }
3451 SYSCTL_PROC(_kern, OID_AUTO, tcsm_enable,
3452 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_MASKED | CTLFLAG_ANYBODY,
3453 0, 0, sysctl_kern_tcsm_enable, "I", "");
3454
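/*
 * Example usage, assuming the sysctl names registered above:
 *
 *     sysctl kern.tcsm_available      # 1 if the cross-thread mitigation applies here
 *     sysctl -w kern.tcsm_enable=1    # opt the calling thread into no-SMT + TECS
 */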
3455
3456 #if DEVELOPMENT || DEBUG
3457 extern void sysctl_task_set_no_smt(char no_smt);
3458 extern char sysctl_task_get_no_smt(void);
3459
3460 static int
3461 sysctl_kern_sched_task_set_no_smt SYSCTL_HANDLER_ARGS
3462 {
3463 #pragma unused(oidp, arg1, arg2)
3464 char buff[4];
3465
3466 int error = SYSCTL_IN(req, buff, 1);
3467 if (error) {
3468 return error;
3469 }
3470 char no_smt = buff[0];
3471
3472 if (!req->newptr) {
3473 goto out;
3474 }
3475
3476 sysctl_task_set_no_smt(no_smt);
3477 out:
3478 no_smt = sysctl_task_get_no_smt();
3479 buff[0] = no_smt;
3480
3481 return SYSCTL_OUT(req, buff, 1);
3482 }
3483
3484 SYSCTL_PROC(_kern, OID_AUTO, sched_task_set_no_smt, CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY,
3485 0, 0, sysctl_kern_sched_task_set_no_smt, "A", "");
3486
3487 static int
3488 sysctl_kern_sched_thread_set_no_smt(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
3489 {
3490 int new_value, changed;
3491 int old_value = thread_get_no_smt() ? 1 : 0;
3492 int error = sysctl_io_number(req, old_value, sizeof(int), &new_value, &changed);
3493
3494 if (changed) {
3495 thread_set_no_smt(!!new_value);
3496 }
3497
3498 return error;
3499 }
3500
3501 SYSCTL_PROC(_kern, OID_AUTO, sched_thread_set_no_smt,
3502 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY,
3503 0, 0, sysctl_kern_sched_thread_set_no_smt, "I", "");
3504
3505 static int
3506 sysctl_kern_debug_get_preoslog SYSCTL_HANDLER_ARGS
3507 {
3508 #pragma unused(oidp, arg1, arg2)
3509 static bool oneshot_executed = false;
3510 size_t preoslog_size = 0;
3511 const char *preoslog = NULL;
3512
3513 // DumpPanic passes a non-zero write value when it needs oneshot behaviour
3514 if (req->newptr) {
3515 uint8_t oneshot = 0;
3516 int error = SYSCTL_IN(req, &oneshot, sizeof(oneshot));
3517 if (error) {
3518 return error;
3519 }
3520
3521 if (oneshot) {
3522 if (!OSCompareAndSwap8(false, true, &oneshot_executed)) {
3523 return EPERM;
3524 }
3525 }
3526 }
3527
3528 preoslog = sysctl_debug_get_preoslog(&preoslog_size);
3529 if (preoslog == NULL || preoslog_size == 0) {
3530 return 0;
3531 }
3532
3533 if (req->oldptr == USER_ADDR_NULL) {
3534 req->oldidx = preoslog_size;
3535 return 0;
3536 }
3537
3538 return SYSCTL_OUT(req, preoslog, preoslog_size);
3539 }
3540
3541 SYSCTL_PROC(_kern, OID_AUTO, preoslog, CTLTYPE_OPAQUE | CTLFLAG_RW | CTLFLAG_LOCKED,
3542 0, 0, sysctl_kern_debug_get_preoslog, "-", "");
3543
3544 static int
3545 sysctl_kern_task_set_filter_msg_flag SYSCTL_HANDLER_ARGS
3546 {
3547 #pragma unused(oidp, arg1, arg2)
3548 int new_value, changed;
3549 int old_value = task_get_filter_msg_flag(current_task()) ? 1 : 0;
3550 int error = sysctl_io_number(req, old_value, sizeof(int), &new_value, &changed);
3551
3552 if (changed) {
3553 task_set_filter_msg_flag(current_task(), !!new_value);
3554 }
3555
3556 return error;
3557 }
3558
3559 SYSCTL_PROC(_kern, OID_AUTO, task_set_filter_msg_flag, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
3560 0, 0, sysctl_kern_task_set_filter_msg_flag, "I", "");
3561
3562 #endif /* DEVELOPMENT || DEBUG */