]> git.saurik.com Git - apple/xnu.git/blame - bsd/kern/sys_generic.c
xnu-7195.50.7.100.1.tar.gz
[apple/xnu.git] / bsd / kern / sys_generic.c
CommitLineData
1c79356b 1/*
3e170ce0 2 * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
5d5c5d0d 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
0a7de745 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
0a7de745 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
0a7de745 17 *
2d21ac55
A
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
0a7de745 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b
A
27 */
28/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
29/*
30 * Copyright (c) 1982, 1986, 1989, 1993
31 * The Regents of the University of California. All rights reserved.
32 * (c) UNIX System Laboratories, Inc.
33 * All or some portions of this file are derived from material licensed
34 * to the University of California by American Telephone and Telegraph
35 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
36 * the permission of UNIX System Laboratories, Inc.
37 *
38 * Redistribution and use in source and binary forms, with or without
39 * modification, are permitted provided that the following conditions
40 * are met:
41 * 1. Redistributions of source code must retain the above copyright
42 * notice, this list of conditions and the following disclaimer.
43 * 2. Redistributions in binary form must reproduce the above copyright
44 * notice, this list of conditions and the following disclaimer in the
45 * documentation and/or other materials provided with the distribution.
46 * 3. All advertising materials mentioning features or use of this software
47 * must display the following acknowledgement:
48 * This product includes software developed by the University of
49 * California, Berkeley and its contributors.
50 * 4. Neither the name of the University nor the names of its contributors
51 * may be used to endorse or promote products derived from this software
52 * without specific prior written permission.
53 *
54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 * SUCH DAMAGE.
65 *
66 * @(#)sys_generic.c 8.9 (Berkeley) 2/14/95
67 */
2d21ac55
A
68/*
69 * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce
70 * support for mandatory and extensible security protections. This notice
71 * is included in support of clause 2.2 (b) of the Apple Public License,
72 * Version 2.0.
73 */
1c79356b
A
74
75#include <sys/param.h>
76#include <sys/systm.h>
77#include <sys/filedesc.h>
78#include <sys/ioctl.h>
91447636
A
79#include <sys/file_internal.h>
80#include <sys/proc_internal.h>
1c79356b 81#include <sys/socketvar.h>
91447636 82#include <sys/uio_internal.h>
1c79356b 83#include <sys/kernel.h>
fe8ab488 84#include <sys/guarded.h>
1c79356b
A
85#include <sys/stat.h>
86#include <sys/malloc.h>
91447636 87#include <sys/sysproto.h>
1c79356b 88
91447636 89#include <sys/mount_internal.h>
1c79356b
A
90#include <sys/protosw.h>
91#include <sys/ev.h>
92#include <sys/user.h>
93#include <sys/kdebug.h>
91447636
A
94#include <sys/poll.h>
95#include <sys/event.h>
96#include <sys/eventvar.h>
316670eb 97#include <sys/proc.h>
39236c6e 98#include <sys/kauth.h>
91447636 99
5ba3f43e 100#include <machine/smp.h>
91447636
A
101#include <mach/mach_types.h>
102#include <kern/kern_types.h>
1c79356b 103#include <kern/assert.h>
91447636
A
104#include <kern/kalloc.h>
105#include <kern/thread.h>
106#include <kern/clock.h>
316670eb
A
107#include <kern/ledger.h>
108#include <kern/task.h>
39236c6e 109#include <kern/telemetry.h>
3e170ce0
A
110#include <kern/waitq.h>
111#include <kern/sched_prim.h>
cb323159 112#include <kern/mpsc_queue.h>
f427ee49 113#include <kern/debug.h>
1c79356b
A
114
115#include <sys/mbuf.h>
39236c6e 116#include <sys/domain.h>
1c79356b
A
117#include <sys/socket.h>
118#include <sys/socketvar.h>
119#include <sys/errno.h>
55e303ae 120#include <sys/syscall.h>
91447636 121#include <sys/pipe.h>
1c79356b 122
b0d623f7 123#include <security/audit/audit.h>
e5568f75 124
1c79356b
A
125#include <net/if.h>
126#include <net/route.h>
127
128#include <netinet/in.h>
129#include <netinet/in_systm.h>
130#include <netinet/ip.h>
131#include <netinet/in_pcb.h>
132#include <netinet/ip_var.h>
133#include <netinet/ip6.h>
134#include <netinet/tcp.h>
135#include <netinet/tcp_fsm.h>
136#include <netinet/tcp_seq.h>
137#include <netinet/tcp_timer.h>
138#include <netinet/tcp_var.h>
139#include <netinet/tcpip.h>
140#include <netinet/tcp_debug.h>
0b4e3aa0 141/* for wait queue based select */
3e170ce0 142#include <kern/waitq.h>
91447636 143#include <sys/vnode_internal.h>
0a7de745
A
144/* for remote time api*/
145#include <kern/remote_time.h>
cb323159
A
146#include <os/log.h>
147#include <sys/log_data.h>
91447636 148
5ba3f43e
A
149#if CONFIG_MACF
150#include <security/mac_framework.h>
151#endif
152
cb323159
A
153/* for entitlement check */
154#include <IOKit/IOBSD.h>
155
2d21ac55 156/* XXX should be in a header file somewhere */
2d21ac55
A
157extern kern_return_t IOBSDGetPlatformUUID(__darwin_uuid_t uuid, mach_timespec_t timeoutp);
158
f427ee49
A
159int rd_uio(struct proc *p, int fdes, uio_t uio, int is_preadv, user_ssize_t *retval);
160int wr_uio(struct proc *p, int fdes, uio_t uio, int is_pwritev, user_ssize_t *retval);
161int do_uiowrite(struct proc *p, struct fileproc *fp, uio_t uio, int flags, user_ssize_t *retval);
91447636 162
0a7de745
A
163__private_extern__ int dofileread(vfs_context_t ctx, struct fileproc *fp,
164 user_addr_t bufp, user_size_t nbyte,
165 off_t offset, int flags, user_ssize_t *retval);
166__private_extern__ int dofilewrite(vfs_context_t ctx, struct fileproc *fp,
167 user_addr_t bufp, user_size_t nbyte,
168 off_t offset, int flags, user_ssize_t *retval);
f427ee49 169static int preparefileread(struct proc *p, struct fileproc **fp_ret, int fd, int check_for_vnode);
9bccf70c 170
6d2010ae 171/* Conflict wait queue for when selects collide (opaque type) */
3e170ce0 172struct waitq select_conflict_queue;
6d2010ae
A
173
174/*
175 * Init routine called from bsd_init.c
176 */
3e170ce0 177void select_waitq_init(void);
6d2010ae 178void
3e170ce0 179select_waitq_init(void)
6d2010ae 180{
39037602 181 waitq_init(&select_conflict_queue, SYNC_POLICY_FIFO);
6d2010ae
A
182}
183
f427ee49
A
184#define f_flag fp_glob->fg_flag
185#define f_type fp_glob->fg_ops->fo_type
186#define f_cred fp_glob->fg_cred
187#define f_ops fp_glob->fg_ops
188#define f_data fp_glob->fg_data
2d21ac55 189
1c79356b
A
190/*
191 * Read system call.
2d21ac55
A
192 *
193 * Returns: 0 Success
194 * preparefileread:EBADF
195 * preparefileread:ESPIPE
196 * preparefileread:ENXIO
197 * preparefileread:EBADF
198 * dofileread:???
1c79356b 199 */
9bccf70c 200int
2d21ac55
A
201read(struct proc *p, struct read_args *uap, user_ssize_t *retval)
202{
203 __pthread_testcancel(1);
0a7de745 204 return read_nocancel(p, (struct read_nocancel_args *)uap, retval);
2d21ac55
A
205}
206
207int
208read_nocancel(struct proc *p, struct read_nocancel_args *uap, user_ssize_t *retval)
9bccf70c 209{
91447636 210 struct fileproc *fp;
9bccf70c 211 int error;
91447636 212 int fd = uap->fd;
b0d623f7 213 struct vfs_context context;
91447636 214
0a7de745
A
215 if ((error = preparefileread(p, &fp, fd, 0))) {
216 return error;
217 }
9bccf70c 218
b0d623f7 219 context = *(vfs_context_current());
f427ee49 220 context.vc_ucred = fp->fp_glob->fg_cred;
b0d623f7
A
221
222 error = dofileread(&context, fp, uap->cbuf, uap->nbyte,
0a7de745 223 (off_t)-1, 0, retval);
91447636 224
f427ee49 225 fp_drop(p, fd, fp, 0);
91447636 226
0a7de745 227 return error;
9bccf70c
A
228}
229
0a7de745 230/*
9bccf70c 231 * Pread system call
2d21ac55
A
232 *
233 * Returns: 0 Success
234 * preparefileread:EBADF
235 * preparefileread:ESPIPE
236 * preparefileread:ENXIO
237 * preparefileread:EBADF
238 * dofileread:???
9bccf70c 239 */
9bccf70c 240int
2d21ac55 241pread(struct proc *p, struct pread_args *uap, user_ssize_t *retval)
9bccf70c 242{
2d21ac55 243 __pthread_testcancel(1);
0a7de745 244 return pread_nocancel(p, (struct pread_nocancel_args *)uap, retval);
2d21ac55
A
245}
246
247int
248pread_nocancel(struct proc *p, struct pread_nocancel_args *uap, user_ssize_t *retval)
249{
0a7de745 250 struct fileproc *fp = NULL; /* fp set by preparefileread() */
91447636 251 int fd = uap->fd;
9bccf70c 252 int error;
b0d623f7 253 struct vfs_context context;
9bccf70c 254
0a7de745 255 if ((error = preparefileread(p, &fp, fd, 1))) {
4a3eedf9 256 goto out;
0a7de745 257 }
91447636 258
b0d623f7 259 context = *(vfs_context_current());
f427ee49 260 context.vc_ucred = fp->fp_glob->fg_cred;
b0d623f7
A
261
262 error = dofileread(&context, fp, uap->buf, uap->nbyte,
0a7de745
A
263 uap->offset, FOF_OFFSET, retval);
264
f427ee49 265 fp_drop(p, fd, fp, 0);
91447636 266
b7266188 267 KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_pread) | DBG_FUNC_NONE),
0a7de745 268 uap->fd, uap->nbyte, (unsigned int)((uap->offset >> 32)), (unsigned int)(uap->offset), 0);
4a3eedf9
A
269
270out:
0a7de745 271 return error;
9bccf70c
A
272}
273
274/*
275 * Code common for read and pread
276 */
91447636 277
2d21ac55
A
278/*
279 * Returns: 0 Success
280 * EBADF
281 * ESPIPE
282 * ENXIO
283 * fp_lookup:EBADF
2d21ac55 284 */
f427ee49 285static int
91447636
A
286preparefileread(struct proc *p, struct fileproc **fp_ret, int fd, int check_for_pread)
287{
288 vnode_t vp;
0a7de745 289 int error;
91447636
A
290 struct fileproc *fp;
291
b0d623f7
A
292 AUDIT_ARG(fd, fd);
293
2d21ac55 294 proc_fdlock_spin(p);
91447636
A
295
296 error = fp_lookup(p, fd, &fp, 1);
297
298 if (error) {
0a7de745
A
299 proc_fdunlock(p);
300 return error;
91447636
A
301 }
302 if ((fp->f_flag & FREAD) == 0) {
0a7de745 303 error = EBADF;
91447636
A
304 goto out;
305 }
306 if (check_for_pread && (fp->f_type != DTYPE_VNODE)) {
0a7de745 307 error = ESPIPE;
91447636
A
308 goto out;
309 }
310 if (fp->f_type == DTYPE_VNODE) {
f427ee49 311 vp = (struct vnode *)fp->fp_glob->fg_data;
91447636 312
2d21ac55
A
313 if (check_for_pread && (vnode_isfifo(vp))) {
314 error = ESPIPE;
315 goto out;
0a7de745 316 }
2d21ac55
A
317 if (check_for_pread && (vp->v_flag & VISTTY)) {
318 error = ENXIO;
319 goto out;
320 }
91447636
A
321 }
322
323 *fp_ret = fp;
324
0a7de745
A
325 proc_fdunlock(p);
326 return 0;
91447636
A
327
328out:
329 fp_drop(p, fd, fp, 1);
330 proc_fdunlock(p);
0a7de745 331 return error;
91447636
A
332}
333
334
2d21ac55
A
335/*
336 * Returns: 0 Success
337 * EINVAL
338 * fo_read:???
339 */
55e303ae 340__private_extern__ int
2d21ac55 341dofileread(vfs_context_t ctx, struct fileproc *fp,
0a7de745
A
342 user_addr_t bufp, user_size_t nbyte, off_t offset, int flags,
343 user_ssize_t *retval)
1c79356b 344{
91447636
A
345 uio_t auio;
346 user_ssize_t bytecnt;
cb323159 347 int error = 0;
0a7de745 348 char uio_buf[UIO_SIZEOF(1)];
1c79356b 349
0a7de745
A
350 if (nbyte > INT_MAX) {
351 return EINVAL;
352 }
91447636 353
2d21ac55 354 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
0a7de745
A
355 auio = uio_createwithbuffer(1, offset, UIO_USERSPACE64, UIO_READ,
356 &uio_buf[0], sizeof(uio_buf));
91447636 357 } else {
0a7de745
A
358 auio = uio_createwithbuffer(1, offset, UIO_USERSPACE32, UIO_READ,
359 &uio_buf[0], sizeof(uio_buf));
91447636 360 }
cb323159
A
361 if (uio_addiov(auio, bufp, nbyte) != 0) {
362 *retval = 0;
363 return EINVAL;
364 }
91447636 365
91447636 366 bytecnt = nbyte;
9bccf70c 367
2d21ac55 368 if ((error = fo_read(fp, auio, flags, ctx))) {
91447636 369 if (uio_resid(auio) != bytecnt && (error == ERESTART ||
0a7de745 370 error == EINTR || error == EWOULDBLOCK)) {
9bccf70c 371 error = 0;
0a7de745 372 }
9bccf70c 373 }
91447636 374 bytecnt -= uio_resid(auio);
91447636
A
375
376 *retval = bytecnt;
377
0a7de745 378 return error;
1c79356b
A
379}
380
0a7de745 381/*
f427ee49 382 * Vector read.
2d21ac55 383 *
f427ee49
A
384 * Returns: 0 Success
385 * EINVAL
386 * ENOMEM
387 * preparefileread:EBADF
388 * preparefileread:ESPIPE
389 * preparefileread:ENXIO
390 * preparefileread:EBADF
391 * copyin:EFAULT
392 * rd_uio:???
9bccf70c 393 */
f427ee49
A
394static int
395readv_preadv_uio(struct proc *p, int fdes,
396 user_addr_t user_iovp, int iovcnt, off_t offset, int is_preadv,
397 user_ssize_t *retval)
1c79356b 398{
91447636 399 uio_t auio = NULL;
1c79356b 400 int error;
91447636
A
401 struct user_iovec *iovp;
402
f427ee49
A
403 /* Verify range before calling uio_create() */
404 if (iovcnt <= 0 || iovcnt > UIO_MAXIOV) {
0a7de745
A
405 return EINVAL;
406 }
91447636
A
407
408 /* allocate a uio large enough to hold the number of iovecs passed */
f427ee49 409 auio = uio_create(iovcnt, offset,
0a7de745
A
410 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
411 UIO_READ);
412
91447636
A
413 /* get location of iovecs within the uio. then copyin the iovecs from
414 * user space.
415 */
416 iovp = uio_iovsaddr(auio);
417 if (iovp == NULL) {
418 error = ENOMEM;
419 goto ExitThisRoutine;
420 }
f427ee49 421 error = copyin_user_iovec_array(user_iovp,
0a7de745 422 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
f427ee49 423 iovcnt, iovp);
91447636
A
424 if (error) {
425 goto ExitThisRoutine;
426 }
0a7de745
A
427
428 /* finalize uio_t for use and do the IO
91447636 429 */
39236c6e
A
430 error = uio_calculateresid(auio);
431 if (error) {
432 goto ExitThisRoutine;
433 }
f427ee49 434 error = rd_uio(p, fdes, auio, is_preadv, retval);
91447636
A
435
436ExitThisRoutine:
437 if (auio != NULL) {
438 uio_free(auio);
439 }
0a7de745 440 return error;
1c79356b
A
441}
442
f427ee49
A
443/*
444 * Scatter read system call.
445 */
446int
447readv(struct proc *p, struct readv_args *uap, user_ssize_t *retval)
448{
449 __pthread_testcancel(1);
450 return readv_nocancel(p, (struct readv_nocancel_args *)uap, retval);
451}
452
453int
454readv_nocancel(struct proc *p, struct readv_nocancel_args *uap, user_ssize_t *retval)
455{
456 return readv_preadv_uio(p, uap->fd, uap->iovp, uap->iovcnt, 0, 0, retval);
457}
458
459/*
460 * Preadv system call
461 */
462int
463sys_preadv(struct proc *p, struct preadv_args *uap, user_ssize_t *retval)
464{
465 __pthread_testcancel(1);
466 return sys_preadv_nocancel(p, (struct preadv_nocancel_args *)uap, retval);
467}
468
469int
470sys_preadv_nocancel(struct proc *p, struct preadv_nocancel_args *uap, user_ssize_t *retval)
471{
472 return readv_preadv_uio(p, uap->fd, uap->iovp, uap->iovcnt, uap->offset, 1, retval);
473}
474
1c79356b
A
475/*
476 * Write system call
2d21ac55
A
477 *
478 * Returns: 0 Success
479 * EBADF
480 * fp_lookup:EBADF
481 * dofilewrite:???
1c79356b 482 */
9bccf70c 483int
2d21ac55
A
484write(struct proc *p, struct write_args *uap, user_ssize_t *retval)
485{
486 __pthread_testcancel(1);
0a7de745 487 return write_nocancel(p, (struct write_nocancel_args *)uap, retval);
2d21ac55
A
488}
489
490int
491write_nocancel(struct proc *p, struct write_nocancel_args *uap, user_ssize_t *retval)
1c79356b 492{
91447636 493 struct fileproc *fp;
0a7de745 494 int error;
91447636 495 int fd = uap->fd;
9bccf70c 496
b0d623f7
A
497 AUDIT_ARG(fd, fd);
498
0a7de745
A
499 error = fp_lookup(p, fd, &fp, 0);
500 if (error) {
501 return error;
502 }
91447636
A
503 if ((fp->f_flag & FWRITE) == 0) {
504 error = EBADF;
fe8ab488
A
505 } else if (FP_ISGUARDED(fp, GUARD_WRITE)) {
506 proc_fdlock(p);
507 error = fp_guard_exception(p, fd, fp, kGUARD_EXC_WRITE);
508 proc_fdunlock(p);
91447636 509 } else {
2d21ac55 510 struct vfs_context context = *(vfs_context_current());
f427ee49 511 context.vc_ucred = fp->fp_glob->fg_cred;
2d21ac55
A
512
513 error = dofilewrite(&context, fp, uap->cbuf, uap->nbyte,
0a7de745 514 (off_t)-1, 0, retval);
0a7de745 515 }
f427ee49 516 fp_drop(p, fd, fp, 0);
0a7de745 517 return error;
9bccf70c
A
518}
519
0a7de745 520/*
91447636 521 * pwrite system call
2d21ac55
A
522 *
523 * Returns: 0 Success
524 * EBADF
525 * ESPIPE
526 * ENXIO
527 * EINVAL
528 * fp_lookup:EBADF
529 * dofilewrite:???
9bccf70c 530 */
9bccf70c 531int
2d21ac55
A
532pwrite(struct proc *p, struct pwrite_args *uap, user_ssize_t *retval)
533{
534 __pthread_testcancel(1);
0a7de745 535 return pwrite_nocancel(p, (struct pwrite_nocancel_args *)uap, retval);
2d21ac55
A
536}
537
538int
539pwrite_nocancel(struct proc *p, struct pwrite_nocancel_args *uap, user_ssize_t *retval)
9bccf70c 540{
0a7de745
A
541 struct fileproc *fp;
542 int error;
91447636 543 int fd = uap->fd;
2d21ac55 544 vnode_t vp = (vnode_t)0;
91447636 545
b0d623f7
A
546 AUDIT_ARG(fd, fd);
547
f427ee49 548 error = fp_get_ftype(p, fd, DTYPE_VNODE, ESPIPE, &fp);
0a7de745
A
549 if (error) {
550 return error;
551 }
9bccf70c 552
91447636
A
553 if ((fp->f_flag & FWRITE) == 0) {
554 error = EBADF;
fe8ab488
A
555 } else if (FP_ISGUARDED(fp, GUARD_WRITE)) {
556 proc_fdlock(p);
557 error = fp_guard_exception(p, fd, fp, kGUARD_EXC_WRITE);
558 proc_fdunlock(p);
91447636 559 } else {
2d21ac55 560 struct vfs_context context = *vfs_context_current();
f427ee49 561 context.vc_ucred = fp->fp_glob->fg_cred;
2d21ac55 562
f427ee49 563 vp = (vnode_t)fp->fp_glob->fg_data;
2d21ac55
A
564 if (vnode_isfifo(vp)) {
565 error = ESPIPE;
566 goto errout;
0a7de745 567 }
2d21ac55
A
568 if ((vp->v_flag & VISTTY)) {
569 error = ENXIO;
570 goto errout;
91447636 571 }
2d21ac55
A
572 if (uap->offset == (off_t)-1) {
573 error = EINVAL;
574 goto errout;
575 }
576
0a7de745
A
577 error = dofilewrite(&context, fp, uap->buf, uap->nbyte,
578 uap->offset, FOF_OFFSET, retval);
0a7de745 579 }
2d21ac55 580errout:
f427ee49 581 fp_drop(p, fd, fp, 0);
55e303ae 582
b7266188 583 KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_pwrite) | DBG_FUNC_NONE),
0a7de745
A
584 uap->fd, uap->nbyte, (unsigned int)((uap->offset >> 32)), (unsigned int)(uap->offset), 0);
585
586 return error;
9bccf70c
A
587}
588
2d21ac55
A
589/*
590 * Returns: 0 Success
591 * EINVAL
592 * <fo_write>:EPIPE
593 * <fo_write>:??? [indirect through struct fileops]
594 */
0a7de745 595__private_extern__ int
2d21ac55 596dofilewrite(vfs_context_t ctx, struct fileproc *fp,
0a7de745
A
597 user_addr_t bufp, user_size_t nbyte, off_t offset, int flags,
598 user_ssize_t *retval)
599{
91447636 600 uio_t auio;
cb323159 601 int error = 0;
91447636 602 user_ssize_t bytecnt;
0a7de745 603 char uio_buf[UIO_SIZEOF(1)];
91447636 604
fe8ab488
A
605 if (nbyte > INT_MAX) {
606 *retval = 0;
0a7de745 607 return EINVAL;
fe8ab488 608 }
91447636 609
2d21ac55 610 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
0a7de745
A
611 auio = uio_createwithbuffer(1, offset, UIO_USERSPACE64, UIO_WRITE,
612 &uio_buf[0], sizeof(uio_buf));
91447636 613 } else {
0a7de745
A
614 auio = uio_createwithbuffer(1, offset, UIO_USERSPACE32, UIO_WRITE,
615 &uio_buf[0], sizeof(uio_buf));
91447636 616 }
cb323159
A
617 if (uio_addiov(auio, bufp, nbyte) != 0) {
618 *retval = 0;
619 return EINVAL;
620 }
91447636 621
0a7de745 622 bytecnt = nbyte;
2d21ac55 623 if ((error = fo_write(fp, auio, flags, ctx))) {
91447636 624 if (uio_resid(auio) != bytecnt && (error == ERESTART ||
0a7de745 625 error == EINTR || error == EWOULDBLOCK)) {
9bccf70c 626 error = 0;
0a7de745 627 }
55e303ae 628 /* The socket layer handles SIGPIPE */
6d2010ae 629 if (error == EPIPE && fp->f_type != DTYPE_SOCKET &&
f427ee49 630 (fp->fp_glob->fg_lflags & FG_NOSIGPIPE) == 0) {
2d21ac55
A
631 /* XXX Raise the signal on the thread? */
632 psignal(vfs_context_proc(ctx), SIGPIPE);
633 }
9bccf70c 634 }
91447636 635 bytecnt -= uio_resid(auio);
f427ee49
A
636 if (bytecnt) {
637 os_atomic_or(&fp->fp_glob->fg_flag, FWASWRITTEN, relaxed);
638 }
91447636
A
639 *retval = bytecnt;
640
0a7de745 641 return error;
1c79356b 642}
0a7de745
A
643
644/*
f427ee49
A
645 * Returns: 0 Success
646 * EBADF
647 * ESPIPE
648 * ENXIO
649 * fp_lookup:EBADF
650 * fp_guard_exception:???
0a7de745 651 */
f427ee49
A
652static int
653preparefilewrite(struct proc *p, struct fileproc **fp_ret, int fd, int check_for_pwrite)
2d21ac55 654{
f427ee49
A
655 vnode_t vp;
656 int error;
657 struct fileproc *fp;
658
659 AUDIT_ARG(fd, fd);
660
661 proc_fdlock_spin(p);
662
663 error = fp_lookup(p, fd, &fp, 1);
664
665 if (error) {
666 proc_fdunlock(p);
667 return error;
668 }
669 if ((fp->f_flag & FWRITE) == 0) {
670 error = EBADF;
671 goto ExitThisRoutine;
672 }
673 if (FP_ISGUARDED(fp, GUARD_WRITE)) {
674 error = fp_guard_exception(p, fd, fp, kGUARD_EXC_WRITE);
675 goto ExitThisRoutine;
676 }
677 if (check_for_pwrite) {
678 if (fp->f_type != DTYPE_VNODE) {
679 error = ESPIPE;
680 goto ExitThisRoutine;
681 }
682
683 vp = (vnode_t)fp->fp_glob->fg_data;
684 if (vnode_isfifo(vp)) {
685 error = ESPIPE;
686 goto ExitThisRoutine;
687 }
688 if ((vp->v_flag & VISTTY)) {
689 error = ENXIO;
690 goto ExitThisRoutine;
691 }
692 }
693
694 *fp_ret = fp;
695
696 proc_fdunlock(p);
697 return 0;
698
699ExitThisRoutine:
700 fp_drop(p, fd, fp, 1);
701 proc_fdunlock(p);
702 return error;
2d21ac55
A
703}
704
f427ee49
A
705static int
706writev_prwritev_uio(struct proc *p, int fd,
707 user_addr_t user_iovp, int iovcnt, off_t offset, int is_pwritev,
708 user_ssize_t *retval)
1c79356b 709{
91447636 710 uio_t auio = NULL;
1c79356b 711 int error;
91447636 712 struct user_iovec *iovp;
b0d623f7 713
f427ee49
A
714 /* Verify range before calling uio_create() */
715 if (iovcnt <= 0 || iovcnt > UIO_MAXIOV || offset < 0) {
0a7de745
A
716 return EINVAL;
717 }
91447636
A
718
719 /* allocate a uio large enough to hold the number of iovecs passed */
f427ee49 720 auio = uio_create(iovcnt, offset,
0a7de745
A
721 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
722 UIO_WRITE);
723
91447636
A
724 /* get location of iovecs within the uio. then copyin the iovecs from
725 * user space.
726 */
727 iovp = uio_iovsaddr(auio);
728 if (iovp == NULL) {
729 error = ENOMEM;
730 goto ExitThisRoutine;
731 }
f427ee49 732 error = copyin_user_iovec_array(user_iovp,
0a7de745 733 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
f427ee49 734 iovcnt, iovp);
91447636
A
735 if (error) {
736 goto ExitThisRoutine;
737 }
0a7de745
A
738
739 /* finalize uio_t for use and do the IO
91447636 740 */
39236c6e
A
741 error = uio_calculateresid(auio);
742 if (error) {
743 goto ExitThisRoutine;
744 }
fe8ab488 745
f427ee49 746 error = wr_uio(p, fd, auio, is_pwritev, retval);
91447636
A
747
748ExitThisRoutine:
749 if (auio != NULL) {
750 uio_free(auio);
751 }
0a7de745 752 return error;
1c79356b
A
753}
754
f427ee49
A
755/*
756 * Gather write system call
757 */
758int
759writev(struct proc *p, struct writev_args *uap, user_ssize_t *retval)
760{
761 __pthread_testcancel(1);
762 return writev_nocancel(p, (struct writev_nocancel_args *)uap, retval);
763}
764
765int
766writev_nocancel(struct proc *p, struct writev_nocancel_args *uap, user_ssize_t *retval)
767{
768 return writev_prwritev_uio(p, uap->fd, uap->iovp, uap->iovcnt, 0, 0, retval);
769}
770
771/*
772 * Pwritev system call
773 */
774int
775sys_pwritev(struct proc *p, struct pwritev_args *uap, user_ssize_t *retval)
776{
777 __pthread_testcancel(1);
778 return sys_pwritev_nocancel(p, (struct pwritev_nocancel_args *)uap, retval);
779}
780
781int
782sys_pwritev_nocancel(struct proc *p, struct pwritev_nocancel_args *uap, user_ssize_t *retval)
783{
784 return writev_prwritev_uio(p, uap->fd, uap->iovp, uap->iovcnt, uap->offset, 1, retval);
785}
786
787/*
788 * Returns: 0 Success
789 * preparefileread:EBADF
790 * preparefileread:ESPIPE
791 * preparefileread:ENXIO
792 * preparefileread:???
793 * fo_write:???
794 */
795int
796wr_uio(struct proc *p, int fd, uio_t uio, int is_pwritev, user_ssize_t *retval)
797{
798 struct fileproc *fp;
799 int error;
800 int flags;
801
802 if ((error = preparefilewrite(p, &fp, fd, is_pwritev))) {
803 return error;
804 }
805
806 flags = is_pwritev ? FOF_OFFSET : 0;
807 error = do_uiowrite(p, fp, uio, flags, retval);
808
809 fp_drop(p, fd, fp, 0);
810
811 return error;
812}
91447636 813
9bccf70c 814int
f427ee49 815do_uiowrite(struct proc *p, struct fileproc *fp, uio_t uio, int flags, user_ssize_t *retval)
1c79356b 816{
91447636
A
817 int error;
818 user_ssize_t count;
2d21ac55 819 struct vfs_context context = *vfs_context_current();
1c79356b 820
91447636 821 count = uio_resid(uio);
2d21ac55
A
822
823 context.vc_ucred = fp->f_cred;
f427ee49 824 error = fo_write(fp, uio, flags, &context);
91447636
A
825 if (error) {
826 if (uio_resid(uio) != count && (error == ERESTART ||
0a7de745
A
827 error == EINTR || error == EWOULDBLOCK)) {
828 error = 0;
829 }
91447636 830 /* The socket layer handles SIGPIPE */
6d2010ae 831 if (error == EPIPE && fp->f_type != DTYPE_SOCKET &&
f427ee49 832 (fp->fp_glob->fg_lflags & FG_NOSIGPIPE) == 0) {
0a7de745
A
833 psignal(p, SIGPIPE);
834 }
91447636 835 }
f427ee49
A
836 count -= uio_resid(uio);
837 if (count) {
838 os_atomic_or(&fp->fp_glob->fg_flag, FWASWRITTEN, relaxed);
839 }
840 *retval = count;
91447636 841
0a7de745 842 return error;
91447636
A
843}
844
f427ee49
A
845/*
846 * Returns: 0 Success
847 * preparefileread:EBADF
848 * preparefileread:ESPIPE
849 * preparefileread:ENXIO
850 * fo_read:???
851 */
91447636 852int
f427ee49 853rd_uio(struct proc *p, int fdes, uio_t uio, int is_preadv, user_ssize_t *retval)
91447636
A
854{
855 struct fileproc *fp;
856 int error;
857 user_ssize_t count;
2d21ac55 858 struct vfs_context context = *vfs_context_current();
91447636 859
f427ee49 860 if ((error = preparefileread(p, &fp, fdes, is_preadv))) {
0a7de745
A
861 return error;
862 }
91447636
A
863
864 count = uio_resid(uio);
2d21ac55
A
865
866 context.vc_ucred = fp->f_cred;
867
f427ee49
A
868 int flags = is_preadv ? FOF_OFFSET : 0;
869 error = fo_read(fp, uio, flags, &context);
9bccf70c 870
91447636 871 if (error) {
0a7de745
A
872 if (uio_resid(uio) != count && (error == ERESTART ||
873 error == EINTR || error == EWOULDBLOCK)) {
874 error = 0;
875 }
1c79356b 876 }
91447636 877 *retval = count - uio_resid(uio);
9bccf70c 878
f427ee49 879 fp_drop(p, fdes, fp, 0);
9bccf70c 880
0a7de745 881 return error;
1c79356b
A
882}
883
884/*
885 * Ioctl system call
91447636 886 *
2d21ac55
A
887 * Returns: 0 Success
888 * EBADF
889 * ENOTTY
890 * ENOMEM
891 * ESRCH
892 * copyin:EFAULT
893 * copyoutEFAULT
894 * fp_lookup:EBADF Bad file descriptor
895 * fo_ioctl:???
1c79356b 896 */
9bccf70c 897int
b0d623f7 898ioctl(struct proc *p, struct ioctl_args *uap, __unused int32_t *retval)
1c79356b 899{
39236c6e 900 struct fileproc *fp = NULL;
91447636 901 int error = 0;
39236c6e
A
902 u_int size = 0;
903 caddr_t datap = NULL, memp = NULL;
904 boolean_t is64bit = FALSE;
905 int tmp = 0;
0a7de745 906#define STK_PARAMS 128
5ba3f43e 907 char stkbuf[STK_PARAMS] = {};
91447636 908 int fd = uap->fd;
39236c6e 909 u_long com = uap->com;
2d21ac55 910 struct vfs_context context = *vfs_context_current();
1c79356b 911
e5568f75 912 AUDIT_ARG(fd, uap->fd);
e5568f75 913 AUDIT_ARG(addr, uap->data);
91447636
A
914
915 is64bit = proc_is64bit(p);
b0d623f7 916#if CONFIG_AUDIT
0a7de745 917 if (is64bit) {
39236c6e 918 AUDIT_ARG(value64, com);
0a7de745 919 } else {
39236c6e 920 AUDIT_ARG(cmd, CAST_DOWN_EXPLICIT(int, com));
0a7de745 921 }
b0d623f7 922#endif /* CONFIG_AUDIT */
91447636 923
1c79356b
A
924 /*
925 * Interpret high order word to find amount of data to be
926 * copied to/from the user's address space.
927 */
928 size = IOCPARM_LEN(com);
0a7de745
A
929 if (size > IOCPARM_MAX) {
930 return ENOTTY;
931 }
932 if (size > sizeof(stkbuf)) {
f427ee49
A
933 memp = (caddr_t)kheap_alloc(KHEAP_TEMP, size, Z_WAITOK);
934 if (memp == 0) {
39236c6e 935 return ENOMEM;
0a7de745 936 }
91447636 937 datap = memp;
0a7de745 938 } else {
91447636 939 datap = &stkbuf[0];
0a7de745 940 }
39236c6e 941 if (com & IOC_IN) {
1c79356b 942 if (size) {
91447636 943 error = copyin(uap->data, datap, size);
0a7de745 944 if (error) {
39236c6e 945 goto out_nofp;
0a7de745 946 }
91447636
A
947 } else {
948 /* XXX - IOC_IN and no size? we should proably return an error here!! */
949 if (is64bit) {
950 *(user_addr_t *)datap = uap->data;
0a7de745 951 } else {
91447636
A
952 *(uint32_t *)datap = (uint32_t)uap->data;
953 }
954 }
0a7de745 955 } else if ((com & IOC_OUT) && size) {
1c79356b
A
956 /*
957 * Zero the buffer so the user always
958 * gets back something deterministic.
959 */
91447636 960 bzero(datap, size);
0a7de745 961 } else if (com & IOC_VOID) {
91447636
A
962 /* XXX - this is odd since IOC_VOID means no parameters */
963 if (is64bit) {
964 *(user_addr_t *)datap = uap->data;
0a7de745 965 } else {
91447636
A
966 *(uint32_t *)datap = (uint32_t)uap->data;
967 }
968 }
1c79356b 969
39236c6e 970 proc_fdlock(p);
0a7de745
A
971 error = fp_lookup(p, fd, &fp, 1);
972 if (error) {
39236c6e
A
973 proc_fdunlock(p);
974 goto out_nofp;
975 }
976
977 AUDIT_ARG(file, p, fp);
978
979 if ((fp->f_flag & (FREAD | FWRITE)) == 0) {
0a7de745
A
980 error = EBADF;
981 goto out;
39236c6e
A
982 }
983
f427ee49 984 context.vc_ucred = fp->fp_glob->fg_cred;
39236c6e
A
985
986#if CONFIG_MACF
f427ee49 987 error = mac_file_check_ioctl(context.vc_ucred, fp->fp_glob, com);
0a7de745 988 if (error) {
39236c6e 989 goto out;
0a7de745 990 }
39236c6e
A
991#endif
992
1c79356b 993 switch (com) {
39236c6e
A
994 case FIONCLEX:
995 *fdflags(p, fd) &= ~UF_EXCLOSE;
996 break;
997
998 case FIOCLEX:
999 *fdflags(p, fd) |= UF_EXCLOSE;
1000 break;
1c79356b
A
1001
1002 case FIONBIO:
f427ee49
A
1003 // FIXME (rdar://54898652)
1004 //
1005 // this code is broken if fnctl(F_SETFL), ioctl() are
1006 // called concurrently for the same fileglob.
0a7de745 1007 if ((tmp = *(int *)datap)) {
f427ee49 1008 os_atomic_or(&fp->f_flag, FNONBLOCK, relaxed);
0a7de745 1009 } else {
f427ee49 1010 os_atomic_andnot(&fp->f_flag, FNONBLOCK, relaxed);
0a7de745 1011 }
2d21ac55 1012 error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, &context);
1c79356b
A
1013 break;
1014
1015 case FIOASYNC:
f427ee49
A
1016 // FIXME (rdar://54898652)
1017 //
1018 // this code is broken if fnctl(F_SETFL), ioctl() are
1019 // called concurrently for the same fileglob.
0a7de745 1020 if ((tmp = *(int *)datap)) {
f427ee49 1021 os_atomic_or(&fp->f_flag, FASYNC, relaxed);
0a7de745 1022 } else {
f427ee49 1023 os_atomic_andnot(&fp->f_flag, FASYNC, relaxed);
0a7de745 1024 }
2d21ac55 1025 error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, &context);
1c79356b
A
1026 break;
1027
1028 case FIOSETOWN:
91447636 1029 tmp = *(int *)datap;
1c79356b
A
1030 if (fp->f_type == DTYPE_SOCKET) {
1031 ((struct socket *)fp->f_data)->so_pgid = tmp;
1c79356b
A
1032 break;
1033 }
91447636 1034 if (fp->f_type == DTYPE_PIPE) {
cb323159 1035 error = fo_ioctl(fp, TIOCSPGRP, (caddr_t)&tmp, &context);
91447636
A
1036 break;
1037 }
1c79356b
A
1038 if (tmp <= 0) {
1039 tmp = -tmp;
1040 } else {
2d21ac55 1041 struct proc *p1 = proc_find(tmp);
1c79356b
A
1042 if (p1 == 0) {
1043 error = ESRCH;
1044 break;
1045 }
2d21ac55
A
1046 tmp = p1->p_pgrpid;
1047 proc_rele(p1);
1c79356b 1048 }
cb323159 1049 error = fo_ioctl(fp, TIOCSPGRP, (caddr_t)&tmp, &context);
1c79356b
A
1050 break;
1051
1052 case FIOGETOWN:
1053 if (fp->f_type == DTYPE_SOCKET) {
91447636 1054 *(int *)datap = ((struct socket *)fp->f_data)->so_pgid;
1c79356b
A
1055 break;
1056 }
2d21ac55 1057 error = fo_ioctl(fp, TIOCGPGRP, datap, &context);
91447636 1058 *(int *)datap = -*(int *)datap;
1c79356b
A
1059 break;
1060
1061 default:
2d21ac55 1062 error = fo_ioctl(fp, com, datap, &context);
1c79356b
A
1063 /*
1064 * Copy any data to user, size was
1065 * already set and checked above.
1066 */
0a7de745 1067 if (error == 0 && (com & IOC_OUT) && size) {
91447636 1068 error = copyout(datap, uap->data, (u_int)size);
0a7de745 1069 }
1c79356b
A
1070 break;
1071 }
91447636
A
1072out:
1073 fp_drop(p, fd, fp, 1);
1074 proc_fdunlock(p);
39236c6e
A
1075
1076out_nofp:
0a7de745 1077 if (memp) {
f427ee49 1078 kheap_free(KHEAP_TEMP, memp, size);
0a7de745
A
1079 }
1080 return error;
1c79356b
A
1081}
1082
0a7de745 1083int selwait, nselcoll;
0b4e3aa0
A
1084#define SEL_FIRSTPASS 1
1085#define SEL_SECONDPASS 2
9bccf70c
A
1086extern int selcontinue(int error);
1087extern int selprocess(int error, int sel_pass);
fe8ab488 1088static int selscan(struct proc *p, struct _select * sel, struct _select_data * seldata,
0a7de745 1089 int nfd, int32_t *retval, int sel_pass, struct waitq_set *wqset);
6d2010ae 1090static int selcount(struct proc *p, u_int32_t *ibits, int nfd, int *count);
f427ee49
A
1091static int seldrop_locked(struct proc *p, u_int32_t *ibits, int nfd, int lim, int *need_wakeup);
1092static int seldrop(struct proc *p, u_int32_t *ibits, int nfd, int lim);
4bd07ac2 1093static int select_internal(struct proc *p, struct select_nocancel_args *uap, uint64_t timeout, int32_t *retval);
1c79356b
A
1094
1095/*
1096 * Select system call.
2d21ac55
A
1097 *
1098 * Returns: 0 Success
1099 * EINVAL Invalid argument
1100 * EAGAIN Nonconformant error if allocation fails
1c79356b 1101 */
9bccf70c 1102int
b0d623f7 1103select(struct proc *p, struct select_args *uap, int32_t *retval)
2d21ac55
A
1104{
1105 __pthread_testcancel(1);
4bd07ac2 1106 return select_nocancel(p, (struct select_nocancel_args *)uap, retval);
2d21ac55
A
1107}
1108
1109int
b0d623f7 1110select_nocancel(struct proc *p, struct select_nocancel_args *uap, int32_t *retval)
4bd07ac2
A
1111{
1112 uint64_t timeout = 0;
1113
1114 if (uap->tv) {
1115 int err;
1116 struct timeval atv;
1117 if (IS_64BIT_PROCESS(p)) {
1118 struct user64_timeval atv64;
1119 err = copyin(uap->tv, (caddr_t)&atv64, sizeof(atv64));
1120 /* Loses resolution - assume timeout < 68 years */
f427ee49 1121 atv.tv_sec = (__darwin_time_t)atv64.tv_sec;
4bd07ac2
A
1122 atv.tv_usec = atv64.tv_usec;
1123 } else {
1124 struct user32_timeval atv32;
1125 err = copyin(uap->tv, (caddr_t)&atv32, sizeof(atv32));
1126 atv.tv_sec = atv32.tv_sec;
1127 atv.tv_usec = atv32.tv_usec;
1128 }
0a7de745 1129 if (err) {
4bd07ac2 1130 return err;
0a7de745 1131 }
4bd07ac2
A
1132
1133 if (itimerfix(&atv)) {
1134 err = EINVAL;
1135 return err;
1136 }
1137
1138 clock_absolutetime_interval_to_deadline(tvtoabstime(&atv), &timeout);
1139 }
1140
1141 return select_internal(p, uap, timeout, retval);
1142}
1143
1144int
1145pselect(struct proc *p, struct pselect_args *uap, int32_t *retval)
1146{
1147 __pthread_testcancel(1);
1148 return pselect_nocancel(p, (struct pselect_nocancel_args *)uap, retval);
1149}
1150
/*
 * Non-cancellable variant of pselect(): copy in the optional timespec
 * timeout and signal mask, temporarily install the new mask, run the
 * common select path, then restore the original mask on direct return.
 */
int
pselect_nocancel(struct proc *p, struct pselect_nocancel_args *uap, int32_t *retval)
{
	int err;
	struct uthread *ut;
	uint64_t timeout = 0;   /* 0 == wait forever */

	if (uap->ts) {
		struct timespec ts;

		/* copy in the timeout in the caller's ABI-specific layout */
		if (IS_64BIT_PROCESS(p)) {
			struct user64_timespec ts64;
			err = copyin(uap->ts, (caddr_t)&ts64, sizeof(ts64));
			ts.tv_sec = (__darwin_time_t)ts64.tv_sec;
			ts.tv_nsec = (long)ts64.tv_nsec;
		} else {
			struct user32_timespec ts32;
			err = copyin(uap->ts, (caddr_t)&ts32, sizeof(ts32));
			ts.tv_sec = ts32.tv_sec;
			ts.tv_nsec = ts32.tv_nsec;
		}
		if (err) {
			return err;
		}

		if (!timespec_is_valid(&ts)) {
			return EINVAL;
		}
		/* convert relative interval to an absolute deadline */
		clock_absolutetime_interval_to_deadline(tstoabstime(&ts), &timeout);
	}

	ut = get_bsdthread_info(current_thread());

	if (uap->mask != USER_ADDR_NULL) {
		/* save current mask, then copyin and set new mask */
		sigset_t newset;
		err = copyin(uap->mask, &newset, sizeof(sigset_t));
		if (err) {
			return err;
		}
		ut->uu_oldmask = ut->uu_sigmask;
		ut->uu_flag |= UT_SAS_OLDMASK;
		/* never allow unblockable signals (sigcantmask) to be masked */
		ut->uu_sigmask = (newset & ~sigcantmask);
	}

	err = select_internal(p, (struct select_nocancel_args *)uap, timeout, retval);

	if (err != EINTR && ut->uu_flag & UT_SAS_OLDMASK) {
		/*
		 * Restore old mask (direct return case). NOTE: EINTR can also be returned
		 * if the thread is cancelled. In that case, we don't reset the signal
		 * mask to its original value (which usually happens in the signal
		 * delivery path). This behavior is permitted by POSIX.
		 */
		ut->uu_sigmask = ut->uu_oldmask;
		ut->uu_oldmask = 0;
		ut->uu_flag &= ~UT_SAS_OLDMASK;
	}

	return err;
}
1212
1213/*
1214 * Generic implementation of {,p}select. Care: we type-pun uap across the two
1215 * syscalls, which differ slightly. The first 4 arguments (nfds and the fd sets)
1216 * are identical. The 5th (timeout) argument points to different types, so we
1217 * unpack in the syscall-specific code, but the generic code still does a null
1218 * check on this argument to determine if a timeout was specified.
1219 */
static int
select_internal(struct proc *p, struct select_nocancel_args *uap, uint64_t timeout, int32_t *retval)
{
	int error = 0;
	u_int ni, nw;           /* ni: bytes per fd_set; nw: words per fd_set */
	thread_t th_act;
	struct uthread *uth;
	struct _select *sel;
	struct _select_data *seldata;
	int needzerofill = 1;
	int count = 0;
	size_t sz = 0;

	/*
	 * Per-thread select state lives in the uthread so that
	 * selcontinue()/selprocess() can resume after a blocking wait.
	 */
	th_act = current_thread();
	uth = get_bsdthread_info(th_act);
	sel = &uth->uu_select;
	seldata = &uth->uu_save.uus_select_data;
	*retval = 0;

	seldata->args = uap;
	seldata->retval = retval;
	seldata->wqp = NULL;
	seldata->count = 0;

	if (uap->nd < 0) {
		return EINVAL;
	}

	/* select on thread of process that already called proc_exit() */
	if (p->p_fd == NULL) {
		return EBADF;
	}

	if (uap->nd > p->p_fd->fd_nfiles) {
		uap->nd = p->p_fd->fd_nfiles; /* forgiving; slightly wrong */
	}

	nw = howmany(uap->nd, NFDBITS);
	ni = nw * sizeof(fd_mask);

	/*
	 * if the previously allocated space for the bits is smaller than
	 * what is requested or no space has yet been allocated for this
	 * thread, allocate enough space now.
	 *
	 * Note: If this process fails, select() will return EAGAIN; this
	 * is the same thing poll() returns in a no-memory situation, but
	 * it is not a POSIX compliant error code for select().
	 */
	if (sel->nbytes < (3 * ni)) {
		int nbytes = 3 * ni;    /* room for in/out/except sets */

		/* Free previous allocation, if any */
		if (sel->ibits != NULL) {
			FREE(sel->ibits, M_TEMP);
		}
		if (sel->obits != NULL) {
			FREE(sel->obits, M_TEMP);
			/* NULL out; subsequent ibits allocation may fail */
			sel->obits = NULL;
		}

		MALLOC(sel->ibits, u_int32_t *, nbytes, M_TEMP, M_WAITOK | M_ZERO);
		if (sel->ibits == NULL) {
			return EAGAIN;
		}
		MALLOC(sel->obits, u_int32_t *, nbytes, M_TEMP, M_WAITOK | M_ZERO);
		if (sel->obits == NULL) {
			FREE(sel->ibits, M_TEMP);
			sel->ibits = NULL;
			return EAGAIN;
		}
		sel->nbytes = nbytes;
		/* M_ZERO already cleared the new buffers */
		needzerofill = 0;
	}

	if (needzerofill) {
		bzero((caddr_t)sel->ibits, sel->nbytes);
		bzero((caddr_t)sel->obits, sel->nbytes);
	}

	/*
	 * get the bits from the user address space
	 */
#define getbits(name, x) \
	do { \
	        if (uap->name && (error = copyin(uap->name, \
	                (caddr_t)&sel->ibits[(x) * nw], ni))) \
	                goto continuation; \
	} while (0)

	getbits(in, 0);
	getbits(ou, 1);
	getbits(ex, 2);
#undef getbits

	seldata->abstime = timeout;

	/* count set bits and take an iocount on each referenced fd */
	if ((error = selcount(p, sel->ibits, uap->nd, &count))) {
		goto continuation;
	}

	/*
	 * We need an array of waitq pointers. This is due to the new way
	 * in which waitqs are linked to sets. When a thread selects on a
	 * file descriptor, a waitq (embedded in a selinfo structure) is
	 * added to the thread's local waitq set. There is no longer any
	 * way to directly iterate over all members of a given waitq set.
	 * The process of linking a waitq into a set may allocate a link
	 * table object. Because we can't iterate over all the waitqs to
	 * which our thread waitq set belongs, we need a way of removing
	 * this link object!
	 *
	 * Thus we need a buffer which will hold one waitq pointer
	 * per FD being selected. During the tear-down phase we can use
	 * these pointers to dis-associate the underlying selinfo's waitq
	 * from our thread's waitq set.
	 *
	 * Because we also need to allocate a waitq set for this thread,
	 * we use a bare buffer pointer to hold all the memory. Note that
	 * this memory is cached in the thread pointer and not reaped until
	 * the thread exists. This is generally OK because threads that
	 * call select tend to keep calling select repeatedly.
	 */
	sz = ALIGN(sizeof(struct waitq_set)) + (count * sizeof(uint64_t));
	if (sz > uth->uu_wqstate_sz) {
		/* (re)allocate a buffer to hold waitq pointers */
		if (uth->uu_wqset) {
			if (waitq_set_is_valid(uth->uu_wqset)) {
				waitq_set_deinit(uth->uu_wqset);
			}
			FREE(uth->uu_wqset, M_SELECT);
		} else if (uth->uu_wqstate_sz && !uth->uu_wqset) {
			panic("select: thread structure corrupt! "
			    "uu_wqstate_sz:%ld, wqstate_buf == NULL",
			    uth->uu_wqstate_sz);
		}
		uth->uu_wqstate_sz = sz;
		MALLOC(uth->uu_wqset, struct waitq_set *, sz, M_SELECT, M_WAITOK);
		if (!uth->uu_wqset) {
			panic("can't allocate %ld bytes for wqstate buffer",
			    uth->uu_wqstate_sz);
		}
		waitq_set_init(uth->uu_wqset,
		    SYNC_POLICY_FIFO | SYNC_POLICY_PREPOST, NULL, NULL);
	}

	/* the set may have been torn down by a previous selprocess() pass */
	if (!waitq_set_is_valid(uth->uu_wqset)) {
		waitq_set_init(uth->uu_wqset,
		    SYNC_POLICY_FIFO | SYNC_POLICY_PREPOST, NULL, NULL);
	}

	/* the last chunk of our buffer is an array of waitq pointers */
	seldata->wqp = (uint64_t *)((char *)(uth->uu_wqset) + ALIGN(sizeof(struct waitq_set)));
	bzero(seldata->wqp, sz - ALIGN(sizeof(struct waitq_set)));

	seldata->count = count;

continuation:

	if (error) {
		/*
		 * We have already cleaned up any state we established,
		 * either locally or as a result of selcount(). We don't
		 * need to wait_subqueue_unlink_all(), since we haven't set
		 * anything at this point.
		 */
		return error;
	}

	return selprocess(0, SEL_FIRSTPASS);
}
1391
/*
 * Continuation entry point: re-invoked by tsleep1() after a blocked
 * select wakes up; resumes the state machine on its second pass.
 */
int
selcontinue(int error)
{
	return selprocess(error, SEL_SECONDPASS);
}
1397
6d2010ae
A
1398
1399/*
1400 * selprocess
1401 *
1402 * Parameters: error The error code from our caller
1403 * sel_pass The pass we are on
1404 */
1c79356b 1405int
91447636 1406selprocess(int error, int sel_pass)
1c79356b 1407{
9bccf70c 1408 int ncoll;
1c79356b 1409 u_int ni, nw;
91447636 1410 thread_t th_act;
0a7de745 1411 struct uthread *uth;
1c79356b 1412 struct proc *p;
fe8ab488 1413 struct select_nocancel_args *uap;
1c79356b
A
1414 int *retval;
1415 struct _select *sel;
fe8ab488 1416 struct _select_data *seldata;
0b4e3aa0 1417 int unwind = 1;
9bccf70c 1418 int prepost = 0;
0b4e3aa0
A
1419 int somewakeup = 0;
1420 int doretry = 0;
9bccf70c 1421 wait_result_t wait_result;
1c79356b
A
1422
1423 p = current_proc();
91447636 1424 th_act = current_thread();
1c79356b 1425 uth = get_bsdthread_info(th_act);
91447636 1426 sel = &uth->uu_select;
d9a64523 1427 seldata = &uth->uu_save.uus_select_data;
fe8ab488
A
1428 uap = seldata->args;
1429 retval = seldata->retval;
1c79356b 1430
0a7de745 1431 if ((error != 0) && (sel_pass == SEL_FIRSTPASS)) {
3e170ce0 1432 unwind = 0;
0a7de745
A
1433 }
1434 if (seldata->count == 0) {
3e170ce0 1435 unwind = 0;
0a7de745 1436 }
1c79356b 1437retry:
0a7de745 1438 if (error != 0) {
6d2010ae 1439 goto done;
0a7de745 1440 }
0b4e3aa0 1441
1c79356b 1442 ncoll = nselcoll;
b0d623f7 1443 OSBitOrAtomic(P_SELECT, &p->p_flag);
3e170ce0 1444
0b4e3aa0 1445 /* skip scans if the select is just for timeouts */
fe8ab488 1446 if (seldata->count) {
3e170ce0 1447 error = selscan(p, sel, seldata, uap->nd, retval, sel_pass, uth->uu_wqset);
0b4e3aa0
A
1448 if (error || *retval) {
1449 goto done;
1450 }
3e170ce0
A
1451 if (prepost || somewakeup) {
1452 /*
1453 * if the select of log, then we can wakeup and
1454 * discover some one else already read the data;
1455 * go to select again if time permits
1456 */
1457 prepost = 0;
1458 somewakeup = 0;
1459 doretry = 1;
0b4e3aa0
A
1460 }
1461 }
1462
9bccf70c 1463 if (uap->tv) {
0a7de745 1464 uint64_t now;
9bccf70c
A
1465
1466 clock_get_uptime(&now);
0a7de745 1467 if (now >= seldata->abstime) {
9bccf70c 1468 goto done;
0a7de745 1469 }
1c79356b 1470 }
0b4e3aa0
A
1471
1472 if (doretry) {
1473 /* cleanup obits and try again */
1474 doretry = 0;
1475 sel_pass = SEL_FIRSTPASS;
1476 goto retry;
1477 }
1478
1c79356b
A
1479 /*
1480 * To effect a poll, the timeout argument should be
1481 * non-nil, pointing to a zero-valued timeval structure.
1482 */
fe8ab488 1483 if (uap->tv && seldata->abstime == 0) {
1c79356b
A
1484 goto done;
1485 }
0b4e3aa0
A
1486
1487 /* No spurious wakeups due to colls,no need to check for them */
0a7de745 1488 if ((sel_pass == SEL_SECONDPASS) || ((p->p_flag & P_SELECT) == 0)) {
0b4e3aa0 1489 sel_pass = SEL_FIRSTPASS;
1c79356b
A
1490 goto retry;
1491 }
0b4e3aa0 1492
b0d623f7 1493 OSBitAndAtomic(~((uint32_t)P_SELECT), &p->p_flag);
1c79356b 1494
0b4e3aa0 1495 /* if the select is just for timeout skip check */
0a7de745 1496 if (seldata->count && (sel_pass == SEL_SECONDPASS)) {
0b4e3aa0 1497 panic("selprocess: 2nd pass assertwaiting");
0a7de745 1498 }
0b4e3aa0 1499
3e170ce0
A
1500 /* waitq_set has waitqueue as first element */
1501 wait_result = waitq_assert_wait64_leeway((struct waitq *)uth->uu_wqset,
0a7de745
A
1502 NO_EVENT64, THREAD_ABORTSAFE,
1503 TIMEOUT_URGENCY_USER_NORMAL,
1504 seldata->abstime,
1505 TIMEOUT_NO_LEEWAY);
9bccf70c
A
1506 if (wait_result != THREAD_AWAKENED) {
1507 /* there are no preposted events */
91447636 1508 error = tsleep1(NULL, PSOCK | PCATCH,
0a7de745
A
1509 "select", 0, selcontinue);
1510 } else {
0b4e3aa0
A
1511 prepost = 1;
1512 error = 0;
1513 }
1514
0b4e3aa0 1515 if (error == 0) {
6d2010ae 1516 sel_pass = SEL_SECONDPASS;
0a7de745 1517 if (!prepost) {
6d2010ae 1518 somewakeup = 1;
0a7de745 1519 }
1c79356b 1520 goto retry;
0b4e3aa0 1521 }
1c79356b 1522done:
91447636 1523 if (unwind) {
f427ee49 1524 seldrop(p, sel->ibits, uap->nd, seldata->count);
3e170ce0
A
1525 waitq_set_deinit(uth->uu_wqset);
1526 /*
1527 * zero out the waitq pointer array to avoid use-after free
1528 * errors in the selcount error path (seldrop_locked) if/when
1529 * the thread re-calls select().
1530 */
1531 bzero((void *)uth->uu_wqset, uth->uu_wqstate_sz);
91447636 1532 }
b0d623f7 1533 OSBitAndAtomic(~((uint32_t)P_SELECT), &p->p_flag);
1c79356b 1534 /* select is not restarted after signals... */
0a7de745 1535 if (error == ERESTART) {
1c79356b 1536 error = EINTR;
0a7de745
A
1537 }
1538 if (error == EWOULDBLOCK) {
1c79356b 1539 error = 0;
0a7de745 1540 }
1c79356b
A
1541 nw = howmany(uap->nd, NFDBITS);
1542 ni = nw * sizeof(fd_mask);
1543
0a7de745 1544#define putbits(name, x) \
1c79356b 1545 do { \
0a7de745
A
1546 if (uap->name && (error2 = \
1547 copyout((caddr_t)&sel->obits[(x) * nw], uap->name, ni))) \
1548 error = error2; \
1c79356b
A
1549 } while (0)
1550
1551 if (error == 0) {
1552 int error2;
1553
1554 putbits(in, 0);
1555 putbits(ou, 1);
1556 putbits(ex, 2);
1557#undef putbits
1558 }
4bd07ac2
A
1559
1560 if (error != EINTR && sel_pass == SEL_SECONDPASS && uth->uu_flag & UT_SAS_OLDMASK) {
1561 /* restore signal mask - continuation case */
1562 uth->uu_sigmask = uth->uu_oldmask;
1563 uth->uu_oldmask = 0;
1564 uth->uu_flag &= ~UT_SAS_OLDMASK;
1565 }
1566
0a7de745 1567 return error;
1c79356b
A
1568}
1569
6d2010ae 1570
3e170ce0
A
1571/**
1572 * remove the fileproc's underlying waitq from the supplied waitq set;
1573 * clear FP_INSELECT when appropriate
1574 *
1575 * Parameters:
1576 * fp File proc that is potentially currently in select
1577 * wqset Waitq set to which the fileproc may belong
1578 * (usually this is the thread's private waitq set)
1579 * Conditions:
1580 * proc_fdlock is held
1581 */
0a7de745
A
static void
selunlinkfp(struct fileproc *fp, uint64_t wqp_id, struct waitq_set *wqset)
{
	int valid_set = waitq_set_is_valid(wqset);
	int valid_q = !!wqp_id;         /* 0 means fo_select never called selrecord() */

	/*
	 * This could be called (from selcount error path) before we setup
	 * the thread's wqset. Check the wqset passed in, and only unlink if
	 * the set is valid.
	 */

	/* unlink the underlying waitq from the input set (thread waitq set) */
	if (valid_q && valid_set) {
		waitq_unlink_by_prepost_id(wqp_id, wqset);
	}

	/* allow passing an invalid fp for seldrop unwind */
	if (!(fp->fp_flags & (FP_INSELECT | FP_SELCONFLICT))) {
		return;
	}

	/*
	 * We can always remove the conflict queue from our thread's set: this
	 * will not affect other threads that potentially need to be awoken on
	 * the conflict queue during a fileproc_drain - those sets will still
	 * be linked with the global conflict queue, and the last waiter
	 * on the fp clears the CONFLICT marker.
	 */
	if (valid_set && (fp->fp_flags & FP_SELCONFLICT)) {
		waitq_unlink(&select_conflict_queue, wqset);
	}

	/* jca: TODO:
	 * This isn't quite right - we don't actually know if this
	 * fileproc is in another select or not! Here we just assume
	 * that if we were the first thread to select on the FD, then
	 * we'll be the one to clear this flag...
	 */
	if (valid_set && fp->fp_wset == (void *)wqset) {
		fp->fp_flags &= ~FP_INSELECT;
		fp->fp_wset = NULL;
	}
}
1626
1627/**
1628 * connect a fileproc to the given wqset, potentially bridging to a waitq
1629 * pointed to indirectly by wq_data
1630 *
1631 * Parameters:
1632 * fp File proc potentially currently in select
1633 * wq_data Pointer to a pointer to a waitq (could be NULL)
1634 * wqset Waitq set to which the fileproc should now belong
1635 * (usually this is the thread's private waitq set)
1636 *
1637 * Conditions:
1638 * proc_fdlock is held
1639 */
0a7de745
A
static uint64_t
sellinkfp(struct fileproc *fp, void **wq_data, struct waitq_set *wqset)
{
	struct waitq *f_wq = NULL;

	/* selscan() marks FP_INSELECT before calling fo_select() */
	if ((fp->fp_flags & FP_INSELECT) != FP_INSELECT) {
		if (wq_data) {
			panic("non-null data:%p on fp:%p not in select?!"
			    "(wqset:%p)", wq_data, fp, wqset);
		}
		return 0;
	}

	/* conflicted fds wake via the shared global conflict queue instead */
	if ((fp->fp_flags & FP_SELCONFLICT) == FP_SELCONFLICT) {
		waitq_link(&select_conflict_queue, wqset, WAITQ_SHOULD_LOCK, NULL);
	}

	/*
	 * The wq_data parameter has potentially been set by selrecord called
	 * from a subsystems fo_select() function. If the subsystem does not
	 * call selrecord, then wq_data will be NULL
	 *
	 * Use memcpy to get the value into a proper pointer because
	 * wq_data most likely points to a stack variable that could be
	 * unaligned on 32-bit systems.
	 */
	if (wq_data) {
		memcpy(&f_wq, wq_data, sizeof(f_wq));
		if (!waitq_is_valid(f_wq)) {
			f_wq = NULL;
		}
	}

	/* record the first thread's wqset in the fileproc structure */
	if (!fp->fp_wset) {
		fp->fp_wset = (void *)wqset;
	}

	/* handles NULL f_wq */
	return waitq_get_prepost_id(f_wq);
}
1681
1682
6d2010ae
A
1683/*
1684 * selscan
1685 *
1686 * Parameters: p Process performing the select
1687 * sel The per-thread select context structure
1688 * nfd The number of file descriptors to scan
1689 * retval The per thread system call return area
1690 * sel_pass Which pass this is; allowed values are
1691 * SEL_FIRSTPASS and SEL_SECONDPASS
3e170ce0 1692 * wqset The per thread wait queue set
6d2010ae
A
1693 *
1694 * Returns: 0 Success
1695 * EIO Invalid p->p_fd field XXX Obsolete?
1696 * EBADF One of the files in the bit vector is
1697 * invalid.
1698 */
1c79356b 1699static int
3e170ce0 1700selscan(struct proc *p, struct _select *sel, struct _select_data * seldata,
0a7de745 1701 int nfd, int32_t *retval, int sel_pass, struct waitq_set *wqset)
1c79356b 1702{
2d21ac55
A
1703 struct filedesc *fdp = p->p_fd;
1704 int msk, i, j, fd;
1705 u_int32_t bits;
91447636 1706 struct fileproc *fp;
0a7de745
A
1707 int n = 0; /* count of bits */
1708 int nc = 0; /* bit vector offset (nc'th bit) */
1c79356b
A
1709 static int flag[3] = { FREAD, FWRITE, 0 };
1710 u_int32_t *iptr, *optr;
1711 u_int nw;
0b4e3aa0 1712 u_int32_t *ibits, *obits;
3e170ce0 1713 uint64_t reserved_link, *rl_ptr = NULL;
6d2010ae 1714 int count;
2d21ac55 1715 struct vfs_context context = *vfs_context_current();
1c79356b
A
1716
1717 /*
1718 * Problems when reboot; due to MacOSX signal probs
1719 * in Beaker1C ; verify that the p->p_fd is valid
1720 */
1721 if (fdp == NULL) {
0a7de745
A
1722 *retval = 0;
1723 return EIO;
1c79356b 1724 }
0b4e3aa0
A
1725 ibits = sel->ibits;
1726 obits = sel->obits;
0b4e3aa0 1727
1c79356b
A
1728 nw = howmany(nfd, NFDBITS);
1729
fe8ab488 1730 count = seldata->count;
2d21ac55
A
1731
1732 nc = 0;
3e170ce0
A
1733 if (!count) {
1734 *retval = 0;
1735 return 0;
1736 }
1737
1738 proc_fdlock(p);
1739 for (msk = 0; msk < 3; msk++) {
1740 iptr = (u_int32_t *)&ibits[msk * nw];
1741 optr = (u_int32_t *)&obits[msk * nw];
1742
1743 for (i = 0; i < nfd; i += NFDBITS) {
0a7de745 1744 bits = iptr[i / NFDBITS];
2d21ac55 1745
3e170ce0 1746 while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
cb323159 1747 bits &= ~(1U << j);
2d21ac55 1748
f427ee49
A
1749 fp = fp_get_noref_locked(p, fd);
1750 if (fp == NULL) {
3e170ce0
A
1751 /*
1752 * If we abort because of a bad
1753 * fd, let the caller unwind...
1754 */
1755 proc_fdunlock(p);
0a7de745 1756 return EBADF;
3e170ce0
A
1757 }
1758 if (sel_pass == SEL_SECONDPASS) {
1759 reserved_link = 0;
1760 rl_ptr = NULL;
1761 selunlinkfp(fp, seldata->wqp[nc], wqset);
1762 } else {
1763 reserved_link = waitq_link_reserve((struct waitq *)wqset);
1764 rl_ptr = &reserved_link;
f427ee49 1765 if (fp->fp_flags & FP_INSELECT) {
3e170ce0 1766 /* someone is already in select on this fp */
f427ee49 1767 fp->fp_flags |= FP_SELCONFLICT;
0a7de745 1768 } else {
f427ee49 1769 fp->fp_flags |= FP_INSELECT;
0a7de745 1770 }
d9a64523
A
1771
1772 waitq_set_lazy_init_link(wqset);
3e170ce0 1773 }
2d21ac55 1774
3e170ce0 1775 context.vc_ucred = fp->f_cred;
2d21ac55 1776
3e170ce0
A
1777 /*
1778 * stash this value b/c fo_select may replace
1779 * reserved_link with a pointer to a waitq object
1780 */
1781 uint64_t rsvd = reserved_link;
1782
1783 /* The select; set the bit, if true */
1784 if (fp->f_ops && fp->f_type
0a7de745 1785 && fo_select(fp, flag[msk], rl_ptr, &context)) {
cb323159 1786 optr[fd / NFDBITS] |= (1U << (fd % NFDBITS));
3e170ce0
A
1787 n++;
1788 }
1789 if (sel_pass == SEL_FIRSTPASS) {
1790 waitq_link_release(rsvd);
1791 /*
1792 * If the fp's supporting selinfo structure was linked
1793 * to this thread's waitq set, then 'reserved_link'
1794 * will have been updated by selrecord to be a pointer
1795 * to the selinfo's waitq.
1796 */
0a7de745 1797 if (reserved_link == rsvd) {
3e170ce0 1798 rl_ptr = NULL; /* fo_select never called selrecord() */
0a7de745 1799 }
3e170ce0
A
1800 /*
1801 * Hook up the thread's waitq set either to
1802 * the fileproc structure, or to the global
1803 * conflict queue: but only on the first
1804 * select pass.
1805 */
1806 seldata->wqp[nc] = sellinkfp(fp, (void **)rl_ptr, wqset);
2d21ac55 1807 }
3e170ce0 1808 nc++;
2d21ac55
A
1809 }
1810 }
0b4e3aa0 1811 }
3e170ce0
A
1812 proc_fdunlock(p);
1813
1c79356b 1814 *retval = n;
0a7de745 1815 return 0;
1c79356b
A
1816}
1817
cb323159 1818static int poll_callback(struct kevent_qos_s *, kevent_ctx_t);
91447636 1819
9bccf70c 1820int
b0d623f7 1821poll(struct proc *p, struct poll_args *uap, int32_t *retval)
2d21ac55
A
1822{
1823 __pthread_testcancel(1);
0a7de745 1824 return poll_nocancel(p, (struct poll_nocancel_args *)uap, retval);
2d21ac55
A
1825}
1826
1827
/*
 * Non-cancellable variant of poll(), implemented on top of a private
 * kqueue: each pollfd entry is translated into one or more one-shot
 * kevents, kqueue_scan() waits for them, and poll_callback() translates
 * fired events back into revents bits.
 */
int
poll_nocancel(struct proc *p, struct poll_nocancel_args *uap, int32_t *retval)
{
	struct pollfd *fds = NULL;
	struct kqueue *kq = NULL;
	int ncoll, error = 0;
	u_int nfds = uap->nfds;
	u_int rfds = 0;         /* number of fds with non-zero revents */
	rlim_t nofile = proc_limitgetcur(p, RLIMIT_NOFILE, TRUE);

	/*
	 * This is kinda bogus. We have fd limits, but that is not
	 * really related to the size of the pollfd array. Make sure
	 * we let the process use at least FD_SETSIZE entries and at
	 * least enough for the current limits. We want to be reasonably
	 * safe, but not overly restrictive.
	 */
	if (nfds > OPEN_MAX ||
	    (nfds > nofile && (proc_suser(p) || nfds > FD_SETSIZE))) {
		return EINVAL;
	}

	kq = kqueue_alloc(p);
	if (kq == NULL) {
		return EAGAIN;
	}

	if (nfds) {
		size_t ni = nfds * sizeof(struct pollfd);
		MALLOC(fds, struct pollfd *, ni, M_TEMP, M_WAITOK);
		if (NULL == fds) {
			error = EAGAIN;
			goto out;
		}

		error = copyin(uap->fds, fds, nfds * sizeof(struct pollfd));
		if (error) {
			goto out;
		}
	}

	/* JMM - all this P_SELECT stuff is bogus */
	ncoll = nselcoll;
	OSBitOrAtomic(P_SELECT, &p->p_flag);
	for (u_int i = 0; i < nfds; i++) {
		short events = fds[i].events;
		__assert_only int rc;

		/* per spec, ignore fd values below zero */
		if (fds[i].fd < 0) {
			fds[i].revents = 0;
			continue;
		}

		/* convert the poll event into a kqueue kevent */
		struct kevent_qos_s kev = {
			.ident = fds[i].fd,
			.flags = EV_ADD | EV_ONESHOT | EV_POLL,
			.udata = CAST_USER_ADDR_T(&fds[i])
		};

		/* Handle input events */
		if (events & (POLLIN | POLLRDNORM | POLLPRI | POLLRDBAND | POLLHUP)) {
			kev.filter = EVFILT_READ;
			if (events & (POLLPRI | POLLRDBAND)) {
				kev.flags |= EV_OOBAND;
			}
			rc = kevent_register(kq, &kev, NULL);
			assert((rc & FILTER_REGISTER_WAIT) == 0);
		}

		/* Handle output events */
		if ((kev.flags & EV_ERROR) == 0 &&
		    (events & (POLLOUT | POLLWRNORM | POLLWRBAND))) {
			kev.filter = EVFILT_WRITE;
			rc = kevent_register(kq, &kev, NULL);
			assert((rc & FILTER_REGISTER_WAIT) == 0);
		}

		/* Handle BSD extension vnode events */
		if ((kev.flags & EV_ERROR) == 0 &&
		    (events & (POLLEXTEND | POLLATTRIB | POLLNLINK | POLLWRITE))) {
			kev.filter = EVFILT_VNODE;
			kev.fflags = 0;
			if (events & POLLEXTEND) {
				kev.fflags |= NOTE_EXTEND;
			}
			if (events & POLLATTRIB) {
				kev.fflags |= NOTE_ATTRIB;
			}
			if (events & POLLNLINK) {
				kev.fflags |= NOTE_LINK;
			}
			if (events & POLLWRITE) {
				kev.fflags |= NOTE_WRITE;
			}
			rc = kevent_register(kq, &kev, NULL);
			assert((rc & FILTER_REGISTER_WAIT) == 0);
		}

		/* kevent_register reports failure by setting EV_ERROR in kev.flags */
		if (kev.flags & EV_ERROR) {
			fds[i].revents = POLLNVAL;
			rfds++;
		} else {
			fds[i].revents = 0;
		}
	}

	/*
	 * Did we have any trouble registering?
	 * If user space passed 0 FDs, then respect any timeout value passed.
	 * This is an extremely inefficient sleep. If user space passed one or
	 * more FDs, and we had trouble registering _all_ of them, then bail
	 * out. If a subset of the provided FDs failed to register, then we
	 * will still call the kqueue_scan function.
	 */
	if (nfds && (rfds == nfds)) {
		goto done;
	}

	/* scan for, and possibly wait for, the kevents to trigger */
	kevent_ctx_t kectx = kevent_get_context(current_thread());
	*kectx = (struct kevent_ctx_s){
		.kec_process_noutputs = rfds,
		.kec_process_flags = KEVENT_FLAG_POLL,
		.kec_deadline = 0, /* wait forever */
	};

	/*
	 * If any events have trouble registering, an event has fired and we
	 * shouldn't wait for events in kqueue_scan.
	 */
	if (rfds) {
		kectx->kec_process_flags |= KEVENT_FLAG_IMMEDIATE;
	} else if (uap->timeout != -1) {
		/* uap->timeout is in milliseconds */
		clock_interval_to_deadline(uap->timeout, NSEC_PER_MSEC,
		    &kectx->kec_deadline);
	}

	error = kqueue_scan(kq, kectx->kec_process_flags, kectx, poll_callback);
	rfds = kectx->kec_process_noutputs;

done:
	OSBitAndAtomic(~((uint32_t)P_SELECT), &p->p_flag);
	/* poll is not restarted after signals... */
	if (error == ERESTART) {
		error = EINTR;
	}
	if (error == 0) {
		error = copyout(fds, uap->fds, nfds * sizeof(struct pollfd));
		*retval = rfds;
	}

out:
	if (NULL != fds) {
		FREE(fds, M_TEMP);
	}

	kqueue_dealloc(kq);
	return error;
}
1989
cb323159
A
/*
 * poll_callback
 *
 * Per-kevent callback invoked from kqueue_scan() on behalf of poll().
 * Translates a fired kevent back into revents bits on the corresponding
 * struct pollfd (stashed in kevp->udata during registration), and counts
 * pollfds that transition from "no events" to "some events" so poll()
 * can report the number of ready descriptors.
 *
 * Always returns 0 (the scan continues regardless).
 */
static int
poll_callback(struct kevent_qos_s *kevp, kevent_ctx_t kectx)
{
	struct pollfd *fds = CAST_DOWN(struct pollfd *, kevp->udata);
	short prev_revents = fds->revents;	/* remember state to detect a fresh transition below */
	short mask = 0;

	/* convert the results back into revents */
	if (kevp->flags & EV_EOF) {
		fds->revents |= POLLHUP;
	}
	if (kevp->flags & EV_ERROR) {
		fds->revents |= POLLERR;
	}

	switch (kevp->filter) {
	case EVFILT_READ:
		/*
		 * On hangup, report readability (and urgent-data bits)
		 * unconditionally; otherwise report urgent data only when
		 * the filter flagged out-of-band data via EV_OOBAND.
		 */
		if (fds->revents & POLLHUP) {
			mask = (POLLIN | POLLRDNORM | POLLPRI | POLLRDBAND);
		} else {
			mask = (POLLIN | POLLRDNORM);
			if (kevp->flags & EV_OOBAND) {
				mask |= (POLLPRI | POLLRDBAND);
			}
		}
		/* only surface events the caller actually asked for */
		fds->revents |= (fds->events & mask);
		break;

	case EVFILT_WRITE:
		/* a hung-up descriptor is never reported writable */
		if (!(fds->revents & POLLHUP)) {
			fds->revents |= (fds->events & (POLLOUT | POLLWRNORM | POLLWRBAND));
		}
		break;

	case EVFILT_VNODE:
		/* map vnode notification flags onto the extended poll bits */
		if (kevp->fflags & NOTE_EXTEND) {
			fds->revents |= (fds->events & POLLEXTEND);
		}
		if (kevp->fflags & NOTE_ATTRIB) {
			fds->revents |= (fds->events & POLLATTRIB);
		}
		if (kevp->fflags & NOTE_LINK) {
			fds->revents |= (fds->events & POLLNLINK);
		}
		if (kevp->fflags & NOTE_WRITE) {
			fds->revents |= (fds->events & POLLWRITE);
		}
		break;
	}

	/* count each pollfd only once, on its first 0 -> nonzero transition */
	if (fds->revents != 0 && prev_revents == 0) {
		kectx->kec_process_noutputs++;
	}

	return 0;
}
0a7de745 2046
91447636
A
/*
 * seltrue
 *
 * Trivial device select routine: unconditionally reports the device as
 * ready, for drivers that never block on select/poll.
 */
int
seltrue(__unused dev_t dev, __unused int flag, __unused struct proc *p)
{
	return 1;
}
2052
6d2010ae
A
2053/*
2054 * selcount
2055 *
2056 * Count the number of bits set in the input bit vector, and establish an
f427ee49 2057 * outstanding fp->fp_iocount for each of the descriptors which will be in
6d2010ae
A
2058 * use in the select operation.
2059 *
2060 * Parameters: p The process doing the select
2061 * ibits The input bit vector
2062 * nfd The number of fd's in the vector
2063 * countp Pointer to where to store the bit count
2064 *
2065 * Returns: 0 Success
2066 * EIO Bad per process open file table
2067 * EBADF One of the bits in the input bit vector
2068 * references an invalid fd
2069 *
2070 * Implicit: *countp (modified) Count of fd's
2071 *
2072 * Notes: This function is the first pass under the proc_fdlock() that
2073 * permits us to recognize invalid descriptors in the bit vector;
2074 * the may, however, not remain valid through the drop and
2075 * later reacquisition of the proc_fdlock().
2076 */
91447636 2077static int
6d2010ae 2078selcount(struct proc *p, u_int32_t *ibits, int nfd, int *countp)
91447636 2079{
2d21ac55
A
2080 struct filedesc *fdp = p->p_fd;
2081 int msk, i, j, fd;
2082 u_int32_t bits;
91447636 2083 struct fileproc *fp;
0b4e3aa0 2084 int n = 0;
91447636 2085 u_int32_t *iptr;
0b4e3aa0 2086 u_int nw;
0a7de745 2087 int error = 0;
6d2010ae 2088 int need_wakeup = 0;
0b4e3aa0
A
2089
2090 /*
2091 * Problems when reboot; due to MacOSX signal probs
2092 * in Beaker1C ; verify that the p->p_fd is valid
2093 */
2094 if (fdp == NULL) {
2d21ac55 2095 *countp = 0;
0a7de745 2096 return EIO;
0b4e3aa0 2097 }
0b4e3aa0
A
2098 nw = howmany(nfd, NFDBITS);
2099
91447636 2100 proc_fdlock(p);
0b4e3aa0
A
2101 for (msk = 0; msk < 3; msk++) {
2102 iptr = (u_int32_t *)&ibits[msk * nw];
2103 for (i = 0; i < nfd; i += NFDBITS) {
0a7de745 2104 bits = iptr[i / NFDBITS];
0b4e3aa0 2105 while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
cb323159 2106 bits &= ~(1U << j);
fe8ab488 2107
f427ee49
A
2108 fp = fp_get_noref_locked(p, fd);
2109 if (fp == NULL) {
0a7de745
A
2110 *countp = 0;
2111 error = EBADF;
2112 goto bad;
0b4e3aa0 2113 }
f427ee49 2114 os_ref_retain_locked(&fp->fp_iocount);
0b4e3aa0
A
2115 n++;
2116 }
2117 }
2118 }
91447636
A
2119 proc_fdunlock(p);
2120
2d21ac55 2121 *countp = n;
0a7de745 2122 return 0;
6d2010ae 2123
91447636 2124bad:
0a7de745 2125 if (n == 0) {
91447636 2126 goto out;
0a7de745 2127 }
6d2010ae 2128 /* Ignore error return; it's already EBADF */
f427ee49 2129 (void)seldrop_locked(p, ibits, nfd, n, &need_wakeup);
91447636 2130
91447636
A
2131out:
2132 proc_fdunlock(p);
6d2010ae
A
2133 if (need_wakeup) {
2134 wakeup(&p->p_fpdrainwait);
2135 }
0a7de745 2136 return error;
91447636
A
2137}
2138
6d2010ae
A
2139
2140/*
2141 * seldrop_locked
2142 *
2143 * Drop outstanding wait queue references set up during selscan(); drop the
f427ee49 2144 * outstanding per fileproc fp_iocount picked up during the selcount().
6d2010ae
A
2145 *
2146 * Parameters: p Process performing the select
3e170ce0 2147 * ibits Input bit bector of fd's
6d2010ae
A
2148 * nfd Number of fd's
2149 * lim Limit to number of vector entries to
2150 * consider, or -1 for "all"
2151 * inselect True if
2152 * need_wakeup Pointer to flag to set to do a wakeup
2153 * if f_iocont on any descriptor goes to 0
2154 *
2155 * Returns: 0 Success
2156 * EBADF One or more fds in the bit vector
2157 * were invalid, but the rest
2158 * were successfully dropped
2159 *
2160 * Notes: An fd make become bad while the proc_fdlock() is not held,
2161 * if a multithreaded application closes the fd out from under
2162 * the in progress select. In this case, we still have to
2163 * clean up after the set up on the remaining fds.
2164 */
91447636 2165static int
f427ee49 2166seldrop_locked(struct proc *p, u_int32_t *ibits, int nfd, int lim, int *need_wakeup)
91447636 2167{
2d21ac55 2168 struct filedesc *fdp = p->p_fd;
3e170ce0 2169 int msk, i, j, nc, fd;
2d21ac55 2170 u_int32_t bits;
91447636 2171 struct fileproc *fp;
91447636
A
2172 u_int32_t *iptr;
2173 u_int nw;
6d2010ae 2174 int error = 0;
6d2010ae 2175 uthread_t uth = get_bsdthread_info(current_thread());
3e170ce0 2176 struct _select_data *seldata;
6d2010ae
A
2177
2178 *need_wakeup = 0;
91447636
A
2179
2180 /*
2181 * Problems when reboot; due to MacOSX signal probs
2182 * in Beaker1C ; verify that the p->p_fd is valid
2183 */
2184 if (fdp == NULL) {
0a7de745 2185 return EIO;
91447636
A
2186 }
2187
2188 nw = howmany(nfd, NFDBITS);
d9a64523 2189 seldata = &uth->uu_save.uus_select_data;
91447636 2190
3e170ce0 2191 nc = 0;
91447636
A
2192 for (msk = 0; msk < 3; msk++) {
2193 iptr = (u_int32_t *)&ibits[msk * nw];
2194 for (i = 0; i < nfd; i += NFDBITS) {
0a7de745 2195 bits = iptr[i / NFDBITS];
91447636 2196 while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
cb323159 2197 bits &= ~(1U << j);
6d2010ae
A
2198 /*
2199 * If we've already dropped as many as were
0a7de745 2200 * counted/scanned, then we are done.
6d2010ae 2201 */
f427ee49 2202 if (nc >= lim) {
6d2010ae 2203 goto done;
0a7de745 2204 }
6d2010ae 2205
3e170ce0 2206 /*
f427ee49
A
2207 * We took an I/O reference in selcount,
2208 * so the fp can't possibly be NULL.
3e170ce0 2209 */
f427ee49 2210 fp = fp_get_noref_locked_with_iocount(p, fd);
3e170ce0 2211 selunlinkfp(fp,
0a7de745
A
2212 seldata->wqp ? seldata->wqp[nc] : 0,
2213 uth->uu_wqset);
3e170ce0
A
2214
2215 nc++;
2216
f427ee49 2217 const os_ref_count_t refc = os_ref_release_locked(&fp->fp_iocount);
cb323159 2218 if (0 == refc) {
f427ee49 2219 panic("fp_iocount overdecrement!");
0a7de745 2220 }
6d2010ae 2221
cb323159 2222 if (1 == refc) {
6d2010ae
A
2223 /*
2224 * The last iocount is responsible for clearing
2225 * selconfict flag - even if we didn't set it -
2226 * and is also responsible for waking up anyone
2227 * waiting on iocounts to drain.
2228 */
f427ee49
A
2229 if (fp->fp_flags & FP_SELCONFLICT) {
2230 fp->fp_flags &= ~FP_SELCONFLICT;
0a7de745 2231 }
6d2010ae
A
2232 if (p->p_fpdrainwait) {
2233 p->p_fpdrainwait = 0;
2234 *need_wakeup = 1;
2235 }
91447636
A
2236 }
2237 }
2238 }
2239 }
6d2010ae 2240done:
0a7de745 2241 return error;
6d2010ae
A
2242}
2243
2244
2245static int
f427ee49 2246seldrop(struct proc *p, u_int32_t *ibits, int nfd, int lim)
6d2010ae
A
2247{
2248 int error;
2249 int need_wakeup = 0;
2250
2251 proc_fdlock(p);
f427ee49 2252 error = seldrop_locked(p, ibits, nfd, lim, &need_wakeup);
91447636 2253 proc_fdunlock(p);
6d2010ae
A
2254 if (need_wakeup) {
2255 wakeup(&p->p_fpdrainwait);
2256 }
0a7de745 2257 return error;
0b4e3aa0
A
2258}
2259
1c79356b
A
2260/*
2261 * Record a select request.
2262 */
2263void
3e170ce0 2264selrecord(__unused struct proc *selector, struct selinfo *sip, void *s_data)
1c79356b 2265{
0a7de745 2266 thread_t cur_act = current_thread();
0b4e3aa0 2267 struct uthread * ut = get_bsdthread_info(cur_act);
3e170ce0
A
2268 /* on input, s_data points to the 64-bit ID of a reserved link object */
2269 uint64_t *reserved_link = (uint64_t *)s_data;
1c79356b 2270
0b4e3aa0
A
2271 /* need to look at collisions */
2272
0b4e3aa0 2273 /*do not record if this is second pass of select */
0a7de745 2274 if (!s_data) {
0b4e3aa0 2275 return;
0a7de745 2276 }
1c79356b 2277
0b4e3aa0 2278 if ((sip->si_flags & SI_INITED) == 0) {
39037602 2279 waitq_init(&sip->si_waitq, SYNC_POLICY_FIFO);
0b4e3aa0
A
2280 sip->si_flags |= SI_INITED;
2281 sip->si_flags &= ~SI_CLEAR;
2282 }
2283
0a7de745 2284 if (sip->si_flags & SI_RECORDED) {
0b4e3aa0 2285 sip->si_flags |= SI_COLL;
0a7de745 2286 } else {
0b4e3aa0 2287 sip->si_flags &= ~SI_COLL;
0a7de745 2288 }
0b4e3aa0
A
2289
2290 sip->si_flags |= SI_RECORDED;
3e170ce0
A
2291 /* note: this checks for pre-existing linkage */
2292 waitq_link(&sip->si_waitq, ut->uu_wqset,
0a7de745 2293 WAITQ_SHOULD_LOCK, reserved_link);
3e170ce0
A
2294
2295 /*
2296 * Always consume the reserved link.
2297 * We can always call waitq_link_release() safely because if
2298 * waitq_link is successful, it consumes the link and resets the
2299 * value to 0, in which case our call to release becomes a no-op.
2300 * If waitq_link fails, then the following release call will actually
2301 * release the reserved link object.
2302 */
2303 waitq_link_release(*reserved_link);
2304 *reserved_link = 0;
2305
2306 /*
2307 * Use the s_data pointer as an output parameter as well
2308 * This avoids changing the prototype for this function which is
2309 * used by many kexts. We need to surface the waitq object
2310 * associated with the selinfo we just added to the thread's select
2311 * set. New waitq sets do not have back-pointers to set members, so
2312 * the only way to clear out set linkage objects is to go from the
2313 * waitq to the set. We use a memcpy because s_data could be
2314 * pointing to an unaligned value on the stack
2315 * (especially on 32-bit systems)
2316 */
2317 void *wqptr = (void *)&sip->si_waitq;
2318 memcpy((void *)s_data, (void *)&wqptr, sizeof(void *));
0b4e3aa0 2319
1c79356b
A
2320 return;
2321}
2322
/*
 * selwakeup
 *
 * Wake all threads recorded (via selrecord) as selecting on this selinfo.
 * Bumps the global collision counter if a collision was flagged, then
 * clears the RECORDED state so the next select pass re-records.
 */
void
selwakeup(struct selinfo *sip)
{
	/* never initialized => nothing was ever recorded; nothing to wake */
	if ((sip->si_flags & SI_INITED) == 0) {
		return;
	}

	if (sip->si_flags & SI_COLL) {
		nselcoll++;
		sip->si_flags &= ~SI_COLL;
#if 0
		/* will not support */
		//wakeup((caddr_t)&selwait);
#endif
	}

	if (sip->si_flags & SI_RECORDED) {
		/* wake every waiter, at all priorities, on this waitq */
		waitq_wakeup64_all(&sip->si_waitq, NO_EVENT64,
		    THREAD_AWAKENED, WAITQ_ALL_PRIORITIES);
		sip->si_flags &= ~SI_RECORDED;
	}
}
2345
/*
 * selthreadclear
 *
 * Tear down a selinfo: wake any recorded waiters, mark the selinfo
 * cleared/uninitialized, and deinitialize its wait queue.  Called when
 * the object backing the selinfo is going away.
 */
void
selthreadclear(struct selinfo *sip)
{
	struct waitq *wq;

	if ((sip->si_flags & SI_INITED) == 0) {
		return;
	}
	if (sip->si_flags & SI_RECORDED) {
		/* wake waiters before invalidating the queue under them */
		selwakeup(sip);
		sip->si_flags &= ~(SI_RECORDED | SI_COLL);
	}
	sip->si_flags |= SI_CLEAR;
	sip->si_flags &= ~SI_INITED;

	wq = &sip->si_waitq;

	/*
	 * Higher level logic may have a handle on this waitq's prepost ID,
	 * but that's OK because the waitq_deinit will remove/invalidate the
	 * prepost object (as well as mark the waitq invalid). This de-couples
	 * us from any callers that may have a handle to this waitq via the
	 * prepost ID.
	 */
	waitq_deinit(wq);
}
2372
2373
/*
 * gethostuuid
 *
 * Description:	Get the host UUID from IOKit and return it to user space.
 *
 * Parameters:	uuid_buf		Pointer to buffer to receive UUID
 *		timeout			Timespec for timeout
 *
 * Returns:	0			Success
 *		EWOULDBLOCK		Timeout is too short
 *		copyout:EFAULT		Bad user buffer
 *		mac_system_check_info:EPERM		Client not allowed to perform this operation
 *
 * Notes:	A timeout seems redundant, since if it's tolerable to not
 *		have a system UUID in hand, then why ask for one?
 */
int
gethostuuid(struct proc *p, struct gethostuuid_args *uap, __unused int32_t *retval)
{
	kern_return_t kret;
	int error;
	mach_timespec_t mach_ts;	/* for IOKit call */
	__darwin_uuid_t uuid_kern = {};	/* for IOKit call */

	/* Check entitlement */
	if (!IOTaskHasEntitlement(current_task(), "com.apple.private.getprivatesysid")) {
		/*
		 * Unentitled callers are only policy-checked on non-macOS
		 * targets with MACF; otherwise they fall through unchecked.
		 */
#if !defined(XNU_TARGET_OS_OSX)
#if CONFIG_MACF
		if ((error = mac_system_check_info(kauth_cred_get(), "hw.uuid")) != 0) {
			/* EPERM invokes userspace upcall if present */
			return error;
		}
#endif
#endif
	}

	/* Convert the 32/64 bit timespec into a mach_timespec_t */
	if (proc_is64bit(p)) {
		struct user64_timespec ts;
		error = copyin(uap->timeoutp, &ts, sizeof(ts));
		if (error) {
			return error;
		}
		mach_ts.tv_sec = (unsigned int)ts.tv_sec;
		mach_ts.tv_nsec = (clock_res_t)ts.tv_nsec;
	} else {
		struct user32_timespec ts;
		error = copyin(uap->timeoutp, &ts, sizeof(ts));
		if (error) {
			return error;
		}
		mach_ts.tv_sec = ts.tv_sec;
		mach_ts.tv_nsec = ts.tv_nsec;
	}

	/* Call IOKit with the stack buffer to get the UUID */
	kret = IOBSDGetPlatformUUID(uuid_kern, mach_ts);

	/*
	 * If we get it, copy out the data to the user buffer; note that a
	 * uuid_t is an array of characters, so this is size invariant for
	 * 32 vs. 64 bit.
	 */
	if (kret == KERN_SUCCESS) {
		error = copyout(uuid_kern, uap->uuid_buf, sizeof(uuid_kern));
	} else {
		error = EWOULDBLOCK;
	}

	return error;
}
316670eb
A
2445
2446/*
2447 * ledger
2448 *
2449 * Description: Omnibus system call for ledger operations
2450 */
2451int
2452ledger(struct proc *p, struct ledger_args *args, __unused int32_t *retval)
2453{
39236c6e
A
2454#if !CONFIG_MACF
2455#pragma unused(p)
2456#endif
316670eb
A
2457 int rval, pid, len, error;
2458#ifdef LEDGER_DEBUG
2459 struct ledger_limit_args lla;
2460#endif
2461 task_t task;
2462 proc_t proc;
2463
2464 /* Finish copying in the necessary args before taking the proc lock */
2465 error = 0;
2466 len = 0;
0a7de745
A
2467 if (args->cmd == LEDGER_ENTRY_INFO) {
2468 error = copyin(args->arg3, (char *)&len, sizeof(len));
2469 } else if (args->cmd == LEDGER_TEMPLATE_INFO) {
2470 error = copyin(args->arg2, (char *)&len, sizeof(len));
2471 } else if (args->cmd == LEDGER_LIMIT)
813fb2f6 2472#ifdef LEDGER_DEBUG
0a7de745 2473 { error = copyin(args->arg2, (char *)&lla, sizeof(lla));}
813fb2f6 2474#else
0a7de745 2475 { return EINVAL; }
316670eb 2476#endif
0a7de745
A
2477 else if ((args->cmd < 0) || (args->cmd > LEDGER_MAX_CMD)) {
2478 return EINVAL;
2479 }
ecc0ceb4 2480
0a7de745
A
2481 if (error) {
2482 return error;
2483 }
2484 if (len < 0) {
2485 return EINVAL;
2486 }
316670eb
A
2487
2488 rval = 0;
2489 if (args->cmd != LEDGER_TEMPLATE_INFO) {
f427ee49 2490 pid = (int)args->arg1;
316670eb 2491 proc = proc_find(pid);
0a7de745
A
2492 if (proc == NULL) {
2493 return ESRCH;
2494 }
316670eb
A
2495
2496#if CONFIG_MACF
2497 error = mac_proc_check_ledger(p, proc, args->cmd);
2498 if (error) {
2499 proc_rele(proc);
0a7de745 2500 return error;
316670eb
A
2501 }
2502#endif
2503
2504 task = proc->task;
2505 }
0a7de745 2506
316670eb
A
2507 switch (args->cmd) {
2508#ifdef LEDGER_DEBUG
0a7de745
A
2509 case LEDGER_LIMIT: {
2510 if (!kauth_cred_issuser(kauth_cred_get())) {
2511 rval = EPERM;
316670eb 2512 }
0a7de745
A
2513 rval = ledger_limit(task, &lla);
2514 proc_rele(proc);
2515 break;
2516 }
316670eb 2517#endif
0a7de745
A
2518 case LEDGER_INFO: {
2519 struct ledger_info info = {};
2520
2521 rval = ledger_info(task, &info);
2522 proc_rele(proc);
2523 if (rval == 0) {
2524 rval = copyout(&info, args->arg2,
2525 sizeof(info));
316670eb 2526 }
0a7de745
A
2527 break;
2528 }
316670eb 2529
0a7de745
A
2530 case LEDGER_ENTRY_INFO: {
2531 void *buf;
2532 int sz;
316670eb 2533
0a7de745
A
2534 rval = ledger_get_task_entry_info_multiple(task, &buf, &len);
2535 proc_rele(proc);
2536 if ((rval == 0) && (len >= 0)) {
2537 sz = len * sizeof(struct ledger_entry_info);
2538 rval = copyout(buf, args->arg2, sz);
f427ee49 2539 kheap_free(KHEAP_DATA_BUFFERS, buf, sz);
0a7de745
A
2540 }
2541 if (rval == 0) {
2542 rval = copyout(&len, args->arg3, sizeof(len));
316670eb 2543 }
0a7de745
A
2544 break;
2545 }
316670eb 2546
0a7de745
A
2547 case LEDGER_TEMPLATE_INFO: {
2548 void *buf;
2549 int sz;
316670eb 2550
0a7de745
A
2551 rval = ledger_template_info(&buf, &len);
2552 if ((rval == 0) && (len >= 0)) {
2553 sz = len * sizeof(struct ledger_template_info);
2554 rval = copyout(buf, args->arg1, sz);
f427ee49 2555 kheap_free(KHEAP_DATA_BUFFERS, buf, sz);
316670eb 2556 }
0a7de745
A
2557 if (rval == 0) {
2558 rval = copyout(&len, args->arg2, sizeof(len));
2559 }
2560 break;
2561 }
316670eb 2562
0a7de745
A
2563 default:
2564 panic("ledger syscall logic error -- command type %d", args->cmd);
2565 proc_rele(proc);
2566 rval = EINVAL;
316670eb
A
2567 }
2568
0a7de745 2569 return rval;
316670eb 2570}
39236c6e 2571
39236c6e
A
/*
 * telemetry
 *
 * Dispatch the telemetry syscall: timer-event and PMI sampling setup
 * (only when CONFIG_TELEMETRY is built in) plus voucher-name tagging of
 * the current thread.  Unknown commands return EINVAL.
 */
int
telemetry(__unused struct proc *p, struct telemetry_args *args, __unused int32_t *retval)
{
	int error = 0;

	switch (args->cmd) {
#if CONFIG_TELEMETRY
	case TELEMETRY_CMD_TIMER_EVENT:
		error = telemetry_timer_event(args->deadline, args->interval, args->leeway);
		break;
	case TELEMETRY_CMD_PMI_SETUP:
		/* args->deadline is overloaded to carry the PMI type here */
		error = telemetry_pmi_setup((enum telemetry_pmi)args->deadline, args->interval);
		break;
#endif /* CONFIG_TELEMETRY */
	case TELEMETRY_CMD_VOUCHER_NAME:
		/* args->deadline is overloaded to carry a port name here */
		if (thread_set_voucher_name((mach_port_name_t)args->deadline)) {
			error = EINVAL;
		}
		break;

	default:
		error = EINVAL;
		break;
	}

	return error;
}
3e170ce0 2599
cb323159
A
2600/*
2601 * Logging
2602 *
2603 * Description: syscall to access kernel logging from userspace
2604 *
2605 * Args:
2606 * tag - used for syncing with userspace on the version.
2607 * flags - flags used by the syscall.
2608 * buffer - userspace address of string to copy.
2609 * size - size of buffer.
2610 */
2611int
2612log_data(__unused struct proc *p, struct log_data_args *args, int *retval)
2613{
2614 unsigned int tag = args->tag;
2615 unsigned int flags = args->flags;
2616 user_addr_t buffer = args->buffer;
2617 unsigned int size = args->size;
2618 int ret = 0;
2619 char *log_msg = NULL;
2620 int error;
2621 *retval = 0;
2622
2623 /*
2624 * Tag synchronize the syscall version with userspace.
2625 * Tag == 0 => flags == OS_LOG_TYPE
2626 */
2627 if (tag != 0) {
2628 return EINVAL;
2629 }
2630
2631 /*
2632 * OS_LOG_TYPE are defined in libkern/os/log.h
2633 * In userspace they are defined in libtrace/os/log.h
2634 */
2635 if (flags != OS_LOG_TYPE_DEFAULT &&
2636 flags != OS_LOG_TYPE_INFO &&
2637 flags != OS_LOG_TYPE_DEBUG &&
2638 flags != OS_LOG_TYPE_ERROR &&
2639 flags != OS_LOG_TYPE_FAULT) {
2640 return EINVAL;
2641 }
2642
2643 if (size == 0) {
2644 return EINVAL;
2645 }
2646
2647 /* truncate to OS_LOG_DATA_MAX_SIZE */
2648 if (size > OS_LOG_DATA_MAX_SIZE) {
f427ee49
A
2649 printf("%s: WARNING msg is going to be truncated from %u to %u\n",
2650 __func__, size, OS_LOG_DATA_MAX_SIZE);
cb323159
A
2651 size = OS_LOG_DATA_MAX_SIZE;
2652 }
2653
f427ee49 2654 log_msg = kheap_alloc(KHEAP_TEMP, size, Z_WAITOK);
cb323159
A
2655 if (!log_msg) {
2656 return ENOMEM;
2657 }
2658
2659 error = copyin(buffer, log_msg, size);
2660 if (error) {
2661 ret = EFAULT;
2662 goto out;
2663 }
2664 log_msg[size - 1] = '\0';
2665
2666 /*
2667 * This will log to dmesg and logd.
2668 * The call will fail if the current
2669 * process is not a driverKit process.
2670 */
f427ee49 2671 os_log_driverKit(&ret, OS_LOG_DEFAULT, (os_log_type_t)flags, "%s", log_msg);
cb323159
A
2672
2673out:
2674 if (log_msg != NULL) {
f427ee49 2675 kheap_free(KHEAP_TEMP, log_msg, size);
cb323159
A
2676 }
2677
2678 return ret;
2679}
2680
5ba3f43e 2681#if DEVELOPMENT || DEBUG
3e170ce0
A
2682#if CONFIG_WAITQ_DEBUG
2683static uint64_t g_wqset_num = 0;
2684struct g_wqset {
2685 queue_chain_t link;
2686 struct waitq_set *wqset;
2687};
2688
2689static queue_head_t g_wqset_list;
2690static struct waitq_set *g_waitq_set = NULL;
2691
0a7de745
A
/*
 * Look up (or lazily create) the test waitq set whose set ID's low 32 bits
 * equal idx.  Test-only helper for the CONFIG_WAITQ_DEBUG sysctls; the
 * global list is intentionally unlocked.
 */
static inline struct waitq_set *
sysctl_get_wqset(int idx)
{
	struct g_wqset *gwqs;

	/* first call: initialize the global list head */
	if (!g_wqset_num) {
		queue_init(&g_wqset_list);
	}

	/* don't bother with locks: this is test-only code! */
	qe_foreach_element(gwqs, &g_wqset_list, link) {
		if ((int)(wqset_id(gwqs->wqset) & 0xffffffff) == idx) {
			return gwqs->wqset;
		}
	}

	/* allocate a new one */
	++g_wqset_num;
	gwqs = (struct g_wqset *)kalloc(sizeof(*gwqs));
	assert(gwqs != NULL);

	gwqs->wqset = waitq_set_alloc(SYNC_POLICY_FIFO | SYNC_POLICY_PREPOST, NULL);
	enqueue_tail(&g_wqset_list, &gwqs->link);
	printf("[WQ]: created new waitq set 0x%llx\n", wqset_id(gwqs->wqset));

	return gwqs->wqset;
}
2719
2720#define MAX_GLOBAL_TEST_QUEUES 64
2721static int g_wq_init = 0;
2722static struct waitq g_wq[MAX_GLOBAL_TEST_QUEUES];
2723
0a7de745
A
2724static inline struct waitq *
2725global_test_waitq(int idx)
3e170ce0 2726{
0a7de745 2727 if (idx < 0) {
3e170ce0 2728 return NULL;
0a7de745 2729 }
3e170ce0
A
2730
2731 if (!g_wq_init) {
2732 g_wq_init = 1;
0a7de745 2733 for (int i = 0; i < MAX_GLOBAL_TEST_QUEUES; i++) {
39037602 2734 waitq_init(&g_wq[i], SYNC_POLICY_FIFO);
0a7de745 2735 }
3e170ce0
A
2736 }
2737
2738 return &g_wq[idx % MAX_GLOBAL_TEST_QUEUES];
2739}
2740
/*
 * Test sysctl: wake one thread on a test waitq.  A negative written value
 * selects a wqset's waitq by index; a non-negative value selects a global
 * test waitq.  The wakeup always uses event 0.  Reads echo back 0.
 */
static int sysctl_waitq_wakeup_one SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	int error;
	int index;
	struct waitq *waitq;
	kern_return_t kr;
	int64_t event64 = 0;

	error = SYSCTL_IN(req, &event64, sizeof(event64));
	if (error) {
		return error;
	}

	/* read-only access: no wakeup, just echo */
	if (!req->newptr) {
		return SYSCTL_OUT(req, &event64, sizeof(event64));
	}

	if (event64 < 0) {
		index = (int)((-event64) & 0xffffffff);
		waitq = wqset_waitq(sysctl_get_wqset(index));
		index = -index;
	} else {
		index = (int)event64;
		waitq = global_test_waitq(index);
	}

	/* the wakeup event is always 0 */
	event64 = 0;

	printf("[WQ]: Waking one thread on waitq [%d] event:0x%llx\n",
	    index, event64);
	kr = waitq_wakeup64_one(waitq, (event64_t)event64, THREAD_AWAKENED,
	    WAITQ_ALL_PRIORITIES);
	printf("[WQ]: \tkr=%d\n", kr);

	return SYSCTL_OUT(req, &kr, sizeof(kr));
}
SYSCTL_PROC(_kern, OID_AUTO, waitq_wakeup_one, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
    0, 0, sysctl_waitq_wakeup_one, "Q", "wakeup one thread waiting on given event");
3e170ce0
A
2780
2781
/*
 * Test sysctl: wake ALL threads on a test waitq.  Index selection is the
 * same as sysctl_waitq_wakeup_one (negative => wqset waitq, non-negative
 * => global test waitq); the wakeup always uses event 0.
 */
static int sysctl_waitq_wakeup_all SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	int error;
	int index;
	struct waitq *waitq;
	kern_return_t kr;
	int64_t event64 = 0;

	error = SYSCTL_IN(req, &event64, sizeof(event64));
	if (error) {
		return error;
	}

	/* read-only access: no wakeup, just echo */
	if (!req->newptr) {
		return SYSCTL_OUT(req, &event64, sizeof(event64));
	}

	if (event64 < 0) {
		index = (int)((-event64) & 0xffffffff);
		waitq = wqset_waitq(sysctl_get_wqset(index));
		index = -index;
	} else {
		index = (int)event64;
		waitq = global_test_waitq(index);
	}

	/* the wakeup event is always 0 */
	event64 = 0;

	printf("[WQ]: Waking all threads on waitq [%d] event:0x%llx\n",
	    index, event64);
	kr = waitq_wakeup64_all(waitq, (event64_t)event64,
	    THREAD_AWAKENED, WAITQ_ALL_PRIORITIES);
	printf("[WQ]: \tkr=%d\n", kr);

	return SYSCTL_OUT(req, &kr, sizeof(kr));
}
SYSCTL_PROC(_kern, OID_AUTO, waitq_wakeup_all, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
    0, 0, sysctl_waitq_wakeup_all, "Q", "wakeup all threads waiting on given event");
3e170ce0
A
2821
2822
2823static int sysctl_waitq_wait SYSCTL_HANDLER_ARGS
2824{
2825#pragma unused(oidp, arg1, arg2)
2826 int error;
2827 int index;
2828 struct waitq *waitq;
2829 kern_return_t kr;
2830 int64_t event64 = 0;
2831
2832 error = SYSCTL_IN(req, &event64, sizeof(event64));
0a7de745 2833 if (error) {
3e170ce0 2834 return error;
0a7de745 2835 }
3e170ce0 2836
0a7de745 2837 if (!req->newptr) {
3e170ce0 2838 return SYSCTL_OUT(req, &event64, sizeof(event64));
0a7de745 2839 }
3e170ce0
A
2840
2841 if (event64 < 0) {
2842 index = (int)((-event64) & 0xffffffff);
2843 waitq = wqset_waitq(sysctl_get_wqset(index));
2844 index = -index;
2845 } else {
2846 index = (int)event64;
2847 waitq = global_test_waitq(index);
2848 }
2849
2850 event64 = 0;
2851
2852 printf("[WQ]: Current thread waiting on waitq [%d] event:0x%llx\n",
0a7de745 2853 index, event64);
3e170ce0 2854 kr = waitq_assert_wait64(waitq, (event64_t)event64, THREAD_INTERRUPTIBLE, 0);
0a7de745 2855 if (kr == THREAD_WAITING) {
3e170ce0 2856 thread_block(THREAD_CONTINUE_NULL);
0a7de745 2857 }
3e170ce0
A
2858 printf("[WQ]: \tWoke Up: kr=%d\n", kr);
2859
2860 return SYSCTL_OUT(req, &kr, sizeof(kr));
2861}
2862SYSCTL_PROC(_kern, OID_AUTO, waitq_wait, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
0a7de745 2863 0, 0, sysctl_waitq_wait, "Q", "start waiting on given event");
3e170ce0
A
2864
2865
2866static int sysctl_wqset_select SYSCTL_HANDLER_ARGS
2867{
2868#pragma unused(oidp, arg1, arg2)
2869 int error;
2870 struct waitq_set *wqset;
2871 uint64_t event64 = 0;
2872
2873 error = SYSCTL_IN(req, &event64, sizeof(event64));
0a7de745 2874 if (error) {
3e170ce0 2875 return error;
0a7de745 2876 }
3e170ce0 2877
0a7de745 2878 if (!req->newptr) {
3e170ce0 2879 goto out;
0a7de745 2880 }
3e170ce0
A
2881
2882 wqset = sysctl_get_wqset((int)(event64 & 0xffffffff));
2883 g_waitq_set = wqset;
2884
2885 event64 = wqset_id(wqset);
2886 printf("[WQ]: selected wqset 0x%llx\n", event64);
2887
2888out:
0a7de745 2889 if (g_waitq_set) {
3e170ce0 2890 event64 = wqset_id(g_waitq_set);
0a7de745 2891 } else {
3e170ce0 2892 event64 = (uint64_t)(-1);
0a7de745 2893 }
3e170ce0
A
2894
2895 return SYSCTL_OUT(req, &event64, sizeof(event64));
2896}
2897SYSCTL_PROC(_kern, OID_AUTO, wqset_select, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
0a7de745 2898 0, 0, sysctl_wqset_select, "Q", "select/create a global waitq set");
3e170ce0
A
2899
2900
2901static int sysctl_waitq_link SYSCTL_HANDLER_ARGS
2902{
2903#pragma unused(oidp, arg1, arg2)
2904 int error;
2905 int index;
2906 struct waitq *waitq;
2907 struct waitq_set *wqset;
2908 kern_return_t kr;
2909 uint64_t reserved_link = 0;
2910 int64_t event64 = 0;
2911
2912 error = SYSCTL_IN(req, &event64, sizeof(event64));
0a7de745 2913 if (error) {
3e170ce0 2914 return error;
0a7de745 2915 }
3e170ce0 2916
0a7de745 2917 if (!req->newptr) {
3e170ce0 2918 return SYSCTL_OUT(req, &event64, sizeof(event64));
0a7de745 2919 }
3e170ce0 2920
0a7de745 2921 if (!g_waitq_set) {
3e170ce0 2922 g_waitq_set = sysctl_get_wqset(1);
0a7de745 2923 }
3e170ce0
A
2924 wqset = g_waitq_set;
2925
2926 if (event64 < 0) {
2927 struct waitq_set *tmp;
2928 index = (int)((-event64) & 0xffffffff);
2929 tmp = sysctl_get_wqset(index);
0a7de745 2930 if (tmp == wqset) {
3e170ce0 2931 goto out;
0a7de745 2932 }
3e170ce0
A
2933 waitq = wqset_waitq(tmp);
2934 index = -index;
2935 } else {
2936 index = (int)event64;
2937 waitq = global_test_waitq(index);
2938 }
2939
2940 printf("[WQ]: linking waitq [%d] to global wqset (0x%llx)\n",
0a7de745 2941 index, wqset_id(wqset));
3e170ce0
A
2942 reserved_link = waitq_link_reserve(waitq);
2943 kr = waitq_link(waitq, wqset, WAITQ_SHOULD_LOCK, &reserved_link);
2944 waitq_link_release(reserved_link);
2945
2946 printf("[WQ]: \tkr=%d\n", kr);
2947
2948out:
2949 return SYSCTL_OUT(req, &kr, sizeof(kr));
2950}
2951SYSCTL_PROC(_kern, OID_AUTO, waitq_link, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
0a7de745 2952 0, 0, sysctl_waitq_link, "Q", "link global waitq to test waitq set");
3e170ce0
A
2953
2954
/*
 * Test sysctl: unlink a global test waitq (by index) from the currently
 * selected global test waitq set.  Returns the waitq_unlink()
 * kern_return_t to the caller.
 */
static int sysctl_waitq_unlink SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	int error;
	int index;
	struct waitq *waitq;
	struct waitq_set *wqset;
	kern_return_t kr;
	uint64_t event64 = 0;

	error = SYSCTL_IN(req, &event64, sizeof(event64));
	if (error) {
		return error;
	}

	/* read-only access: no unlink, just echo */
	if (!req->newptr) {
		return SYSCTL_OUT(req, &event64, sizeof(event64));
	}

	/* lazily select a default wqset if none has been chosen yet */
	if (!g_waitq_set) {
		g_waitq_set = sysctl_get_wqset(1);
	}
	wqset = g_waitq_set;

	index = (int)event64;
	waitq = global_test_waitq(index);

	printf("[WQ]: unlinking waitq [%d] from global wqset (0x%llx)\n",
	    index, wqset_id(wqset));

	kr = waitq_unlink(waitq, wqset);
	printf("[WQ]: \tkr=%d\n", kr);

	return SYSCTL_OUT(req, &kr, sizeof(kr));
}
SYSCTL_PROC(_kern, OID_AUTO, waitq_unlink, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
    0, 0, sysctl_waitq_unlink, "Q", "unlink global waitq from test waitq set");
3e170ce0
A
2992
2993
/*
 * Test sysctl: clear any prepost state on the indexed global test waitq.
 * Echoes the written value back to the caller.
 */
static int sysctl_waitq_clear_prepost SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	struct waitq *waitq;
	uint64_t event64 = 0;
	int error, index;

	error = SYSCTL_IN(req, &event64, sizeof(event64));
	if (error) {
		return error;
	}

	/* read-only access: no-op, just echo */
	if (!req->newptr) {
		return SYSCTL_OUT(req, &event64, sizeof(event64));
	}

	index = (int)event64;
	waitq = global_test_waitq(index);

	printf("[WQ]: clearing prepost on waitq [%d]\n", index);
	waitq_clear_prepost(waitq);

	return SYSCTL_OUT(req, &event64, sizeof(event64));
}
SYSCTL_PROC(_kern, OID_AUTO, waitq_clear_prepost, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
    0, 0, sysctl_waitq_clear_prepost, "Q", "clear prepost on given waitq");
3e170ce0
A
3020
3021
/*
 * Test sysctl: unlink every waitq from the currently selected global test
 * waitq set.  Returns the waitq_set_unlink_all() kern_return_t.
 */
static int sysctl_wqset_unlink_all SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	int error;
	struct waitq_set *wqset;
	kern_return_t kr;
	uint64_t event64 = 0;

	error = SYSCTL_IN(req, &event64, sizeof(event64));
	if (error) {
		return error;
	}

	/* read-only access: no-op, just echo */
	if (!req->newptr) {
		return SYSCTL_OUT(req, &event64, sizeof(event64));
	}

	/* lazily select a default wqset if none has been chosen yet */
	if (!g_waitq_set) {
		g_waitq_set = sysctl_get_wqset(1);
	}
	wqset = g_waitq_set;

	printf("[WQ]: unlinking all queues from global wqset (0x%llx)\n",
	    wqset_id(wqset));

	kr = waitq_set_unlink_all(wqset);
	printf("[WQ]: \tkr=%d\n", kr);

	return SYSCTL_OUT(req, &kr, sizeof(kr));
}
SYSCTL_PROC(_kern, OID_AUTO, wqset_unlink_all, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
    0, 0, sysctl_wqset_unlink_all, "Q", "unlink all queues from test waitq set");
3e170ce0
A
3054
3055
/*
 * kern.wqset_clear_preposts (debug sysctl)
 *
 * New value: the low 32 bits select the test waitq set whose preposts are
 * cleared.  Reads back the selected wqset's ID on success, or
 * (uint64_t)-1 when no set was selected (read-only query).
 */
static int sysctl_wqset_clear_preposts SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	struct waitq_set *wqset = NULL;
	uint64_t event64 = 0;
	int error, index;

	error = SYSCTL_IN(req, &event64, sizeof(event64));
	if (error) {
		return error;
	}

	/* Read-only query: wqset stays NULL, so -1 is reported below. */
	if (!req->newptr) {
		goto out;
	}

	/* only the low 32 bits carry the wqset index */
	index = (int)((event64) & 0xffffffff);
	wqset = sysctl_get_wqset(index);
	assert(wqset != NULL);

	printf("[WQ]: clearing preposts on wqset 0x%llx\n", wqset_id(wqset));
	waitq_set_clear_preposts(wqset);

out:
	if (wqset) {
		event64 = wqset_id(wqset);
	} else {
		event64 = (uint64_t)(-1);
	}

	return SYSCTL_OUT(req, &event64, sizeof(event64));
}
SYSCTL_PROC(_kern, OID_AUTO, wqset_clear_preposts, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
    0, 0, sysctl_wqset_clear_preposts, "Q", "clear preposts on given waitq set");
3e170ce0
A
3090
3091#endif /* CONFIG_WAITQ_DEBUG */
d9a64523
A
3092
3093static int
3094sysctl_waitq_set_nelem SYSCTL_HANDLER_ARGS
3095{
3096#pragma unused(oidp, arg1, arg2)
0a7de745 3097 int nelem;
d9a64523
A
3098
3099 /* Read only */
0a7de745
A
3100 if (req->newptr != USER_ADDR_NULL) {
3101 return EPERM;
3102 }
d9a64523
A
3103
3104 nelem = sysctl_helper_waitq_set_nelem();
3105
3106 return SYSCTL_OUT(req, &nelem, sizeof(nelem));
3107}
3108
3109SYSCTL_PROC(_kern, OID_AUTO, n_ltable_entries, CTLFLAG_RD | CTLFLAG_LOCKED,
0a7de745
A
3110 0, 0, sysctl_waitq_set_nelem, "I", "ltable elementis currently used");
3111
3112
cb323159
A
3113static int
3114sysctl_mpsc_test_pingpong SYSCTL_HANDLER_ARGS
3115{
3116#pragma unused(oidp, arg1, arg2)
3117 uint64_t value = 0;
3118 int error;
3119
3120 error = SYSCTL_IN(req, &value, sizeof(value));
3121 if (error) {
3122 return error;
3123 }
3124
3125 if (error == 0 && req->newptr) {
3126 error = mpsc_test_pingpong(value, &value);
3127 if (error == 0) {
3128 error = SYSCTL_OUT(req, &value, sizeof(value));
3129 }
3130 }
3131
3132 return error;
3133}
3134SYSCTL_PROC(_kern, OID_AUTO, mpsc_test_pingpong, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
3135 0, 0, sysctl_mpsc_test_pingpong, "Q", "MPSC tests: pingpong");
3136
0a7de745
A
3137#endif /* DEVELOPMENT || DEBUG */
3138
/* Remote Time API: machdep.remotetime sysctl subtree */
SYSCTL_NODE(_machdep, OID_AUTO, remotetime, CTLFLAG_RD | CTLFLAG_LOCKED, 0, "Remote time api");

#if DEVELOPMENT || DEBUG
#if CONFIG_MACH_BRIDGE_SEND_TIME
/* non-zero once the mach-bridge timestamp machinery has been initialized */
extern _Atomic uint32_t bt_init_flag;
extern uint32_t mach_bridge_timer_enable(uint32_t, int);

/* read-only view of the bridge-timer initialization flag */
SYSCTL_INT(_machdep_remotetime, OID_AUTO, bridge_timer_init_flag,
    CTLFLAG_RD | CTLFLAG_LOCKED, &bt_init_flag, 0, "");
3149
/*
 * machdep.remotetime.bridge_timer_enable
 *
 * Query or toggle the mach-bridge timer.  Writes of 0 or 1 change the
 * state (only once bt_init_flag is set); any other value returns EPERM.
 * A read reports the current state.  Until initialization completes,
 * both reads and writes report 0 without touching the timer.
 */
static int sysctl_mach_bridge_timer_enable SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	uint32_t value = 0;
	int error = 0;
	/* User is querying buffer size */
	if (req->oldptr == USER_ADDR_NULL && req->newptr == USER_ADDR_NULL) {
		req->oldidx = sizeof(value);
		return 0;
	}
	/* acquire load: only act after bridge-timer init is fully visible */
	if (os_atomic_load(&bt_init_flag, acquire)) {
		if (req->newptr) {
			int new_value = 0;
			error = SYSCTL_IN(req, &new_value, sizeof(new_value));
			if (error) {
				return error;
			}
			if (new_value == 0 || new_value == 1) {
				/* second arg 1: presumably "apply the change" — confirm in bridge code */
				value = mach_bridge_timer_enable(new_value, 1);
			} else {
				return EPERM;
			}
		} else {
			/* second arg 0: presumably query-only mode */
			value = mach_bridge_timer_enable(0, 0);
		}
	}
	error = SYSCTL_OUT(req, &value, sizeof(value));
	return error;
}

SYSCTL_PROC(_machdep_remotetime, OID_AUTO, bridge_timer_enable,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    0, 0, sysctl_mach_bridge_timer_enable, "I", "");
3183
3184#endif /* CONFIG_MACH_BRIDGE_SEND_TIME */
3185
/*
 * machdep.remotetime.mach_bridge_remote_time
 *
 * Converts a caller-supplied local timestamp via mach_bridge_remote_time()
 * and reads the converted value back out.  With no new value supplied,
 * local time 0 is converted.
 */
static int sysctl_mach_bridge_remote_time SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	uint64_t ltime = 0, rtime = 0;
	/* Size probe: report how many bytes a read will produce. */
	if (req->oldptr == USER_ADDR_NULL) {
		req->oldidx = sizeof(rtime);
		return 0;
	}
	if (req->newptr) {
		int error = SYSCTL_IN(req, &ltime, sizeof(ltime));
		if (error) {
			return error;
		}
	}
	rtime = mach_bridge_remote_time(ltime);
	return SYSCTL_OUT(req, &rtime, sizeof(rtime));
}
SYSCTL_PROC(_machdep_remotetime, OID_AUTO, mach_bridge_remote_time,
    CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
    0, 0, sysctl_mach_bridge_remote_time, "Q", "");
d9a64523 3206
5ba3f43e
A
3207#endif /* DEVELOPMENT || DEBUG */
3208
0a7de745
A
3209#if CONFIG_MACH_BRIDGE_RECV_TIME
3210extern struct bt_params bt_params_get_latest(void);
3211
/*
 * machdep.remotetime.conversion_params (read-only)
 *
 * Copies out the latest timebase conversion parameters as a
 * struct bt_params, truncated to the caller's buffer if it is smaller.
 * Writes are rejected with EPERM.
 */
static int sysctl_mach_bridge_conversion_params SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	struct bt_params params = {};
	/* Size probe: report the full struct size. */
	if (req->oldptr == USER_ADDR_NULL) {
		req->oldidx = sizeof(struct bt_params);
		return 0;
	}
	if (req->newptr) {
		return EPERM;
	}
	params = bt_params_get_latest();
	return SYSCTL_OUT(req, &params, MIN(sizeof(params), req->oldlen));
}

SYSCTL_PROC(_machdep_remotetime, OID_AUTO, conversion_params,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED, 0,
    0, sysctl_mach_bridge_conversion_params, "S,bt_params", "");
3230
3231#endif /* CONFIG_MACH_BRIDGE_RECV_TIME */
3232
cb323159 3233#if DEVELOPMENT || DEBUG
c6bf4f31
A
3234#if __AMP__
3235#include <pexpert/pexpert.h>
3236extern int32_t sysctl_get_bound_cpuid(void);
3237extern void sysctl_thread_bind_cpuid(int32_t cpuid);
/*
 * kern.sched_thread_bind_cpu (development/debug only)
 *
 * Read the CPU the calling thread is bound to, or write a cpuid to bind
 * it.  Gated behind the enable_skstb boot-arg; returns ENOENT otherwise.
 */
static int
sysctl_kern_sched_thread_bind_cpu SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)

	/* feature must be opted into at boot via enable_skstb */
	if (!PE_parse_boot_argn("enable_skstb", NULL, 0)) {
		return ENOENT;
	}

	int32_t cpuid = sysctl_get_bound_cpuid();

	int32_t new_value;
	int changed;
	int error = sysctl_io_number(req, cpuid, sizeof cpuid, &new_value, &changed);
	if (error) {
		return error;
	}

	if (changed) {
		sysctl_thread_bind_cpuid(new_value);
	}

	return error;
}

SYSCTL_PROC(_kern, OID_AUTO, sched_thread_bind_cpu, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    0, 0, sysctl_kern_sched_thread_bind_cpu, "I", "");
3265
3266extern char sysctl_get_bound_cluster_type(void);
3267extern void sysctl_thread_bind_cluster_type(char cluster_type);
3268static int
3269sysctl_kern_sched_thread_bind_cluster_type SYSCTL_HANDLER_ARGS
3270{
3271#pragma unused(oidp, arg1, arg2)
3272 char buff[4];
3273
3274 if (!PE_parse_boot_argn("enable_skstb", NULL, 0)) {
3275 return ENOENT;
3276 }
3277
3278 int error = SYSCTL_IN(req, buff, 1);
3279 if (error) {
3280 return error;
3281 }
3282 char cluster_type = buff[0];
3283
3284 if (!req->newptr) {
3285 goto out;
3286 }
3287
3288 sysctl_thread_bind_cluster_type(cluster_type);
3289out:
3290 cluster_type = sysctl_get_bound_cluster_type();
3291 buff[0] = cluster_type;
3292
3293 return SYSCTL_OUT(req, buff, 1);
3294}
3295
3296SYSCTL_PROC(_kern, OID_AUTO, sched_thread_bind_cluster_type, CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_LOCKED,
3297 0, 0, sysctl_kern_sched_thread_bind_cluster_type, "A", "");
3298
3299extern char sysctl_get_task_cluster_type(void);
3300extern void sysctl_task_set_cluster_type(char cluster_type);
3301static int
3302sysctl_kern_sched_task_set_cluster_type SYSCTL_HANDLER_ARGS
3303{
3304#pragma unused(oidp, arg1, arg2)
3305 char buff[4];
3306
3307 if (!PE_parse_boot_argn("enable_skstsct", NULL, 0)) {
3308 return ENOENT;
3309 }
3310
3311 int error = SYSCTL_IN(req, buff, 1);
3312 if (error) {
3313 return error;
3314 }
3315 char cluster_type = buff[0];
3316
3317 if (!req->newptr) {
3318 goto out;
3319 }
3320
3321 sysctl_task_set_cluster_type(cluster_type);
3322out:
3323 cluster_type = sysctl_get_task_cluster_type();
3324 buff[0] = cluster_type;
3325
3326 return SYSCTL_OUT(req, buff, 1);
3327}
3328
3329SYSCTL_PROC(_kern, OID_AUTO, sched_task_set_cluster_type, CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_LOCKED,
3330 0, 0, sysctl_kern_sched_task_set_cluster_type, "A", "");
f427ee49
A
3331
3332#if CONFIG_SCHED_EDGE
3333
3334/*
3335 * Edge Scheduler Sysctls
3336 *
3337 * The Edge scheduler uses edge configurations to decide feasability of
3338 * migrating threads across clusters. The sysctls allow dynamic configuration
3339 * of the edge properties and edge weights. This configuration is typically
3340 * updated via callouts from CLPC.
3341 *
3342 * <Edge Multi-cluster Support Needed>
3343 */
3344extern sched_clutch_edge sched_edge_config_e_to_p;
3345extern sched_clutch_edge sched_edge_config_p_to_e;
3346extern kern_return_t sched_edge_sysctl_configure_e_to_p(uint64_t);
3347extern kern_return_t sched_edge_sysctl_configure_p_to_e(uint64_t);
3348extern sched_clutch_edge sched_edge_e_to_p(void);
3349extern sched_clutch_edge sched_edge_p_to_e(void);
3350
/*
 * kern.sched_edge_config_e_to_p
 *
 * Read or update the Edge scheduler's E->P cluster edge configuration.
 * A read returns the packed edge config; a write hands the packed value
 * to the scheduler and returns the resulting kern_return_t.
 */
static int sysctl_sched_edge_config_e_to_p SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	int error;
	kern_return_t kr;
	int64_t edge_config = 0;

	error = SYSCTL_IN(req, &edge_config, sizeof(edge_config));
	if (error) {
		return error;
	}

	/* Read-only query: report the current packed configuration. */
	if (!req->newptr) {
		edge_config = sched_edge_e_to_p().sce_edge_packed;
		return SYSCTL_OUT(req, &edge_config, sizeof(edge_config));
	}

	kr = sched_edge_sysctl_configure_e_to_p(edge_config);
	return SYSCTL_OUT(req, &kr, sizeof(kr));
}
SYSCTL_PROC(_kern, OID_AUTO, sched_edge_config_e_to_p, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
    0, 0, sysctl_sched_edge_config_e_to_p, "Q", "Edge Scheduler Config for E-to-P cluster");
3373
/*
 * kern.sched_edge_config_p_to_e
 *
 * Read or update the Edge scheduler's P->E cluster edge configuration.
 * Mirrors sched_edge_config_e_to_p for the opposite direction.
 */
static int sysctl_sched_edge_config_p_to_e SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	int error;
	kern_return_t kr;
	int64_t edge_config = 0;

	error = SYSCTL_IN(req, &edge_config, sizeof(edge_config));
	if (error) {
		return error;
	}

	/* Read-only query: report the current packed configuration. */
	if (!req->newptr) {
		edge_config = sched_edge_p_to_e().sce_edge_packed;
		return SYSCTL_OUT(req, &edge_config, sizeof(edge_config));
	}

	kr = sched_edge_sysctl_configure_p_to_e(edge_config);
	return SYSCTL_OUT(req, &kr, sizeof(kr));
}
SYSCTL_PROC(_kern, OID_AUTO, sched_edge_config_p_to_e, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
    0, 0, sysctl_sched_edge_config_p_to_e, "Q", "Edge Scheduler Config for P-to-E cluster");
3396
3397extern int sched_edge_restrict_ut;
3398SYSCTL_INT(_kern, OID_AUTO, sched_edge_restrict_ut, CTLFLAG_RW | CTLFLAG_LOCKED, &sched_edge_restrict_ut, 0, "Edge Scheduler Restrict UT Threads");
3399extern int sched_edge_restrict_bg;
3400SYSCTL_INT(_kern, OID_AUTO, sched_edge_restrict_bg, CTLFLAG_RW | CTLFLAG_LOCKED, &sched_edge_restrict_ut, 0, "Edge Scheduler Restrict BG Threads");
3401extern int sched_edge_migrate_ipi_immediate;
3402SYSCTL_INT(_kern, OID_AUTO, sched_edge_migrate_ipi_immediate, CTLFLAG_RW | CTLFLAG_LOCKED, &sched_edge_migrate_ipi_immediate, 0, "Edge Scheduler uses immediate IPIs for migration event based on execution latency");
3403
3404#endif /* CONFIG_SCHED_EDGE */
3405
c6bf4f31 3406#endif /* __AMP__ */
cb323159
A
3407#endif /* DEVELOPMENT || DEBUG */
3408
extern uint32_t task_exc_guard_default;

/* Read-only view of the task_exc_guard_default value (EXC_GUARD defaults for new tasks — confirm semantics in kern_exc_guard code). */
SYSCTL_INT(_kern, OID_AUTO, task_exc_guard_default,
    CTLFLAG_RD | CTLFLAG_LOCKED, &task_exc_guard_default, 0, "");
0a7de745
A
3413
3414
3415static int
3416sysctl_kern_tcsm_available SYSCTL_HANDLER_ARGS
3417{
3418#pragma unused(oidp, arg1, arg2)
3419 uint32_t value = machine_csv(CPUVN_CI) ? 1 : 0;
3420
3421 if (req->newptr) {
3422 return EINVAL;
3423 }
3424
3425 return SYSCTL_OUT(req, &value, sizeof(value));
3426}
3427SYSCTL_PROC(_kern, OID_AUTO, tcsm_available,
3428 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED | CTLFLAG_MASKED | CTLFLAG_ANYBODY,
3429 0, 0, sysctl_kern_tcsm_available, "I", "");
3430
3431
/*
 * kern.tcsm_enable
 *
 * Writing a non-zero value (when machine_csv(CPUVN_CI) applies) marks the
 * current thread no-SMT and applies machine_tecs() to it.  Always returns
 * the thread's previous no-SMT state.
 *
 * NOTE(review): this path only ever enables the mitigation — writing zero
 * does not disable it; confirm that is intentional.
 */
static int
sysctl_kern_tcsm_enable SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	uint32_t soflags = 0;
	uint32_t old_value = thread_get_no_smt() ? 1 : 0;

	int error = SYSCTL_IN(req, &soflags, sizeof(soflags));
	if (error) {
		return error;
	}

	if (soflags && machine_csv(CPUVN_CI)) {
		thread_set_no_smt(true);
		machine_tecs(current_thread());
	}

	return SYSCTL_OUT(req, &old_value, sizeof(old_value));
}
SYSCTL_PROC(_kern, OID_AUTO, tcsm_enable,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_MASKED | CTLFLAG_ANYBODY,
    0, 0, sysctl_kern_tcsm_enable, "I", "");
3454
3455
3456#if DEVELOPMENT || DEBUG
3457extern void sysctl_task_set_no_smt(char no_smt);
3458extern char sysctl_task_get_no_smt(void);
3459
3460static int
3461sysctl_kern_sched_task_set_no_smt SYSCTL_HANDLER_ARGS
3462{
3463#pragma unused(oidp, arg1, arg2)
3464 char buff[4];
3465
3466 int error = SYSCTL_IN(req, buff, 1);
3467 if (error) {
3468 return error;
3469 }
3470 char no_smt = buff[0];
3471
3472 if (!req->newptr) {
3473 goto out;
3474 }
3475
3476 sysctl_task_set_no_smt(no_smt);
3477out:
3478 no_smt = sysctl_task_get_no_smt();
3479 buff[0] = no_smt;
3480
3481 return SYSCTL_OUT(req, buff, 1);
3482}
3483
3484SYSCTL_PROC(_kern, OID_AUTO, sched_task_set_no_smt, CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY,
3485 0, 0, sysctl_kern_sched_task_set_no_smt, "A", "");
3486
3487static int
3488sysctl_kern_sched_thread_set_no_smt(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
3489{
3490 int new_value, changed;
3491 int old_value = thread_get_no_smt() ? 1 : 0;
3492 int error = sysctl_io_number(req, old_value, sizeof(int), &new_value, &changed);
3493
3494 if (changed) {
3495 thread_set_no_smt(!!new_value);
3496 }
5ba3f43e 3497
0a7de745
A
3498 return error;
3499}
3500
3501SYSCTL_PROC(_kern, OID_AUTO, sched_thread_set_no_smt,
3502 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY,
3503 0, 0, sysctl_kern_sched_thread_set_no_smt, "I", "");
f427ee49
A
3504
/*
 * kern.preoslog
 *
 * Copy out the preoslog buffer.  DumpPanic passes a non-zero write value
 * when it needs oneshot behaviour: only the first such request succeeds;
 * later oneshot requests fail with EPERM.
 */
static int
sysctl_kern_debug_get_preoslog SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	static bool oneshot_executed = false;
	size_t preoslog_size = 0;
	const char *preoslog = NULL;

	// DumpPanic passes a non-zero write value when it needs oneshot behaviour
	if (req->newptr) {
		uint8_t oneshot = 0;
		int error = SYSCTL_IN(req, &oneshot, sizeof(oneshot));
		if (error) {
			return error;
		}

		if (oneshot) {
			/* atomically claim the single oneshot slot; losers get EPERM */
			if (!OSCompareAndSwap8(false, true, &oneshot_executed)) {
				return EPERM;
			}
		}
	}

	preoslog = sysctl_debug_get_preoslog(&preoslog_size);
	if (preoslog == NULL || preoslog_size == 0) {
		return 0;
	}

	/* Size probe: report the buffer size without copying any data. */
	if (req->oldptr == USER_ADDR_NULL) {
		req->oldidx = preoslog_size;
		return 0;
	}

	return SYSCTL_OUT(req, preoslog, preoslog_size);
}

SYSCTL_PROC(_kern, OID_AUTO, preoslog, CTLTYPE_OPAQUE | CTLFLAG_RW | CTLFLAG_LOCKED,
    0, 0, sysctl_kern_debug_get_preoslog, "-", "");
3543
3544static int
3545sysctl_kern_task_set_filter_msg_flag SYSCTL_HANDLER_ARGS
3546{
3547#pragma unused(oidp, arg1, arg2)
3548 int new_value, changed;
3549 int old_value = task_get_filter_msg_flag(current_task()) ? 1 : 0;
3550 int error = sysctl_io_number(req, old_value, sizeof(int), &new_value, &changed);
3551
3552 if (changed) {
3553 task_set_filter_msg_flag(current_task(), !!new_value);
3554 }
3555
3556 return error;
3557}
3558
3559SYSCTL_PROC(_kern, OID_AUTO, task_set_filter_msg_flag, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
3560 0, 0, sysctl_kern_task_set_filter_msg_flag, "I", "");
3561
cb323159 3562#endif /* DEVELOPMENT || DEBUG */