]> git.saurik.com Git - apple/xnu.git/blame_incremental - bsd/kern/kern_resource.c
xnu-2050.48.11.tar.gz
[apple/xnu.git] / bsd / kern / kern_resource.c
... / ...
CommitLineData
1/*
2 * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/* Copyright (c) 1995, 1997 Apple Computer, Inc. All Rights Reserved */
29/*-
30 * Copyright (c) 1982, 1986, 1991, 1993
31 * The Regents of the University of California. All rights reserved.
32 * (c) UNIX System Laboratories, Inc.
33 * All or some portions of this file are derived from material licensed
34 * to the University of California by American Telephone and Telegraph
35 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
36 * the permission of UNIX System Laboratories, Inc.
37 *
38 * Redistribution and use in source and binary forms, with or without
39 * modification, are permitted provided that the following conditions
40 * are met:
41 * 1. Redistributions of source code must retain the above copyright
42 * notice, this list of conditions and the following disclaimer.
43 * 2. Redistributions in binary form must reproduce the above copyright
44 * notice, this list of conditions and the following disclaimer in the
45 * documentation and/or other materials provided with the distribution.
46 * 3. All advertising materials mentioning features or use of this software
47 * must display the following acknowledgement:
48 * This product includes software developed by the University of
49 * California, Berkeley and its contributors.
50 * 4. Neither the name of the University nor the names of its contributors
51 * may be used to endorse or promote products derived from this software
52 * without specific prior written permission.
53 *
54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 * SUCH DAMAGE.
65 *
66 * @(#)kern_resource.c 8.5 (Berkeley) 1/21/94
67 */
68/*
69 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
70 * support for mandatory and extensible security protections. This notice
71 * is included in support of clause 2.2 (b) of the Apple Public License,
72 * Version 2.0.
73 */
74
75#include <sys/param.h>
76#include <sys/systm.h>
77#include <sys/sysctl.h>
78#include <sys/kernel.h>
79#include <sys/file_internal.h>
80#include <sys/resourcevar.h>
81#include <sys/malloc.h>
82#include <sys/proc_internal.h>
83#include <sys/kauth.h>
84#include <machine/spl.h>
85
86#include <sys/mount_internal.h>
87#include <sys/sysproto.h>
88
89#include <security/audit/audit.h>
90
91#include <machine/vmparam.h>
92
93#include <mach/mach_types.h>
94#include <mach/time_value.h>
95#include <mach/task.h>
96#include <mach/task_info.h>
97#include <mach/vm_map.h>
98#include <mach/mach_vm.h>
99#include <mach/thread_act.h> /* for thread_policy_set( ) */
100#include <kern/lock.h>
101#include <kern/thread.h>
102
103#include <kern/task.h>
104#include <kern/clock.h> /* for absolutetime_to_microtime() */
105#include <netinet/in.h> /* for TRAFFIC_MGT_SO_* */
106#include <sys/socketvar.h> /* for struct socket */
107
108#include <vm/vm_map.h>
109
/* forward declarations for helpers defined below */
int	donice(struct proc *curp, struct proc *chgp, int n);
int	dosetrlimit(struct proc *p, u_int which, struct rlimit *limp);
int	uthread_get_background_state(uthread_t);
static void do_background_socket(struct proc *p, thread_t thread, int priority);
static int do_background_thread(struct proc *curp, thread_t thread, int priority);
static int do_background_proc(struct proc *curp, struct proc *targetp, int priority);
void proc_apply_task_networkbg_internal(proc_t, thread_t);
void proc_restore_task_networkbg_internal(proc_t, thread_t);

/* hard ceilings for RLIMIT_DATA / RLIMIT_STACK clamping in dosetrlimit() */
rlim_t maxdmap = MAXDSIZ;	/* XXX */
rlim_t maxsmap = MAXSSIZ - PAGE_SIZE;	/* XXX */

/*
 * Limits on the number of open files per process, and the number
 * of child processes per process.
 *
 * Note: would be in kern/subr_param.c in FreeBSD.
 */
__private_extern__ int maxfilesperproc = OPEN_MAX;	/* per-proc open files limit */

SYSCTL_INT(_kern, KERN_MAXPROCPERUID, maxprocperuid, CTLFLAG_RW | CTLFLAG_LOCKED,
		&maxprocperuid, 0, "Maximum processes allowed per userid" );

SYSCTL_INT(_kern, KERN_MAXFILESPERPROC, maxfilesperproc, CTLFLAG_RW | CTLFLAG_LOCKED,
		&maxfilesperproc, 0, "Maximum files allowed open per process" );

/* Args and fn for proc_iteration callback used in setpriority */
struct puser_nice_args {
	proc_t curp;		/* calling process */
	int	prio;		/* requested nice value */
	id_t	who;		/* target uid to match */
	int *	foundp;		/* out: count of matching processes */
	int *	errorp;		/* out: last donice() status */
};
static int puser_donice_callback(proc_t p, void * arg);


/* Args and fn for proc_iteration callback used in setpriority */
struct ppgrp_nice_args {
	proc_t curp;		/* calling process */
	int	prio;		/* requested nice value */
	int *	foundp;		/* out: count of group members visited */
	int *	errorp;		/* out: last donice() status */
};
static int ppgrp_donice_callback(proc_t p, void * arg);
155
156/*
157 * Resource controls and accounting.
158 */
/*
 * getpriority
 *
 * Return the lowest (most favorable) nice value among the processes
 * selected by uap->which / uap->who, or the background state of the
 * current thread for PRIO_DARWIN_THREAD.
 *
 * Returns:	0	Success (*retval holds the value)
 *		EINVAL	Bad which/who
 *		ESRCH	No matching process found
 */
int
getpriority(struct proc *curp, struct getpriority_args *uap, int32_t *retval)
{
	struct proc *p;
	int low = PRIO_MAX + 1;		/* sentinel: "nothing found yet" */
	kauth_cred_t my_cred;

	/* would also test (uap->who < 0), but id_t is unsigned */
	if (uap->who > 0x7fffffff)
		return (EINVAL);

	switch (uap->which) {

	case PRIO_PROCESS:
		/* who == 0 means the calling process itself */
		if (uap->who == 0) {
			p = curp;
			low = p->p_nice;
		} else {
			p = proc_find(uap->who);
			if (p == 0)
				break;
			low = p->p_nice;
			proc_rele(p);

		}
		break;

	case PRIO_PGRP: {
		struct pgrp *pg = PGRP_NULL;

		if (uap->who == 0) {
			/* returns the pgrp to ref */
			pg = proc_pgrp(curp);
		} else if ((pg = pgfind(uap->who)) == PGRP_NULL) {
			break;
		}
		/* No need for iteration as it is a simple scan */
		pgrp_lock(pg);
		for (p = pg->pg_members.lh_first; p != 0; p = p->p_pglist.le_next) {
			if (p->p_nice < low)
				low = p->p_nice;
		}
		pgrp_unlock(pg);
		pg_rele(pg);
		break;
	}

	case PRIO_USER:
		/* who == 0 means the caller's effective uid */
		if (uap->who == 0)
			uap->who = kauth_cred_getuid(kauth_cred_get());

		proc_list_lock();

		for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) {
			/*
			 * NOTE(review): taking/dropping a cred ref per proc
			 * while holding the proc list lock — presumably safe
			 * here (non-blocking); confirm against kauth rules.
			 */
			my_cred = kauth_cred_proc_ref(p);
			if (kauth_cred_getuid(my_cred) == uap->who &&
			    p->p_nice < low)
				low = p->p_nice;
			kauth_cred_unref(&my_cred);
		}

		proc_list_unlock();

		break;

	case PRIO_DARWIN_THREAD: {
		thread_t thread;
		struct uthread *ut;

		/* we currently only support the current thread */
		if (uap->who != 0) {
			return (EINVAL);
		}

		thread = current_thread();
		ut = get_bsdthread_info(thread);

		/* report 1 if the thread is in background (throttled) state */
		low = 0;
		if ( (ut->uu_flag & UT_BACKGROUND_TRAFFIC_MGT) != 0 ) {
			low = 1;
		}
		break;
	}

	default:
		return (EINVAL);
	}
	if (low == PRIO_MAX + 1)
		return (ESRCH);
	*retval = low;
	return (0);
}
251
252/* call back function used for proc iteration in PRIO_USER */
253static int
254puser_donice_callback(proc_t p, void * arg)
255{
256 int error, n;
257 struct puser_nice_args * pun = (struct puser_nice_args *)arg;
258 kauth_cred_t my_cred;
259
260 my_cred = kauth_cred_proc_ref(p);
261 if (kauth_cred_getuid(my_cred) == pun->who) {
262 error = donice(pun->curp, p, pun->prio);
263 if (pun->errorp != NULL)
264 *pun->errorp = error;
265 if (pun->foundp != NULL) {
266 n = *pun->foundp;
267 *pun->foundp = n+1;
268 }
269 }
270 kauth_cred_unref(&my_cred);
271
272 return(PROC_RETURNED);
273}
274
275/* call back function used for proc iteration in PRIO_PGRP */
276static int
277ppgrp_donice_callback(proc_t p, void * arg)
278{
279 int error;
280 struct ppgrp_nice_args * pun = (struct ppgrp_nice_args *)arg;
281 int n;
282
283 error = donice(pun->curp, p, pun->prio);
284 if (pun->errorp != NULL)
285 *pun->errorp = error;
286 if (pun->foundp!= NULL) {
287 n = *pun->foundp;
288 *pun->foundp = n+1;
289 }
290
291 return(PROC_RETURNED);
292}
293
294/*
295 * Returns: 0 Success
296 * EINVAL
297 * ESRCH
298 * donice:EPERM
299 * donice:EACCES
300 */
301/* ARGSUSED */
/*
 * setpriority
 *
 * Set the nice value of the processes selected by uap->which / uap->who,
 * or apply Darwin background policy (PRIO_DARWIN_THREAD /
 * PRIO_DARWIN_PROCESS).
 *
 * Returns:	0	Success
 *		EINVAL	Bad which/who or terminating target
 *		ESRCH	No matching process
 *	donice:EPERM / donice:EACCES
 */
int
setpriority(struct proc *curp, struct setpriority_args *uap, __unused int32_t *retval)
{
	struct proc *p;
	int found = 0, error = 0;
	int refheld = 0;	/* set when p came from proc_find() */

	AUDIT_ARG(cmd, uap->which);
	AUDIT_ARG(owner, uap->who, 0);
	AUDIT_ARG(value32, uap->prio);

	/* would also test (uap->who < 0), but id_t is unsigned */
	if (uap->who > 0x7fffffff)
		return (EINVAL);

	switch (uap->which) {

	case PRIO_PROCESS:
		/* who == 0 means the calling process itself */
		if (uap->who == 0)
			p = curp;
		else {
			p = proc_find(uap->who);
			if (p == 0)
				break;
			refheld = 1;
		}
		error = donice(curp, p, uap->prio);
		found++;
		if (refheld != 0)
			proc_rele(p);
		break;

	case PRIO_PGRP: {
		struct pgrp *pg = PGRP_NULL;
		struct ppgrp_nice_args ppgrp;

		if (uap->who == 0) {
			pg = proc_pgrp(curp);
		} else if ((pg = pgfind(uap->who)) == PGRP_NULL)
			break;

		ppgrp.curp = curp;
		ppgrp.prio = uap->prio;
		ppgrp.foundp = &found;
		ppgrp.errorp = &error;

		/* PGRP_DROPREF drops the reference on process group */
		pgrp_iterate(pg, PGRP_DROPREF, ppgrp_donice_callback, (void *)&ppgrp, NULL, NULL);

		break;
	}

	case PRIO_USER: {
		struct puser_nice_args punice;

		/* who == 0 means the caller's effective uid */
		if (uap->who == 0)
			uap->who = kauth_cred_getuid(kauth_cred_get());

		punice.curp = curp;
		punice.prio = uap->prio;
		punice.who = uap->who;
		punice.foundp = &found;
		error = 0;
		punice.errorp = &error;
		proc_iterate(PROC_ALLPROCLIST, puser_donice_callback, (void *)&punice, NULL, NULL);

		break;
	}

	case PRIO_DARWIN_THREAD: {
		/* process marked for termination no priority management */
		if ((curp->p_lflag & P_LPTERMINATE) != 0)
			return(EINVAL);
		/* we currently only support the current thread */
		if (uap->who != 0) {
			return (EINVAL);
		}
		error = do_background_thread(curp, current_thread(), uap->prio);
		if (!error) {
			/* keep socket traffic-mgt flags in sync with policy */
			(void) do_background_socket(curp, current_thread(), uap->prio);
		}
		found++;
		break;
	}

	case PRIO_DARWIN_PROCESS: {
		if (uap->who == 0)
			p = curp;
		else {
			p = proc_find(uap->who);
			if (p == 0)
				break;
			refheld = 1;
		}

		/* process marked for termination no priority management */
		if ((p->p_lflag & P_LPTERMINATE) != 0) {
			error = EINVAL;
		} else {
			error = do_background_proc(curp, p, uap->prio);
			if (!error) {
				(void) do_background_socket(p, NULL, uap->prio);
			}

		}
		found++;
		if (refheld != 0)
			proc_rele(p);
		break;
	}

	default:
		return (EINVAL);
	}
	if (found == 0)
		return (ESRCH);
	return (error);
}
420
421
422/*
423 * Returns: 0 Success
424 * EPERM
425 * EACCES
426 * mac_check_proc_sched:???
427 */
/*
 * donice
 *
 * Set chgp's nice value to n (clamped to [PRIO_MIN, PRIO_MAX]) on
 * behalf of curp, after permission checks.
 *
 * Returns:	0	Success
 *		EPERM	Caller may not renice this process
 *		EACCES	Non-root caller tried to lower a nice value
 *	mac_proc_check_sched:???
 */
int
donice(struct proc *curp, struct proc *chgp, int n)
{
	int error = 0;
	kauth_cred_t ucred;
	kauth_cred_t my_cred;

	/* both refs dropped at "out" on every path */
	ucred = kauth_cred_proc_ref(curp);
	my_cred = kauth_cred_proc_ref(chgp);

	/*
	 * Non-root callers with a non-zero real uid may only renice
	 * processes whose effective uid matches their own effective or
	 * real uid.
	 */
	if (suser(ucred, NULL) && kauth_cred_getruid(ucred) &&
	    kauth_cred_getuid(ucred) != kauth_cred_getuid(my_cred) &&
	    kauth_cred_getruid(ucred) != kauth_cred_getuid(my_cred)) {
		error = EPERM;
		goto out;
	}
	/* clamp requested nice value into the legal range */
	if (n > PRIO_MAX)
		n = PRIO_MAX;
	if (n < PRIO_MIN)
		n = PRIO_MIN;
	/* lowering (improving) a nice value requires super-user */
	if (n < chgp->p_nice && suser(ucred, &curp->p_acflag)) {
		error = EACCES;
		goto out;
	}
#if CONFIG_MACF
	error = mac_proc_check_sched(curp, chgp);
	if (error)
		goto out;
#endif
	proc_lock(chgp);
	chgp->p_nice = n;
	proc_unlock(chgp);
	(void)resetpriority(chgp);
out:
	kauth_cred_unref(&ucred);
	kauth_cred_unref(&my_cred);
	return (error);
}
466
467static int
468do_background_proc(struct proc *curp, struct proc *targetp, int priority)
469{
470 int error = 0;
471 kauth_cred_t ucred;
472 kauth_cred_t target_cred;
473
474 ucred = kauth_cred_get();
475 target_cred = kauth_cred_proc_ref(targetp);
476
477 if (!kauth_cred_issuser(ucred) && kauth_cred_getruid(ucred) &&
478 kauth_cred_getuid(ucred) != kauth_cred_getuid(target_cred) &&
479 kauth_cred_getruid(ucred) != kauth_cred_getuid(target_cred))
480 {
481 error = EPERM;
482 goto out;
483 }
484
485#if CONFIG_MACF
486 error = mac_proc_check_sched(curp, targetp);
487 if (error)
488 goto out;
489#endif
490
491 if (priority == PRIO_DARWIN_NONUI)
492 error = proc_apply_task_gpuacc(targetp->task, TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_NOACCESS);
493 else
494 error = proc_set_and_apply_bgtaskpolicy(targetp->task, priority);
495 if (error)
496 goto out;
497
498out:
499 kauth_cred_unref(&target_cred);
500 return (error);
501}
502
/*
 * do_background_socket
 *
 * Set or clear TRAFFIC_MGT_SO_BACKGROUND on the sockets open in "p".
 *
 * priority == PRIO_DARWIN_BG:
 *	thread == NULL (process scope): flag every open socket.
 *	thread != NULL (thread scope): nothing to do here (see comment
 *	below).
 * otherwise (un-background):
 *	clear the flag on every open socket; when thread != NULL only
 *	sockets tagged with that thread are touched.
 *
 * Takes and drops the proc fd lock internally.
 */
static void
do_background_socket(struct proc *p, thread_t thread, int priority)
{
	struct filedesc *fdp;
	struct fileproc *fp;
	int i;

	if (priority == PRIO_DARWIN_BG) {
		/*
		 * For PRIO_DARWIN_PROCESS (thread is NULL), simply mark
		 * the sockets with the background flag.  There's nothing
		 * to do here for the PRIO_DARWIN_THREAD case.
		 */
		if (thread == NULL) {
			proc_fdlock(p);
			fdp = p->p_fd;

			for (i = 0; i < fdp->fd_nfiles; i++) {
				struct socket *sockp;

				/* skip closed, reserved, and non-socket descriptors */
				fp = fdp->fd_ofiles[i];
				if (fp == NULL || (fdp->fd_ofileflags[i] & UF_RESERVED) != 0 ||
				    fp->f_fglob->fg_type != DTYPE_SOCKET) {
					continue;
				}
				sockp = (struct socket *)fp->f_fglob->fg_data;
				socket_set_traffic_mgt_flags(sockp, TRAFFIC_MGT_SO_BACKGROUND);
				/* process-wide marking owns the socket, not a thread */
				sockp->so_background_thread = NULL;
			}
			proc_fdunlock(p);
		}

	} else {

		/* disable networking IO throttle.
		 * NOTE - It is a known limitation of the current design that we
		 * could potentially clear TRAFFIC_MGT_SO_BACKGROUND bit for
		 * sockets created by other threads within this process.
		 */
		proc_fdlock(p);
		fdp = p->p_fd;
		for ( i = 0; i < fdp->fd_nfiles; i++ ) {
			struct socket *sockp;

			fp = fdp->fd_ofiles[ i ];
			if ( fp == NULL || (fdp->fd_ofileflags[ i ] & UF_RESERVED) != 0 ||
			     fp->f_fglob->fg_type != DTYPE_SOCKET ) {
				continue;
			}
			sockp = (struct socket *)fp->f_fglob->fg_data;
			/* skip if only clearing this thread's sockets */
			if ((thread) && (sockp->so_background_thread != thread)) {
				continue;
			}
			socket_clear_traffic_mgt_flags(sockp, TRAFFIC_MGT_SO_BACKGROUND);
			sockp->so_background_thread = NULL;
		}
		proc_fdunlock(p);
	}
}
563
564
565/*
566 * do_background_thread
567 * Returns: 0 Success
568 * XXX - todo - does this need a MACF hook?
569 *
570 * NOTE: To maintain binary compatibility with PRIO_DARWIN_THREAD with respect
571 * to network traffic management, UT_BACKGROUND_TRAFFIC_MGT is set/cleared
572 * along with UT_BACKGROUND flag, as the latter alone no longer implies
573 * any form of traffic regulation (it simply means that the thread is
574 * background.) With PRIO_DARWIN_PROCESS, any form of network traffic
575 * management must be explicitly requested via whatever means appropriate,
576 * and only TRAFFIC_MGT_SO_BACKGROUND is set via do_background_socket().
577 */
578static int
579do_background_thread(struct proc *curp __unused, thread_t thread, int priority)
580{
581 struct uthread *ut;
582 int error = 0;
583
584 ut = get_bsdthread_info(thread);
585
586 /* Backgrounding is unsupported for threads in vfork */
587 if ( (ut->uu_flag & UT_VFORK) != 0) {
588 return(EPERM);
589 }
590
591 error = proc_set_and_apply_bgthreadpolicy(curp->task, thread_tid(thread), priority);
592 return(error);
593
594}
595
#if CONFIG_EMBEDDED
int mach_do_background_thread(thread_t thread, int prio);

/*
 * mach_do_background_thread
 *
 * Kernel-internal entry to background an arbitrary thread.  Unlike the
 * BSD paths above, this returns kern_return_t-style codes (KERN_*)
 * rather than errno values.
 *
 * Returns:	KERN_SUCCESS			Success
 *		KERN_INVALID_ARGUMENT		Thread has no BSD proc
 *		KERN_FAILURE			No calling proc / policy failure
 *		KERN_PROTECTION_FAILURE		Permission denied
 */
int
mach_do_background_thread(thread_t thread, int prio)
{
	int error = 0;
	struct proc *curp = NULL;
	struct proc *targetp = NULL;
	kauth_cred_t ucred;

	/* thread -> task -> BSD proc; fails for tasks with no proc */
	targetp = get_bsdtask_info(get_threadtask(thread));
	if (!targetp) {
		return KERN_INVALID_ARGUMENT;
	}

	curp = proc_self();
	if (curp == PROC_NULL) {
		return KERN_FAILURE;
	}

	ucred = kauth_cred_proc_ref(curp);

	/* non-root callers may only background their own threads */
	if (suser(ucred, NULL) && curp != targetp) {
		error = KERN_PROTECTION_FAILURE;
		goto out;
	}

	error = do_background_thread(curp, thread, prio);
	if (!error) {
		(void) do_background_socket(curp, thread, prio);
	} else {
		/* translate errno-style failure into kern_return_t */
		if (error == EPERM) {
			error = KERN_PROTECTION_FAILURE;
		} else {
			error = KERN_FAILURE;
		}
	}

out:
	proc_rele(curp);
	kauth_cred_unref(&ucred);
	return error;
}
#endif /* CONFIG_EMBEDDED */
641
642/*
643 * Returns: 0 Success
644 * copyin:EFAULT
645 * dosetrlimit:
646 */
647/* ARGSUSED */
648int
649setrlimit(struct proc *p, struct setrlimit_args *uap, __unused int32_t *retval)
650{
651 struct rlimit alim;
652 int error;
653
654 if ((error = copyin(uap->rlp, (caddr_t)&alim,
655 sizeof (struct rlimit))))
656 return (error);
657
658 return (dosetrlimit(p, uap->which, &alim));
659}
660
661/*
662 * Returns: 0 Success
663 * EINVAL
664 * ENOMEM Cannot copy limit structure
665 * suser:EPERM
666 *
667 * Notes: EINVAL is returned both for invalid arguments, and in the
668 * case that the current usage (e.g. RLIMIT_STACK) is already
669 * in excess of the requested limit.
670 */
/*
 * dosetrlimit
 *
 * Validate and install a new resource limit for "which", performing the
 * per-resource side effects (CPU vtimer, stack protection, clamping).
 * The _RLIMIT_POSIX_FLAG bit in "which" selects strict POSIX behavior
 * (reject instead of clamp).
 *
 * Returns:	0	Success
 *		EINVAL	Bad limit, or current usage exceeds new limit
 *	suser:EPERM
 *	proc_limitreplace:ENOMEM
 */
int
dosetrlimit(struct proc *p, u_int which, struct rlimit *limp)
{
	struct rlimit *alimp;
	int error;
	kern_return_t kr;
	int posix = (which & _RLIMIT_POSIX_FLAG) ? 1 : 0;

	/* Mask out POSIX flag, saved above */
	which &= ~_RLIMIT_POSIX_FLAG;

	if (which >= RLIM_NLIMITS)
		return (EINVAL);

	alimp = &p->p_rlimit[which];
	if (limp->rlim_cur > limp->rlim_max)
		return EINVAL;

	/* raising either value above the current hard limit needs root */
	if (limp->rlim_cur > alimp->rlim_max ||
	    limp->rlim_max > alimp->rlim_max)
		if ((error = suser(kauth_cred_get(), &p->p_acflag))) {
			return (error);
	}

	/* serialize limit changes against concurrent updaters */
	proc_limitblock(p);

	/* give this proc a private plimit copy if it shares one */
	if ((error = proc_limitreplace(p)) != 0) {
		proc_limitunblock(p);
		return(error);
	}

	/* re-fetch: proc_limitreplace() may have swapped p_limit */
	alimp = &p->p_rlimit[which];

	switch (which) {

	case RLIMIT_CPU:
		if (limp->rlim_cur == RLIM_INFINITY) {
			task_vtimer_clear(p->task, TASK_VTIMER_RLIM);
			timerclear(&p->p_rlim_cpu);
		}
		else {
			task_absolutetime_info_data_t tinfo;
			mach_msg_type_number_t count;
			struct timeval ttv, tv;
			clock_sec_t tv_sec;
			clock_usec_t tv_usec;

			/* how much CPU time has this task already used? */
			count = TASK_ABSOLUTETIME_INFO_COUNT;
			task_info(p->task, TASK_ABSOLUTETIME_INFO,
				(task_info_t)&tinfo, &count);
			absolutetime_to_microtime(tinfo.total_user + tinfo.total_system,
				&tv_sec, &tv_usec);
			ttv.tv_sec = tv_sec;
			ttv.tv_usec = tv_usec;

			/* remaining budget = new limit - usage so far */
			tv.tv_sec = (limp->rlim_cur > __INT_MAX__ ? __INT_MAX__ : limp->rlim_cur);
			tv.tv_usec = 0;
			timersub(&tv, &ttv, &p->p_rlim_cpu);

			timerclear(&tv);
			if (timercmp(&p->p_rlim_cpu, &tv, >))
				task_vtimer_set(p->task, TASK_VTIMER_RLIM);
			else {
				/* budget already exhausted: signal now */
				task_vtimer_clear(p->task, TASK_VTIMER_RLIM);

				timerclear(&p->p_rlim_cpu);

				psignal(p, SIGXCPU);
			}
		}
		break;

	case RLIMIT_DATA:
		/* silently clamp to the system-wide data-segment ceiling */
		if (limp->rlim_cur > maxdmap)
			limp->rlim_cur = maxdmap;
		if (limp->rlim_max > maxdmap)
			limp->rlim_max = maxdmap;
		break;

	case RLIMIT_STACK:
		/* Disallow illegal stack size instead of clipping */
		if (limp->rlim_cur > maxsmap ||
		    limp->rlim_max > maxsmap) {
			if (posix) {
				error = EINVAL;
				goto out;
			}
			else {
				/*
				 * 4797860 - workaround poorly written installers by
				 * doing previous implementation (< 10.5) when caller
				 * is non-POSIX conforming.
				 */
				if (limp->rlim_cur > maxsmap)
					limp->rlim_cur = maxsmap;
				if (limp->rlim_max > maxsmap)
					limp->rlim_max = maxsmap;
			}
		}

		/*
		 * Stack is allocated to the max at exec time with only
		 * "rlim_cur" bytes accessible.  If stack limit is going
		 * up make more accessible, if going down make inaccessible.
		 */
		if (limp->rlim_cur > alimp->rlim_cur) {
			user_addr_t addr;
			user_size_t size;

			/* grow stack */
			size = round_page_64(limp->rlim_cur);
			size -= round_page_64(alimp->rlim_cur);

			addr = p->user_stack - round_page_64(limp->rlim_cur);
			kr = mach_vm_protect(current_map(),
					     addr, size,
					     FALSE, VM_PROT_DEFAULT);
			if (kr != KERN_SUCCESS) {
				error = EINVAL;
				goto out;
			}
		} else if (limp->rlim_cur < alimp->rlim_cur) {
			user_addr_t addr;
			user_size_t size;
			user_addr_t cur_sp;

			/* shrink stack */

			/*
			 * First check if new stack limit would agree
			 * with current stack usage.
			 * Get the current thread's stack pointer...
			 */
			cur_sp = thread_adjuserstack(current_thread(),
						     0);
			if (cur_sp <= p->user_stack &&
			    cur_sp > (p->user_stack -
				      round_page_64(alimp->rlim_cur))) {
				/* stack pointer is in main stack */
				if (cur_sp <= (p->user_stack -
					       round_page_64(limp->rlim_cur))) {
					/*
					 * New limit would cause
					 * current usage to be invalid:
					 * reject new limit.
					 */
					error = EINVAL;
					goto out;
				}
			} else {
				/* not on the main stack: reject */
				error = EINVAL;
				goto out;
			}

			size = round_page_64(alimp->rlim_cur);
			size -= round_page_64(limp->rlim_cur);

			addr = p->user_stack - round_page_64(alimp->rlim_cur);

			kr = mach_vm_protect(current_map(),
					     addr, size,
					     FALSE, VM_PROT_NONE);
			if (kr != KERN_SUCCESS) {
				error = EINVAL;
				goto out;
			}
		} else {
			/* no change ... */
		}
		break;

	case RLIMIT_NOFILE:
		/*
		 * Only root can set the maxfiles limits, as it is
		 * systemwide resource.  If we are expecting POSIX behavior,
		 * instead of clamping the value, return EINVAL.  We do this
		 * because historically, people have been able to attempt to
		 * set RLIM_INFINITY to get "whatever the maximum is".
		*/
		if ( is_suser() ) {
			if (limp->rlim_cur != alimp->rlim_cur &&
			    limp->rlim_cur > (rlim_t)maxfiles) {
				if (posix) {
					error = EINVAL;
					goto out;
				}
				limp->rlim_cur = maxfiles;
			}
			if (limp->rlim_max != alimp->rlim_max &&
			    limp->rlim_max > (rlim_t)maxfiles)
				limp->rlim_max = maxfiles;
		}
		else {
			if (limp->rlim_cur != alimp->rlim_cur &&
			    limp->rlim_cur > (rlim_t)maxfilesperproc) {
				if (posix) {
					error = EINVAL;
					goto out;
				}
				limp->rlim_cur = maxfilesperproc;
			}
			if (limp->rlim_max != alimp->rlim_max &&
			    limp->rlim_max > (rlim_t)maxfilesperproc)
				limp->rlim_max = maxfilesperproc;
		}
		break;

	case RLIMIT_NPROC:
		/*
		 * Only root can set to the maxproc limits, as it is
		 * systemwide resource; all others are limited to
		 * maxprocperuid (presumably less than maxproc).
		 */
		if ( is_suser() ) {
			if (limp->rlim_cur > (rlim_t)maxproc)
				limp->rlim_cur = maxproc;
			if (limp->rlim_max > (rlim_t)maxproc)
				limp->rlim_max = maxproc;
		}
		else {
			if (limp->rlim_cur > (rlim_t)maxprocperuid)
				limp->rlim_cur = maxprocperuid;
			if (limp->rlim_max > (rlim_t)maxprocperuid)
				limp->rlim_max = maxprocperuid;
		}
		break;

	case RLIMIT_MEMLOCK:
		/*
		 * Tell the Mach VM layer about the new limit value.
		 */

		vm_map_set_user_wire_limit(current_map(), limp->rlim_cur);
		break;

	} /* switch... */
	proc_lock(p);
	*alimp = *limp;
	proc_unlock(p);
	error = 0;
out:
	proc_limitunblock(p);
	return (error);
}
916
917/* ARGSUSED */
918int
919getrlimit(struct proc *p, struct getrlimit_args *uap, __unused int32_t *retval)
920{
921 struct rlimit lim;
922
923 /*
924 * Take out flag now in case we need to use it to trigger variant
925 * behaviour later.
926 */
927 uap->which &= ~_RLIMIT_POSIX_FLAG;
928
929 if (uap->which >= RLIM_NLIMITS)
930 return (EINVAL);
931 proc_limitget(p, uap->which, &lim);
932 return (copyout((caddr_t)&lim,
933 uap->rlp, sizeof (struct rlimit)));
934}
935
936/*
937 * Transform the running time and tick information in proc p into user,
938 * system, and interrupt time usage.
939 */
940/* No lock on proc is held for this.. */
/*
 * calcru
 *
 * Fill *up / *sp (and *ip if non-NULL, always zero here) with the
 * process's user and system CPU time as reported by the Mach task
 * layer, and refresh fault/context-switch/maxrss accounting in
 * p->p_stats as a side effect.  Reports all zeros when the proc has no
 * Mach task.
 */
void
calcru(struct proc *p, struct timeval *up, struct timeval *sp, struct timeval *ip)
{
	task_t task;

	timerclear(up);
	timerclear(sp);
	if (ip != NULL)
		timerclear(ip);

	task = p->task;
	if (task) {
		mach_task_basic_info_data_t tinfo;
		task_thread_times_info_data_t ttimesinfo;
		task_events_info_data_t teventsinfo;
		mach_msg_type_number_t task_info_count, task_ttimes_count;
		mach_msg_type_number_t task_events_count;
		struct timeval ut,st;

		/*
		 * Add in the basic-info times (presumably the times of
		 * already-terminated threads — confirm against task_info
		 * docs) ...
		 */
		task_info_count = MACH_TASK_BASIC_INFO_COUNT;
		task_info(task, MACH_TASK_BASIC_INFO,
			  (task_info_t)&tinfo, &task_info_count);
		ut.tv_sec = tinfo.user_time.seconds;
		ut.tv_usec = tinfo.user_time.microseconds;
		st.tv_sec = tinfo.system_time.seconds;
		st.tv_usec = tinfo.system_time.microseconds;
		timeradd(&ut, up, up);
		timeradd(&st, sp, sp);

		/* ... plus the per-thread times of live threads */
		task_ttimes_count = TASK_THREAD_TIMES_INFO_COUNT;
		task_info(task, TASK_THREAD_TIMES_INFO,
			  (task_info_t)&ttimesinfo, &task_ttimes_count);

		ut.tv_sec = ttimesinfo.user_time.seconds;
		ut.tv_usec = ttimesinfo.user_time.microseconds;
		st.tv_sec = ttimesinfo.system_time.seconds;
		st.tv_usec = ttimesinfo.system_time.microseconds;
		timeradd(&ut, up, up);
		timeradd(&st, sp, sp);

		task_events_count = TASK_EVENTS_INFO_COUNT;
		task_info(task, TASK_EVENTS_INFO,
			  (task_info_t)&teventsinfo, &task_events_count);

		/*
		 * No need to lock "p":  this does not need to be
		 * completely consistent, right ?
		 */
		p->p_stats->p_ru.ru_minflt = (teventsinfo.faults -
					      teventsinfo.pageins);
		p->p_stats->p_ru.ru_majflt = teventsinfo.pageins;
		p->p_stats->p_ru.ru_nivcsw = (teventsinfo.csw -
					      p->p_stats->p_ru.ru_nvcsw);
		/* clamp: total switches may lag the voluntary count */
		if (p->p_stats->p_ru.ru_nivcsw < 0)
			p->p_stats->p_ru.ru_nivcsw = 0;

		p->p_stats->p_ru.ru_maxrss = tinfo.resident_size_max;
	}
}
1000
1001__private_extern__ void munge_user64_rusage(struct rusage *a_rusage_p, struct user64_rusage *a_user_rusage_p);
1002__private_extern__ void munge_user32_rusage(struct rusage *a_rusage_p, struct user32_rusage *a_user_rusage_p);
1003
1004/* ARGSUSED */
/*
 * getrusage
 *
 * Copy out resource usage for the calling process (RUSAGE_SELF) or its
 * reaped children (RUSAGE_CHILDREN), munged to the caller's word size.
 *
 * Returns:	0	Success
 *		EINVAL	Unknown uap->who
 *	copyout:EFAULT
 */
int
getrusage(struct proc *p, struct getrusage_args *uap, __unused int32_t *retval)
{
	struct rusage *rup, rubuf;
	struct user64_rusage rubuf64;
	struct user32_rusage rubuf32;
	size_t retsize = sizeof(rubuf);			/* default: 32 bits */
	caddr_t retbuf = (caddr_t)&rubuf;		/* default: 32 bits */
	struct timeval utime;
	struct timeval stime;


	switch (uap->who) {
	case RUSAGE_SELF:
		/* refresh CPU times from Mach before snapshotting */
		calcru(p, &utime, &stime, NULL);
		proc_lock(p);
		rup = &p->p_stats->p_ru;
		rup->ru_utime = utime;
		rup->ru_stime = stime;

		/* snapshot under the proc lock, copy out after dropping it */
		rubuf = *rup;
		proc_unlock(p);

		break;

	case RUSAGE_CHILDREN:
		proc_lock(p);
		rup = &p->p_stats->p_cru;
		rubuf = *rup;
		proc_unlock(p);
		break;

	default:
		return (EINVAL);
	}
	if (IS_64BIT_PROCESS(p)) {
		retsize = sizeof(rubuf64);
		retbuf = (caddr_t)&rubuf64;
		munge_user64_rusage(&rubuf, &rubuf64);
	} else {
		retsize = sizeof(rubuf32);
		retbuf = (caddr_t)&rubuf32;
		munge_user32_rusage(&rubuf, &rubuf32);
	}

	return (copyout(retbuf, uap->rusage, retsize));
}
1052
/*
 * ruadd
 *
 * Accumulate ru2 into ru: the timevals add, ru_maxrss takes the larger
 * of the two, and every remaining long-sized field is summed pairwise.
 */
void
ruadd(struct rusage *ru, struct rusage *ru2)
{
	long *ip, *ip2;
	long i;

	timeradd(&ru->ru_utime, &ru2->ru_utime, &ru->ru_utime);
	timeradd(&ru->ru_stime, &ru2->ru_stime, &ru->ru_stime);
	if (ru->ru_maxrss < ru2->ru_maxrss)
		ru->ru_maxrss = ru2->ru_maxrss;
	/*
	 * Walk the run of long fields bracketed by the BSD ru_first /
	 * ru_last markers and add them element-wise.
	 */
	ip = &ru->ru_first; ip2 = &ru2->ru_first;
	for (i = &ru->ru_last - &ru->ru_first; i >= 0; i--)
		*ip++ += *ip2++;
}
1067
1068void
1069proc_limitget(proc_t p, int which, struct rlimit * limp)
1070{
1071 proc_list_lock();
1072 limp->rlim_cur = p->p_rlimit[which].rlim_cur;
1073 limp->rlim_max = p->p_rlimit[which].rlim_max;
1074 proc_list_unlock();
1075}
1076
1077
/*
 * proc_limitdrop
 *
 * Drop this process's references on its plimit structure(s), freeing
 * each one when its refcount reaches zero.  The old (pre-replace)
 * p_olimit is only released when the process is exiting.
 */
void
proc_limitdrop(proc_t p, int exiting)
{
	struct plimit * freelim = NULL;
	struct plimit * freeoldlim = NULL;

	proc_list_lock();

	if (--p->p_limit->pl_refcnt == 0) {
		freelim = p->p_limit;
		p->p_limit = NULL;
	}
	if ((exiting != 0) && (p->p_olimit != NULL) && (--p->p_olimit->pl_refcnt == 0)) {
		freeoldlim = p->p_olimit;
		p->p_olimit = NULL;
	}

	proc_list_unlock();
	/* free outside the proc list lock */
	if (freelim != NULL)
		FREE_ZONE(freelim, sizeof *p->p_limit, M_PLIMIT);
	if (freeoldlim != NULL)
		FREE_ZONE(freeoldlim, sizeof *p->p_olimit, M_PLIMIT);
}
1101
1102
1103void
1104proc_limitfork(proc_t parent, proc_t child)
1105{
1106 proc_list_lock();
1107 child->p_limit = parent->p_limit;
1108 child->p_limit->pl_refcnt++;
1109 child->p_olimit = NULL;
1110 proc_list_unlock();
1111}
1112
/*
 * proc_limitblock
 *
 * Serialize limit updates on this proc: sleep until no other thread
 * holds P_LLIMCHANGE, then claim it.  Paired with proc_limitunblock(),
 * which wakes sleepers on &p->p_olimit.
 */
void
proc_limitblock(proc_t p)
{
	proc_lock(p);
	while (p->p_lflag & P_LLIMCHANGE) {
		p->p_lflag |= P_LLIMWAIT;
		/* msleep drops and re-takes p_mlock around the sleep */
		msleep(&p->p_olimit, &p->p_mlock, 0, "proc_limitblock", NULL);
	}
	p->p_lflag |= P_LLIMCHANGE;
	proc_unlock(p);

}
1125
1126
/*
 * proc_limitunblock
 *
 * Release the P_LLIMCHANGE claim taken by proc_limitblock() and wake
 * any threads waiting to take it.
 */
void
proc_limitunblock(proc_t p)
{
	proc_lock(p);
	p->p_lflag &= ~P_LLIMCHANGE;
	if (p->p_lflag & P_LLIMWAIT) {
		p->p_lflag &= ~P_LLIMWAIT;
		wakeup(&p->p_olimit);
	}
	proc_unlock(p);
}
1138
1139/* This is called behind serialization provided by proc_limitblock/unlbock */
/*
 * proc_limitreplace
 *
 * Copy-on-write for the plimit: if p shares its plimit with another
 * proc (refcnt > 1), allocate a private copy so a subsequent limit
 * change does not affect the sharers.  Runs behind the serialization
 * provided by proc_limitblock()/proc_limitunblock().
 *
 * Returns:	0	Success (already private, or replaced)
 *		ENOMEM	Could not allocate the copy
 */
int
proc_limitreplace(proc_t p)
{
	struct plimit *copy;


	proc_list_lock();

	/* sole owner: nothing to do */
	if (p->p_limit->pl_refcnt == 1) {
		proc_list_unlock();
		return(0);
	}

	/* drop the lock across the blocking allocation */
	proc_list_unlock();

	MALLOC_ZONE(copy, struct plimit *,
			sizeof(struct plimit), M_PLIMIT, M_WAITOK);
	if (copy == NULL) {
		return(ENOMEM);
	}

	proc_list_lock();
	bcopy(p->p_limit->pl_rlimit, copy->pl_rlimit,
	    sizeof(struct rlimit) * RLIM_NLIMITS);
	copy->pl_refcnt = 1;
	/* hang on to reference to old till process exits */
	p->p_olimit = p->p_limit;
	p->p_limit = copy;
	proc_list_unlock();

	return(0);
}
1172
1173
1174/*
1175 * iopolicysys
1176 *
1177 * Description: System call MUX for use in manipulating I/O policy attributes of the current process or thread
1178 *
1179 * Parameters: cmd Policy command
1180 * arg Pointer to policy arguments
1181 *
1182 * Returns: 0 Success
1183 * EINVAL Invalid command or invalid policy arguments
1184 *
1185 */
/*
 * iopolicysys
 *
 * Get or set the disk I/O policy for the current process or thread.
 * Only IOPOL_TYPE_DISK is accepted; scope selects process-wide vs
 * per-thread policy.
 *
 * Returns:	0	Success
 *		EINVAL	Bad command, type, scope, or policy
 *	copyin/copyout:EFAULT
 *
 * Note: *retval mirrors the error code on every exit path.
 */
int
iopolicysys(__unused struct proc *p, __unused struct iopolicysys_args *uap, __unused int32_t *retval)
{
	int error = 0;
	struct _iopol_param_t iop_param;
	int processwide = 0;

	if ((error = copyin(uap->arg, &iop_param, sizeof(iop_param))) != 0)
		goto out;

	/* only the disk I/O policy type is supported */
	if (iop_param.iop_iotype != IOPOL_TYPE_DISK) {
		error = EINVAL;
		goto out;
	}

	switch (iop_param.iop_scope) {
	case IOPOL_SCOPE_PROCESS:
		processwide = 1;
		break;
	case IOPOL_SCOPE_THREAD:
		processwide = 0;
		break;
	default:
		error = EINVAL;
		goto out;
	}

	switch(uap->cmd) {
	case IOPOL_CMD_SET:
		switch (iop_param.iop_policy) {
		case IOPOL_DEFAULT:
		case IOPOL_NORMAL:
		case IOPOL_THROTTLE:
		case IOPOL_PASSIVE:
		case IOPOL_UTILITY:
			if(processwide != 0)
				proc_apply_task_diskacc(current_task(), iop_param.iop_policy);
			else
				proc_apply_thread_selfdiskacc(iop_param.iop_policy);

			break;
		default:
			error = EINVAL;
			goto out;
		}
		break;

	case IOPOL_CMD_GET:
		if(processwide != 0)
			iop_param.iop_policy = proc_get_task_disacc(current_task());
		else
			iop_param.iop_policy = proc_get_thread_selfdiskacc();

		error = copyout((caddr_t)&iop_param, uap->arg, sizeof(iop_param));

		break;
	default:
		error = EINVAL; // unknown command
		break;
	}

out:
	*retval = error;
	return (error);
}
1251
1252
1253boolean_t thread_is_io_throttled(void);
1254
1255boolean_t
1256thread_is_io_throttled(void)
1257{
1258 return(proc_get_task_selfdiskacc() == IOPOL_THROTTLE);
1259}
1260
1261void
1262proc_apply_task_networkbg(void * bsd_info)
1263{
1264 proc_t p = PROC_NULL;
1265 proc_t curp = (proc_t)bsd_info;
1266 pid_t pid;
1267
1268 pid = curp->p_pid;
1269 p = proc_find(pid);
1270 if (p != PROC_NULL) {
1271 do_background_socket(p, NULL, PRIO_DARWIN_BG);
1272 proc_rele(p);
1273 }
1274}
1275
1276void
1277proc_restore_task_networkbg(void * bsd_info)
1278{
1279 proc_t p = PROC_NULL;
1280 proc_t curp = (proc_t)bsd_info;
1281 pid_t pid;
1282
1283 pid = curp->p_pid;
1284 p = proc_find(pid);
1285 if (p != PROC_NULL) {
1286 do_background_socket(p, NULL, 0);
1287 proc_rele(p);
1288 }
1289
1290}
1291
/*
 * Convenience switch: apply (setbg != 0) or restore (setbg == 0) the
 * network-background state for the task's sockets.
 */
void
proc_set_task_networkbg(void * bsdinfo, int setbg)
{
	if (setbg == 0) {
		proc_restore_task_networkbg(bsdinfo);
	} else {
		proc_apply_task_networkbg(bsdinfo);
	}
}
1300
1301void
1302proc_apply_task_networkbg_internal(proc_t p, thread_t thread)
1303{
1304 if (p != PROC_NULL) {
1305 do_background_socket(p, thread, PRIO_DARWIN_BG);
1306 }
1307}
1308void
1309proc_restore_task_networkbg_internal(proc_t p, thread_t thread)
1310{
1311 if (p != PROC_NULL) {
1312 do_background_socket(p, thread, PRIO_DARWIN_BG);
1313 }
1314}
1315