]> git.saurik.com Git - apple/xnu.git/blob - bsd/kern/kern_resource.c
ca41339ea902e04ab7d027c39d20e41d48a8e29e
[apple/xnu.git] / bsd / kern / kern_resource.c
1 /*
2 * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /* Copyright (c) 1995, 1997 Apple Computer, Inc. All Rights Reserved */
29 /*-
30 * Copyright (c) 1982, 1986, 1991, 1993
31 * The Regents of the University of California. All rights reserved.
32 * (c) UNIX System Laboratories, Inc.
33 * All or some portions of this file are derived from material licensed
34 * to the University of California by American Telephone and Telegraph
35 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
36 * the permission of UNIX System Laboratories, Inc.
37 *
38 * Redistribution and use in source and binary forms, with or without
39 * modification, are permitted provided that the following conditions
40 * are met:
41 * 1. Redistributions of source code must retain the above copyright
42 * notice, this list of conditions and the following disclaimer.
43 * 2. Redistributions in binary form must reproduce the above copyright
44 * notice, this list of conditions and the following disclaimer in the
45 * documentation and/or other materials provided with the distribution.
46 * 3. All advertising materials mentioning features or use of this software
47 * must display the following acknowledgement:
48 * This product includes software developed by the University of
49 * California, Berkeley and its contributors.
50 * 4. Neither the name of the University nor the names of its contributors
51 * may be used to endorse or promote products derived from this software
52 * without specific prior written permission.
53 *
54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 * SUCH DAMAGE.
65 *
66 * @(#)kern_resource.c 8.5 (Berkeley) 1/21/94
67 */
68 /*
69 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
70 * support for mandatory and extensible security protections. This notice
71 * is included in support of clause 2.2 (b) of the Apple Public License,
72 * Version 2.0.
73 */
74
75 #include <sys/param.h>
76 #include <sys/systm.h>
77 #include <sys/sysctl.h>
78 #include <sys/kernel.h>
79 #include <sys/file_internal.h>
80 #include <sys/resourcevar.h>
81 #include <sys/malloc.h>
82 #include <sys/proc_internal.h>
83 #include <sys/kauth.h>
84 #include <machine/spl.h>
85
86 #include <sys/mount_internal.h>
87 #include <sys/sysproto.h>
88
89 #include <security/audit/audit.h>
90
91 #include <machine/vmparam.h>
92
93 #include <mach/mach_types.h>
94 #include <mach/time_value.h>
95 #include <mach/task.h>
96 #include <mach/task_info.h>
97 #include <mach/vm_map.h>
98 #include <mach/mach_vm.h>
99 #include <mach/thread_act.h> /* for thread_policy_set( ) */
100 #include <kern/lock.h>
101 #include <kern/thread.h>
102
103 #include <kern/task.h>
104 #include <kern/clock.h> /* for absolutetime_to_microtime() */
105 #include <netinet/in.h> /* for TRAFFIC_MGT_SO_* */
106 #include <sys/socketvar.h> /* for struct socket */
107
108 #include <vm/vm_map.h>
109
110 int donice(struct proc *curp, struct proc *chgp, int n);
111 int dosetrlimit(struct proc *p, u_int which, struct rlimit *limp);
112 int uthread_get_background_state(uthread_t);
113 static void do_background_socket(struct proc *p, thread_t thread, int priority);
114 static int do_background_thread(struct proc *curp, thread_t thread, int priority);
115 static int do_background_proc(struct proc *curp, struct proc *targetp, int priority);
116 void proc_apply_task_networkbg_internal(proc_t, thread_t);
117 void proc_restore_task_networkbg_internal(proc_t, thread_t);
118
119 rlim_t maxdmap = MAXDSIZ; /* XXX */
120 rlim_t maxsmap = MAXSSIZ - PAGE_SIZE; /* XXX */
121
122 /*
123 * Limits on the number of open files per process, and the number
124 * of child processes per process.
125 *
126 * Note: would be in kern/subr_param.c in FreeBSD.
127 */
128 __private_extern__ int maxfilesperproc = OPEN_MAX; /* per-proc open files limit */
129
130 SYSCTL_INT(_kern, KERN_MAXPROCPERUID, maxprocperuid, CTLFLAG_RW | CTLFLAG_LOCKED,
131 &maxprocperuid, 0, "Maximum processes allowed per userid" );
132
133 SYSCTL_INT(_kern, KERN_MAXFILESPERPROC, maxfilesperproc, CTLFLAG_RW | CTLFLAG_LOCKED,
134 &maxfilesperproc, 0, "Maximum files allowed open per process" );
135
136 /* Args and fn for proc_iteration callback used in setpriority */
137 struct puser_nice_args {
138 proc_t curp;
139 int prio;
140 id_t who;
141 int * foundp;
142 int * errorp;
143 };
144 static int puser_donice_callback(proc_t p, void * arg);
145
146
147 /* Args and fn for proc_iteration callback used in setpriority */
148 struct ppgrp_nice_args {
149 proc_t curp;
150 int prio;
151 int * foundp;
152 int * errorp;
153 };
154 static int ppgrp_donice_callback(proc_t p, void * arg);
155
156 /*
157 * Resource controls and accounting.
158 */
159 int
160 getpriority(struct proc *curp, struct getpriority_args *uap, int32_t *retval)
161 {
162 struct proc *p;
163 int low = PRIO_MAX + 1;
164 kauth_cred_t my_cred;
165
166 /* would also test (uap->who < 0), but id_t is unsigned */
167 if (uap->who > 0x7fffffff)
168 return (EINVAL);
169
170 switch (uap->which) {
171
172 case PRIO_PROCESS:
173 if (uap->who == 0) {
174 p = curp;
175 low = p->p_nice;
176 } else {
177 p = proc_find(uap->who);
178 if (p == 0)
179 break;
180 low = p->p_nice;
181 proc_rele(p);
182
183 }
184 break;
185
186 case PRIO_PGRP: {
187 struct pgrp *pg = PGRP_NULL;
188
189 if (uap->who == 0) {
190 /* returns the pgrp to ref */
191 pg = proc_pgrp(curp);
192 } else if ((pg = pgfind(uap->who)) == PGRP_NULL) {
193 break;
194 }
195 /* No need for iteration as it is a simple scan */
196 pgrp_lock(pg);
197 for (p = pg->pg_members.lh_first; p != 0; p = p->p_pglist.le_next) {
198 if (p->p_nice < low)
199 low = p->p_nice;
200 }
201 pgrp_unlock(pg);
202 pg_rele(pg);
203 break;
204 }
205
206 case PRIO_USER:
207 if (uap->who == 0)
208 uap->who = kauth_cred_getuid(kauth_cred_get());
209
210 proc_list_lock();
211
212 for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) {
213 my_cred = kauth_cred_proc_ref(p);
214 if (kauth_cred_getuid(my_cred) == uap->who &&
215 p->p_nice < low)
216 low = p->p_nice;
217 kauth_cred_unref(&my_cred);
218 }
219
220 proc_list_unlock();
221
222 break;
223
224 case PRIO_DARWIN_THREAD: {
225 thread_t thread;
226 struct uthread *ut;
227
228 /* we currently only support the current thread */
229 if (uap->who != 0) {
230 return (EINVAL);
231 }
232
233 thread = current_thread();
234 ut = get_bsdthread_info(thread);
235
236 low = 0;
237 if ( (ut->uu_flag & UT_BACKGROUND_TRAFFIC_MGT) != 0 ) {
238 low = 1;
239 }
240 break;
241 }
242
243 default:
244 return (EINVAL);
245 }
246 if (low == PRIO_MAX + 1)
247 return (ESRCH);
248 *retval = low;
249 return (0);
250 }
251
252 /* call back function used for proc iteration in PRIO_USER */
253 static int
254 puser_donice_callback(proc_t p, void * arg)
255 {
256 int error, n;
257 struct puser_nice_args * pun = (struct puser_nice_args *)arg;
258 kauth_cred_t my_cred;
259
260 my_cred = kauth_cred_proc_ref(p);
261 if (kauth_cred_getuid(my_cred) == pun->who) {
262 error = donice(pun->curp, p, pun->prio);
263 if (pun->errorp != NULL)
264 *pun->errorp = error;
265 if (pun->foundp != NULL) {
266 n = *pun->foundp;
267 *pun->foundp = n+1;
268 }
269 }
270 kauth_cred_unref(&my_cred);
271
272 return(PROC_RETURNED);
273 }
274
275 /* call back function used for proc iteration in PRIO_PGRP */
276 static int
277 ppgrp_donice_callback(proc_t p, void * arg)
278 {
279 int error;
280 struct ppgrp_nice_args * pun = (struct ppgrp_nice_args *)arg;
281 int n;
282
283 error = donice(pun->curp, p, pun->prio);
284 if (pun->errorp != NULL)
285 *pun->errorp = error;
286 if (pun->foundp!= NULL) {
287 n = *pun->foundp;
288 *pun->foundp = n+1;
289 }
290
291 return(PROC_RETURNED);
292 }
293
294 /*
295 * Returns: 0 Success
296 * EINVAL
297 * ESRCH
298 * donice:EPERM
299 * donice:EACCES
300 */
301 /* ARGSUSED */
302 int
303 setpriority(struct proc *curp, struct setpriority_args *uap, __unused int32_t *retval)
304 {
305 struct proc *p;
306 int found = 0, error = 0;
307 int refheld = 0;
308
309 AUDIT_ARG(cmd, uap->which);
310 AUDIT_ARG(owner, uap->who, 0);
311 AUDIT_ARG(value32, uap->prio);
312
313 /* would also test (uap->who < 0), but id_t is unsigned */
314 if (uap->who > 0x7fffffff)
315 return (EINVAL);
316
317 switch (uap->which) {
318
319 case PRIO_PROCESS:
320 if (uap->who == 0)
321 p = curp;
322 else {
323 p = proc_find(uap->who);
324 if (p == 0)
325 break;
326 refheld = 1;
327 }
328 error = donice(curp, p, uap->prio);
329 found++;
330 if (refheld != 0)
331 proc_rele(p);
332 break;
333
334 case PRIO_PGRP: {
335 struct pgrp *pg = PGRP_NULL;
336 struct ppgrp_nice_args ppgrp;
337
338 if (uap->who == 0) {
339 pg = proc_pgrp(curp);
340 } else if ((pg = pgfind(uap->who)) == PGRP_NULL)
341 break;
342
343 ppgrp.curp = curp;
344 ppgrp.prio = uap->prio;
345 ppgrp.foundp = &found;
346 ppgrp.errorp = &error;
347
348 /* PGRP_DROPREF drops the reference on process group */
349 pgrp_iterate(pg, PGRP_DROPREF, ppgrp_donice_callback, (void *)&ppgrp, NULL, NULL);
350
351 break;
352 }
353
354 case PRIO_USER: {
355 struct puser_nice_args punice;
356
357 if (uap->who == 0)
358 uap->who = kauth_cred_getuid(kauth_cred_get());
359
360 punice.curp = curp;
361 punice.prio = uap->prio;
362 punice.who = uap->who;
363 punice.foundp = &found;
364 error = 0;
365 punice.errorp = &error;
366 proc_iterate(PROC_ALLPROCLIST, puser_donice_callback, (void *)&punice, NULL, NULL);
367
368 break;
369 }
370
371 case PRIO_DARWIN_THREAD: {
372 /* process marked for termination no priority management */
373 if ((curp->p_lflag & P_LPTERMINATE) != 0)
374 return(EINVAL);
375 /* we currently only support the current thread */
376 if (uap->who != 0) {
377 return (EINVAL);
378 }
379 error = do_background_thread(curp, current_thread(), uap->prio);
380 if (!error) {
381 (void) do_background_socket(curp, current_thread(), uap->prio);
382 }
383 found++;
384 break;
385 }
386
387 case PRIO_DARWIN_PROCESS: {
388 if (uap->who == 0)
389 p = curp;
390 else {
391 p = proc_find(uap->who);
392 if (p == 0)
393 break;
394 refheld = 1;
395 }
396
397 /* process marked for termination no priority management */
398 if ((p->p_lflag & P_LPTERMINATE) != 0) {
399 error = EINVAL;
400 } else {
401 error = do_background_proc(curp, p, uap->prio);
402 if (!error) {
403 (void) do_background_socket(p, NULL, uap->prio);
404 }
405
406 }
407 found++;
408 if (refheld != 0)
409 proc_rele(p);
410 break;
411 }
412
413 default:
414 return (EINVAL);
415 }
416 if (found == 0)
417 return (ESRCH);
418 return (error);
419 }
420
421
422 /*
423 * Returns: 0 Success
424 * EPERM
425 * EACCES
426 * mac_check_proc_sched:???
427 */
428 int
429 donice(struct proc *curp, struct proc *chgp, int n)
430 {
431 int error = 0;
432 kauth_cred_t ucred;
433 kauth_cred_t my_cred;
434
435 ucred = kauth_cred_proc_ref(curp);
436 my_cred = kauth_cred_proc_ref(chgp);
437
438 if (suser(ucred, NULL) && kauth_cred_getruid(ucred) &&
439 kauth_cred_getuid(ucred) != kauth_cred_getuid(my_cred) &&
440 kauth_cred_getruid(ucred) != kauth_cred_getuid(my_cred)) {
441 error = EPERM;
442 goto out;
443 }
444 if (n > PRIO_MAX)
445 n = PRIO_MAX;
446 if (n < PRIO_MIN)
447 n = PRIO_MIN;
448 if (n < chgp->p_nice && suser(ucred, &curp->p_acflag)) {
449 error = EACCES;
450 goto out;
451 }
452 #if CONFIG_MACF
453 error = mac_proc_check_sched(curp, chgp);
454 if (error)
455 goto out;
456 #endif
457 proc_lock(chgp);
458 chgp->p_nice = n;
459 proc_unlock(chgp);
460 (void)resetpriority(chgp);
461 out:
462 kauth_cred_unref(&ucred);
463 kauth_cred_unref(&my_cred);
464 return (error);
465 }
466
467 static int
468 do_background_proc(struct proc *curp, struct proc *targetp, int priority)
469 {
470 int error = 0;
471 kauth_cred_t ucred;
472 kauth_cred_t target_cred;
473
474 ucred = kauth_cred_get();
475 target_cred = kauth_cred_proc_ref(targetp);
476
477 if (!kauth_cred_issuser(ucred) && kauth_cred_getruid(ucred) &&
478 kauth_cred_getuid(ucred) != kauth_cred_getuid(target_cred) &&
479 kauth_cred_getruid(ucred) != kauth_cred_getuid(target_cred))
480 {
481 error = EPERM;
482 goto out;
483 }
484
485 #if CONFIG_MACF
486 error = mac_proc_check_sched(curp, targetp);
487 if (error)
488 goto out;
489 #endif
490
491 if (priority == PRIO_DARWIN_NONUI)
492 error = proc_apply_task_gpuacc(targetp->task, TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_NOACCESS);
493 else
494 error = proc_set_and_apply_bgtaskpolicy(targetp->task, priority);
495 if (error)
496 goto out;
497
498 out:
499 kauth_cred_unref(&target_cred);
500 return (error);
501 }
502
503 static void
504 do_background_socket(struct proc *p, thread_t thread, int priority)
505 {
506 struct filedesc *fdp;
507 struct fileproc *fp;
508 int i;
509
510 if (priority == PRIO_DARWIN_BG) {
511 /*
512 * For PRIO_DARWIN_PROCESS (thread is NULL), simply mark
513 * the sockets with the background flag. There's nothing
514 * to do here for the PRIO_DARWIN_THREAD case.
515 */
516 if (thread == NULL) {
517 proc_fdlock(p);
518 fdp = p->p_fd;
519
520 for (i = 0; i < fdp->fd_nfiles; i++) {
521 struct socket *sockp;
522
523 fp = fdp->fd_ofiles[i];
524 if (fp == NULL || (fdp->fd_ofileflags[i] & UF_RESERVED) != 0 ||
525 fp->f_fglob->fg_type != DTYPE_SOCKET) {
526 continue;
527 }
528 sockp = (struct socket *)fp->f_fglob->fg_data;
529 socket_set_traffic_mgt_flags(sockp, TRAFFIC_MGT_SO_BACKGROUND);
530 sockp->so_background_thread = NULL;
531 }
532 proc_fdunlock(p);
533 }
534
535 } else {
536
537 /* disable networking IO throttle.
538 * NOTE - It is a known limitation of the current design that we
539 * could potentially clear TRAFFIC_MGT_SO_BACKGROUND bit for
540 * sockets created by other threads within this process.
541 */
542 proc_fdlock(p);
543 fdp = p->p_fd;
544 for ( i = 0; i < fdp->fd_nfiles; i++ ) {
545 struct socket *sockp;
546
547 fp = fdp->fd_ofiles[ i ];
548 if ( fp == NULL || (fdp->fd_ofileflags[ i ] & UF_RESERVED) != 0 ||
549 fp->f_fglob->fg_type != DTYPE_SOCKET ) {
550 continue;
551 }
552 sockp = (struct socket *)fp->f_fglob->fg_data;
553 /* skip if only clearing this thread's sockets */
554 if ((thread) && (sockp->so_background_thread != thread)) {
555 continue;
556 }
557 socket_clear_traffic_mgt_flags(sockp, TRAFFIC_MGT_SO_BACKGROUND);
558 sockp->so_background_thread = NULL;
559 }
560 proc_fdunlock(p);
561 }
562 }
563
564
565 /*
566 * do_background_thread
567 * Returns: 0 Success
568 * XXX - todo - does this need a MACF hook?
569 *
570 * NOTE: To maintain binary compatibility with PRIO_DARWIN_THREAD with respect
571 * to network traffic management, UT_BACKGROUND_TRAFFIC_MGT is set/cleared
572 * along with UT_BACKGROUND flag, as the latter alone no longer implies
573 * any form of traffic regulation (it simply means that the thread is
574 * background.) With PRIO_DARWIN_PROCESS, any form of network traffic
575 * management must be explicitly requested via whatever means appropriate,
576 * and only TRAFFIC_MGT_SO_BACKGROUND is set via do_background_socket().
577 */
578 static int
579 do_background_thread(struct proc *curp __unused, thread_t thread, int priority)
580 {
581 struct uthread *ut;
582 int error = 0;
583
584 ut = get_bsdthread_info(thread);
585
586 /* Backgrounding is unsupported for threads in vfork */
587 if ( (ut->uu_flag & UT_VFORK) != 0) {
588 return(EPERM);
589 }
590
591 error = proc_set_and_apply_bgthreadpolicy(curp->task, thread_tid(thread), priority);
592 return(error);
593
594 }
595
#if CONFIG_EMBEDDED
int mach_do_background_thread(thread_t thread, int prio);

/*
 * mach_do_background_thread
 *
 * Wrapper around do_background_thread()/do_background_socket() that
 * reports results as kern_return_t style codes.
 *
 * Returns:	0				Success
 *		KERN_INVALID_ARGUMENT		Thread's task has no BSD process
 *		KERN_PROTECTION_FAILURE		Caller is not superuser and the
 *						thread belongs to another
 *						process, or the thread is in
 *						vfork
 *		KERN_FAILURE			proc_self() failed, or any
 *						other error
 */
int
mach_do_background_thread(thread_t thread, int prio)
{
	struct proc *self;
	struct proc *owner;
	kauth_cred_t cred;
	int error = 0;

	owner = get_bsdtask_info(get_threadtask(thread));
	if (!owner) {
		return KERN_INVALID_ARGUMENT;
	}

	self = proc_self();
	if (self == PROC_NULL) {
		return KERN_FAILURE;
	}

	cred = kauth_cred_proc_ref(self);

	if (suser(cred, NULL) && self != owner) {
		error = KERN_PROTECTION_FAILURE;
	} else {
		error = do_background_thread(self, thread, prio);
		if (error == 0) {
			(void) do_background_socket(self, thread, prio);
		} else if (error == EPERM) {
			error = KERN_PROTECTION_FAILURE;
		} else {
			error = KERN_FAILURE;
		}
	}

	proc_rele(self);
	kauth_cred_unref(&cred);
	return error;
}
#endif /* CONFIG_EMBEDDED */
641
642 /*
643 * Returns: 0 Success
644 * copyin:EFAULT
645 * dosetrlimit:
646 */
647 /* ARGSUSED */
648 int
649 setrlimit(struct proc *p, struct setrlimit_args *uap, __unused int32_t *retval)
650 {
651 struct rlimit alim;
652 int error;
653
654 if ((error = copyin(uap->rlp, (caddr_t)&alim,
655 sizeof (struct rlimit))))
656 return (error);
657
658 return (dosetrlimit(p, uap->which, &alim));
659 }
660
661 /*
662 * Returns: 0 Success
663 * EINVAL
664 * ENOMEM Cannot copy limit structure
665 * suser:EPERM
666 *
667 * Notes: EINVAL is returned both for invalid arguments, and in the
668 * case that the current usage (e.g. RLIMIT_STACK) is already
669 * in excess of the requested limit.
670 */
671 int
672 dosetrlimit(struct proc *p, u_int which, struct rlimit *limp)
673 {
674 struct rlimit *alimp;
675 int error;
676 kern_return_t kr;
677 int posix = (which & _RLIMIT_POSIX_FLAG) ? 1 : 0;
678
679 /* Mask out POSIX flag, saved above */
680 which &= ~_RLIMIT_POSIX_FLAG;
681
682 if (which >= RLIM_NLIMITS)
683 return (EINVAL);
684
685 alimp = &p->p_rlimit[which];
686 if (limp->rlim_cur > limp->rlim_max)
687 return EINVAL;
688
689 if (limp->rlim_cur > alimp->rlim_max ||
690 limp->rlim_max > alimp->rlim_max)
691 if ((error = suser(kauth_cred_get(), &p->p_acflag))) {
692 return (error);
693 }
694
695 proc_limitblock(p);
696
697 if ((error = proc_limitreplace(p)) != 0) {
698 proc_limitunblock(p);
699 return(error);
700 }
701
702 alimp = &p->p_rlimit[which];
703
704 switch (which) {
705
706 case RLIMIT_CPU:
707 if (limp->rlim_cur == RLIM_INFINITY) {
708 task_vtimer_clear(p->task, TASK_VTIMER_RLIM);
709 timerclear(&p->p_rlim_cpu);
710 }
711 else {
712 task_absolutetime_info_data_t tinfo;
713 mach_msg_type_number_t count;
714 struct timeval ttv, tv;
715 clock_sec_t tv_sec;
716 clock_usec_t tv_usec;
717
718 count = TASK_ABSOLUTETIME_INFO_COUNT;
719 task_info(p->task, TASK_ABSOLUTETIME_INFO,
720 (task_info_t)&tinfo, &count);
721 absolutetime_to_microtime(tinfo.total_user + tinfo.total_system,
722 &tv_sec, &tv_usec);
723 ttv.tv_sec = tv_sec;
724 ttv.tv_usec = tv_usec;
725
726 tv.tv_sec = (limp->rlim_cur > __INT_MAX__ ? __INT_MAX__ : limp->rlim_cur);
727 tv.tv_usec = 0;
728 timersub(&tv, &ttv, &p->p_rlim_cpu);
729
730 timerclear(&tv);
731 if (timercmp(&p->p_rlim_cpu, &tv, >))
732 task_vtimer_set(p->task, TASK_VTIMER_RLIM);
733 else {
734 task_vtimer_clear(p->task, TASK_VTIMER_RLIM);
735
736 timerclear(&p->p_rlim_cpu);
737
738 psignal(p, SIGXCPU);
739 }
740 }
741 break;
742
743 case RLIMIT_DATA:
744 if (limp->rlim_cur > maxdmap)
745 limp->rlim_cur = maxdmap;
746 if (limp->rlim_max > maxdmap)
747 limp->rlim_max = maxdmap;
748 break;
749
750 case RLIMIT_STACK:
751 /* Disallow illegal stack size instead of clipping */
752 if (limp->rlim_cur > maxsmap ||
753 limp->rlim_max > maxsmap) {
754 if (posix) {
755 error = EINVAL;
756 goto out;
757 }
758 else {
759 /*
760 * 4797860 - workaround poorly written installers by
761 * doing previous implementation (< 10.5) when caller
762 * is non-POSIX conforming.
763 */
764 if (limp->rlim_cur > maxsmap)
765 limp->rlim_cur = maxsmap;
766 if (limp->rlim_max > maxsmap)
767 limp->rlim_max = maxsmap;
768 }
769 }
770
771 /*
772 * Stack is allocated to the max at exec time with only
773 * "rlim_cur" bytes accessible. If stack limit is going
774 * up make more accessible, if going down make inaccessible.
775 */
776 if (limp->rlim_cur > alimp->rlim_cur) {
777 user_addr_t addr;
778 user_size_t size;
779
780 /* grow stack */
781 size = round_page_64(limp->rlim_cur);
782 size -= round_page_64(alimp->rlim_cur);
783
784 addr = p->user_stack - round_page_64(limp->rlim_cur);
785 kr = mach_vm_protect(current_map(),
786 addr, size,
787 FALSE, VM_PROT_DEFAULT);
788 if (kr != KERN_SUCCESS) {
789 error = EINVAL;
790 goto out;
791 }
792 } else if (limp->rlim_cur < alimp->rlim_cur) {
793 user_addr_t addr;
794 user_size_t size;
795 user_addr_t cur_sp;
796
797 /* shrink stack */
798
799 /*
800 * First check if new stack limit would agree
801 * with current stack usage.
802 * Get the current thread's stack pointer...
803 */
804 cur_sp = thread_adjuserstack(current_thread(),
805 0);
806 if (cur_sp <= p->user_stack &&
807 cur_sp > (p->user_stack -
808 round_page_64(alimp->rlim_cur))) {
809 /* stack pointer is in main stack */
810 if (cur_sp <= (p->user_stack -
811 round_page_64(limp->rlim_cur))) {
812 /*
813 * New limit would cause
814 * current usage to be invalid:
815 * reject new limit.
816 */
817 error = EINVAL;
818 goto out;
819 }
820 } else {
821 /* not on the main stack: reject */
822 error = EINVAL;
823 goto out;
824 }
825
826 size = round_page_64(alimp->rlim_cur);
827 size -= round_page_64(limp->rlim_cur);
828
829 addr = p->user_stack - round_page_64(alimp->rlim_cur);
830
831 kr = mach_vm_protect(current_map(),
832 addr, size,
833 FALSE, VM_PROT_NONE);
834 if (kr != KERN_SUCCESS) {
835 error = EINVAL;
836 goto out;
837 }
838 } else {
839 /* no change ... */
840 }
841 break;
842
843 case RLIMIT_NOFILE:
844 /*
845 * Only root can set the maxfiles limits, as it is
846 * systemwide resource. If we are expecting POSIX behavior,
847 * instead of clamping the value, return EINVAL. We do this
848 * because historically, people have been able to attempt to
849 * set RLIM_INFINITY to get "whatever the maximum is".
850 */
851 if ( is_suser() ) {
852 if (limp->rlim_cur != alimp->rlim_cur &&
853 limp->rlim_cur > (rlim_t)maxfiles) {
854 if (posix) {
855 error = EINVAL;
856 goto out;
857 }
858 limp->rlim_cur = maxfiles;
859 }
860 if (limp->rlim_max != alimp->rlim_max &&
861 limp->rlim_max > (rlim_t)maxfiles)
862 limp->rlim_max = maxfiles;
863 }
864 else {
865 if (limp->rlim_cur != alimp->rlim_cur &&
866 limp->rlim_cur > (rlim_t)maxfilesperproc) {
867 if (posix) {
868 error = EINVAL;
869 goto out;
870 }
871 limp->rlim_cur = maxfilesperproc;
872 }
873 if (limp->rlim_max != alimp->rlim_max &&
874 limp->rlim_max > (rlim_t)maxfilesperproc)
875 limp->rlim_max = maxfilesperproc;
876 }
877 break;
878
879 case RLIMIT_NPROC:
880 /*
881 * Only root can set to the maxproc limits, as it is
882 * systemwide resource; all others are limited to
883 * maxprocperuid (presumably less than maxproc).
884 */
885 if ( is_suser() ) {
886 if (limp->rlim_cur > (rlim_t)maxproc)
887 limp->rlim_cur = maxproc;
888 if (limp->rlim_max > (rlim_t)maxproc)
889 limp->rlim_max = maxproc;
890 }
891 else {
892 if (limp->rlim_cur > (rlim_t)maxprocperuid)
893 limp->rlim_cur = maxprocperuid;
894 if (limp->rlim_max > (rlim_t)maxprocperuid)
895 limp->rlim_max = maxprocperuid;
896 }
897 break;
898
899 case RLIMIT_MEMLOCK:
900 /*
901 * Tell the Mach VM layer about the new limit value.
902 */
903
904 vm_map_set_user_wire_limit(current_map(), limp->rlim_cur);
905 break;
906
907 } /* switch... */
908 proc_lock(p);
909 *alimp = *limp;
910 proc_unlock(p);
911 error = 0;
912 out:
913 proc_limitunblock(p);
914 return (error);
915 }
916
917 /* ARGSUSED */
918 int
919 getrlimit(struct proc *p, struct getrlimit_args *uap, __unused int32_t *retval)
920 {
921 struct rlimit lim;
922
923 /*
924 * Take out flag now in case we need to use it to trigger variant
925 * behaviour later.
926 */
927 uap->which &= ~_RLIMIT_POSIX_FLAG;
928
929 if (uap->which >= RLIM_NLIMITS)
930 return (EINVAL);
931 proc_limitget(p, uap->which, &lim);
932 return (copyout((caddr_t)&lim,
933 uap->rlp, sizeof (struct rlimit)));
934 }
935
936 /*
937 * Transform the running time and tick information in proc p into user,
938 * system, and interrupt time usage.
939 */
940 /* No lock on proc is held for this.. */
941 void
942 calcru(struct proc *p, struct timeval *up, struct timeval *sp, struct timeval *ip)
943 {
944 task_t task;
945
946 timerclear(up);
947 timerclear(sp);
948 if (ip != NULL)
949 timerclear(ip);
950
951 task = p->task;
952 if (task) {
953 mach_task_basic_info_data_t tinfo;
954 task_thread_times_info_data_t ttimesinfo;
955 task_events_info_data_t teventsinfo;
956 mach_msg_type_number_t task_info_count, task_ttimes_count;
957 mach_msg_type_number_t task_events_count;
958 struct timeval ut,st;
959
960 task_info_count = MACH_TASK_BASIC_INFO_COUNT;
961 task_info(task, MACH_TASK_BASIC_INFO,
962 (task_info_t)&tinfo, &task_info_count);
963 ut.tv_sec = tinfo.user_time.seconds;
964 ut.tv_usec = tinfo.user_time.microseconds;
965 st.tv_sec = tinfo.system_time.seconds;
966 st.tv_usec = tinfo.system_time.microseconds;
967 timeradd(&ut, up, up);
968 timeradd(&st, sp, sp);
969
970 task_ttimes_count = TASK_THREAD_TIMES_INFO_COUNT;
971 task_info(task, TASK_THREAD_TIMES_INFO,
972 (task_info_t)&ttimesinfo, &task_ttimes_count);
973
974 ut.tv_sec = ttimesinfo.user_time.seconds;
975 ut.tv_usec = ttimesinfo.user_time.microseconds;
976 st.tv_sec = ttimesinfo.system_time.seconds;
977 st.tv_usec = ttimesinfo.system_time.microseconds;
978 timeradd(&ut, up, up);
979 timeradd(&st, sp, sp);
980
981 task_events_count = TASK_EVENTS_INFO_COUNT;
982 task_info(task, TASK_EVENTS_INFO,
983 (task_info_t)&teventsinfo, &task_events_count);
984
985 /*
986 * No need to lock "p": this does not need to be
987 * completely consistent, right ?
988 */
989 p->p_stats->p_ru.ru_minflt = (teventsinfo.faults -
990 teventsinfo.pageins);
991 p->p_stats->p_ru.ru_majflt = teventsinfo.pageins;
992 p->p_stats->p_ru.ru_nivcsw = (teventsinfo.csw -
993 p->p_stats->p_ru.ru_nvcsw);
994 if (p->p_stats->p_ru.ru_nivcsw < 0)
995 p->p_stats->p_ru.ru_nivcsw = 0;
996
997 p->p_stats->p_ru.ru_maxrss = tinfo.resident_size_max;
998 }
999 }
1000
1001 __private_extern__ void munge_user64_rusage(struct rusage *a_rusage_p, struct user64_rusage *a_user_rusage_p);
1002 __private_extern__ void munge_user32_rusage(struct rusage *a_rusage_p, struct user32_rusage *a_user_rusage_p);
1003
1004 /* ARGSUSED */
1005 int
1006 getrusage(struct proc *p, struct getrusage_args *uap, __unused int32_t *retval)
1007 {
1008 struct rusage *rup, rubuf;
1009 struct user64_rusage rubuf64;
1010 struct user32_rusage rubuf32;
1011 size_t retsize = sizeof(rubuf); /* default: 32 bits */
1012 caddr_t retbuf = (caddr_t)&rubuf; /* default: 32 bits */
1013 struct timeval utime;
1014 struct timeval stime;
1015
1016
1017 switch (uap->who) {
1018 case RUSAGE_SELF:
1019 calcru(p, &utime, &stime, NULL);
1020 proc_lock(p);
1021 rup = &p->p_stats->p_ru;
1022 rup->ru_utime = utime;
1023 rup->ru_stime = stime;
1024
1025 rubuf = *rup;
1026 proc_unlock(p);
1027
1028 break;
1029
1030 case RUSAGE_CHILDREN:
1031 proc_lock(p);
1032 rup = &p->p_stats->p_cru;
1033 rubuf = *rup;
1034 proc_unlock(p);
1035 break;
1036
1037 default:
1038 return (EINVAL);
1039 }
1040 if (IS_64BIT_PROCESS(p)) {
1041 retsize = sizeof(rubuf64);
1042 retbuf = (caddr_t)&rubuf64;
1043 munge_user64_rusage(&rubuf, &rubuf64);
1044 } else {
1045 retsize = sizeof(rubuf32);
1046 retbuf = (caddr_t)&rubuf32;
1047 munge_user32_rusage(&rubuf, &rubuf32);
1048 }
1049
1050 return (copyout(retbuf, uap->rusage, retsize));
1051 }
1052
1053 void
1054 ruadd(struct rusage *ru, struct rusage *ru2)
1055 {
1056 long *ip, *ip2;
1057 long i;
1058
1059 timeradd(&ru->ru_utime, &ru2->ru_utime, &ru->ru_utime);
1060 timeradd(&ru->ru_stime, &ru2->ru_stime, &ru->ru_stime);
1061 if (ru->ru_maxrss < ru2->ru_maxrss)
1062 ru->ru_maxrss = ru2->ru_maxrss;
1063 ip = &ru->ru_first; ip2 = &ru2->ru_first;
1064 for (i = &ru->ru_last - &ru->ru_first; i >= 0; i--)
1065 *ip++ += *ip2++;
1066 }
1067
1068 void
1069 proc_limitget(proc_t p, int which, struct rlimit * limp)
1070 {
1071 proc_list_lock();
1072 limp->rlim_cur = p->p_rlimit[which].rlim_cur;
1073 limp->rlim_max = p->p_rlimit[which].rlim_max;
1074 proc_list_unlock();
1075 }
1076
1077
1078 void
1079 proc_limitdrop(proc_t p, int exiting)
1080 {
1081 struct plimit * freelim = NULL;
1082 struct plimit * freeoldlim = NULL;
1083
1084 proc_list_lock();
1085
1086 if (--p->p_limit->pl_refcnt == 0) {
1087 freelim = p->p_limit;
1088 p->p_limit = NULL;
1089 }
1090 if ((exiting != 0) && (p->p_olimit != NULL) && (--p->p_olimit->pl_refcnt == 0)) {
1091 freeoldlim = p->p_olimit;
1092 p->p_olimit = NULL;
1093 }
1094
1095 proc_list_unlock();
1096 if (freelim != NULL)
1097 FREE_ZONE(freelim, sizeof *p->p_limit, M_PLIMIT);
1098 if (freeoldlim != NULL)
1099 FREE_ZONE(freeoldlim, sizeof *p->p_olimit, M_PLIMIT);
1100 }
1101
1102
1103 void
1104 proc_limitfork(proc_t parent, proc_t child)
1105 {
1106 proc_list_lock();
1107 child->p_limit = parent->p_limit;
1108 child->p_limit->pl_refcnt++;
1109 child->p_olimit = NULL;
1110 proc_list_unlock();
1111 }
1112
1113 void
1114 proc_limitblock(proc_t p)
1115 {
1116 proc_lock(p);
1117 while (p->p_lflag & P_LLIMCHANGE) {
1118 p->p_lflag |= P_LLIMWAIT;
1119 msleep(&p->p_olimit, &p->p_mlock, 0, "proc_limitblock", NULL);
1120 }
1121 p->p_lflag |= P_LLIMCHANGE;
1122 proc_unlock(p);
1123
1124 }
1125
1126
1127 void
1128 proc_limitunblock(proc_t p)
1129 {
1130 proc_lock(p);
1131 p->p_lflag &= ~P_LLIMCHANGE;
1132 if (p->p_lflag & P_LLIMWAIT) {
1133 p->p_lflag &= ~P_LLIMWAIT;
1134 wakeup(&p->p_olimit);
1135 }
1136 proc_unlock(p);
1137 }
1138
1139 /* This is called behind serialization provided by proc_limitblock/unlbock */
1140 int
1141 proc_limitreplace(proc_t p)
1142 {
1143 struct plimit *copy;
1144
1145
1146 proc_list_lock();
1147
1148 if (p->p_limit->pl_refcnt == 1) {
1149 proc_list_unlock();
1150 return(0);
1151 }
1152
1153 proc_list_unlock();
1154
1155 MALLOC_ZONE(copy, struct plimit *,
1156 sizeof(struct plimit), M_PLIMIT, M_WAITOK);
1157 if (copy == NULL) {
1158 return(ENOMEM);
1159 }
1160
1161 proc_list_lock();
1162 bcopy(p->p_limit->pl_rlimit, copy->pl_rlimit,
1163 sizeof(struct rlimit) * RLIM_NLIMITS);
1164 copy->pl_refcnt = 1;
1165 /* hang on to reference to old till process exits */
1166 p->p_olimit = p->p_limit;
1167 p->p_limit = copy;
1168 proc_list_unlock();
1169
1170 return(0);
1171 }
1172
1173
1174 /*
1175 * iopolicysys
1176 *
1177 * Description: System call MUX for use in manipulating I/O policy attributes of the current process or thread
1178 *
1179 * Parameters: cmd Policy command
1180 * arg Pointer to policy arguments
1181 *
1182 * Returns: 0 Success
1183 * EINVAL Invalid command or invalid policy arguments
1184 *
1185 */
1186 int
1187 iopolicysys(__unused struct proc *p, __unused struct iopolicysys_args *uap, __unused int32_t *retval)
1188 {
1189 int error = 0;
1190 struct _iopol_param_t iop_param;
1191 int processwide = 0;
1192
1193 if ((error = copyin(uap->arg, &iop_param, sizeof(iop_param))) != 0)
1194 goto out;
1195
1196 if (iop_param.iop_iotype != IOPOL_TYPE_DISK) {
1197 error = EINVAL;
1198 goto out;
1199 }
1200
1201 switch (iop_param.iop_scope) {
1202 case IOPOL_SCOPE_PROCESS:
1203 processwide = 1;
1204 break;
1205 case IOPOL_SCOPE_THREAD:
1206 processwide = 0;
1207 break;
1208 default:
1209 error = EINVAL;
1210 goto out;
1211 }
1212
1213 switch(uap->cmd) {
1214 case IOPOL_CMD_SET:
1215 switch (iop_param.iop_policy) {
1216 case IOPOL_DEFAULT:
1217 case IOPOL_NORMAL:
1218 case IOPOL_THROTTLE:
1219 case IOPOL_PASSIVE:
1220 case IOPOL_UTILITY:
1221 if(processwide != 0)
1222 proc_apply_task_diskacc(current_task(), iop_param.iop_policy);
1223 else
1224 proc_apply_thread_selfdiskacc(iop_param.iop_policy);
1225
1226 break;
1227 default:
1228 error = EINVAL;
1229 goto out;
1230 }
1231 break;
1232
1233 case IOPOL_CMD_GET:
1234 if(processwide != 0)
1235 iop_param.iop_policy = proc_get_task_disacc(current_task());
1236 else
1237 iop_param.iop_policy = proc_get_thread_selfdiskacc();
1238
1239 error = copyout((caddr_t)&iop_param, uap->arg, sizeof(iop_param));
1240
1241 break;
1242 default:
1243 error = EINVAL; // unknown command
1244 break;
1245 }
1246
1247 out:
1248 *retval = error;
1249 return (error);
1250 }
1251
1252
1253 boolean_t thread_is_io_throttled(void);
1254
1255 boolean_t
1256 thread_is_io_throttled(void)
1257 {
1258 return(proc_get_task_selfdiskacc() == IOPOL_THROTTLE);
1259 }
1260
1261 void
1262 proc_apply_task_networkbg(void * bsd_info)
1263 {
1264 proc_t p = PROC_NULL;
1265 proc_t curp = (proc_t)bsd_info;
1266 pid_t pid;
1267
1268 pid = curp->p_pid;
1269 p = proc_find(pid);
1270 if (p != PROC_NULL) {
1271 do_background_socket(p, NULL, PRIO_DARWIN_BG);
1272 proc_rele(p);
1273 }
1274 }
1275
1276 void
1277 proc_restore_task_networkbg(void * bsd_info)
1278 {
1279 proc_t p = PROC_NULL;
1280 proc_t curp = (proc_t)bsd_info;
1281 pid_t pid;
1282
1283 pid = curp->p_pid;
1284 p = proc_find(pid);
1285 if (p != PROC_NULL) {
1286 do_background_socket(p, NULL, 0);
1287 proc_rele(p);
1288 }
1289
1290 }
1291
/*
 * proc_set_task_networkbg
 *
 * Dispatch to proc_apply_task_networkbg() when "setbg" is non-zero and
 * to proc_restore_task_networkbg() otherwise.
 */
void
proc_set_task_networkbg(void * bsdinfo, int setbg)
{
	if (setbg == 0) {
		proc_restore_task_networkbg(bsdinfo);
		return;
	}
	proc_apply_task_networkbg(bsdinfo);
}
1300
1301 void
1302 proc_apply_task_networkbg_internal(proc_t p, thread_t thread)
1303 {
1304 if (p != PROC_NULL) {
1305 do_background_socket(p, thread, PRIO_DARWIN_BG);
1306 }
1307 }
1308 void
1309 proc_restore_task_networkbg_internal(proc_t p, thread_t thread)
1310 {
1311 if (p != PROC_NULL) {
1312 do_background_socket(p, thread, PRIO_DARWIN_BG);
1313 }
1314 }
1315