]> git.saurik.com Git - apple/xnu.git/blob - bsd/kern/kern_resource.c
02b61872ab463d2ebf642bd66fac6081f56f90c7
[apple/xnu.git] / bsd / kern / kern_resource.c
1 /*
2 * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /* Copyright (c) 1995, 1997 Apple Computer, Inc. All Rights Reserved */
29 /*-
30 * Copyright (c) 1982, 1986, 1991, 1993
31 * The Regents of the University of California. All rights reserved.
32 * (c) UNIX System Laboratories, Inc.
33 * All or some portions of this file are derived from material licensed
34 * to the University of California by American Telephone and Telegraph
35 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
36 * the permission of UNIX System Laboratories, Inc.
37 *
38 * Redistribution and use in source and binary forms, with or without
39 * modification, are permitted provided that the following conditions
40 * are met:
41 * 1. Redistributions of source code must retain the above copyright
42 * notice, this list of conditions and the following disclaimer.
43 * 2. Redistributions in binary form must reproduce the above copyright
44 * notice, this list of conditions and the following disclaimer in the
45 * documentation and/or other materials provided with the distribution.
46 * 3. All advertising materials mentioning features or use of this software
47 * must display the following acknowledgement:
48 * This product includes software developed by the University of
49 * California, Berkeley and its contributors.
50 * 4. Neither the name of the University nor the names of its contributors
51 * may be used to endorse or promote products derived from this software
52 * without specific prior written permission.
53 *
54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 * SUCH DAMAGE.
65 *
66 * @(#)kern_resource.c 8.5 (Berkeley) 1/21/94
67 */
68 /*
69 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
70 * support for mandatory and extensible security protections. This notice
71 * is included in support of clause 2.2 (b) of the Apple Public License,
72 * Version 2.0.
73 */
74
75 #include <sys/param.h>
76 #include <sys/systm.h>
77 #include <sys/sysctl.h>
78 #include <sys/kernel.h>
79 #include <sys/file_internal.h>
80 #include <sys/resourcevar.h>
81 #include <sys/malloc.h>
82 #include <sys/proc_internal.h>
83 #include <sys/kauth.h>
84 #include <machine/spl.h>
85
86 #include <sys/mount_internal.h>
87 #include <sys/sysproto.h>
88
89 #include <security/audit/audit.h>
90
91 #include <machine/vmparam.h>
92
93 #include <mach/mach_types.h>
94 #include <mach/time_value.h>
95 #include <mach/task.h>
96 #include <mach/task_info.h>
97 #include <mach/vm_map.h>
98 #include <mach/mach_vm.h>
99 #include <mach/thread_act.h> /* for thread_policy_set( ) */
100 #include <kern/lock.h>
101 #include <kern/thread.h>
102
103 #include <kern/task.h>
104 #include <kern/clock.h> /* for absolutetime_to_microtime() */
105 #include <netinet/in.h> /* for TRAFFIC_MGT_SO_BACKGROUND */
106 #include <sys/socketvar.h> /* for struct socket */
107
108 #include <vm/vm_map.h>
109
/* Forward declarations for the nice/rlimit helpers defined below. */
int     donice(struct proc *curp, struct proc *chgp, int n);
int     dosetrlimit(struct proc *p, u_int which, struct rlimit *limp);
static void do_background_socket(struct proc *curp, thread_t thread, int priority);
static int do_background_thread(struct proc *curp, int priority);
static int do_background_task(struct proc *curp, int priority);

/* Hard ceilings for RLIMIT_DATA and RLIMIT_STACK (stack reserves one guard page). */
rlim_t maxdmap = MAXDSIZ;       /* XXX */
rlim_t maxsmap = MAXSSIZ - PAGE_SIZE;   /* XXX */

/*
 * Limits on the number of open files per process, and the number
 * of child processes per process.
 *
 * Note:    would be in kern/subr_param.c in FreeBSD.
 */
__private_extern__ int maxfilesperproc = OPEN_MAX;      /* per-proc open files limit */

/* Tunable via sysctl kern.maxprocperuid (variable defined elsewhere). */
SYSCTL_INT( _kern, KERN_MAXPROCPERUID, maxprocperuid, CTLFLAG_RW,
        &maxprocperuid, 0, "Maximum processes allowed per userid" );

SYSCTL_INT( _kern, KERN_MAXFILESPERPROC, maxfilesperproc, CTLFLAG_RW,
        &maxfilesperproc, 0, "Maximum files allowed open per process" );
132
/* Args and fn for proc_iteration callback used in setpriority(PRIO_USER) */
struct puser_nice_args {
	proc_t curp;    /* calling process (permission checks in donice) */
	int prio;       /* new nice value to apply */
	id_t who;       /* target uid to match */
	int * foundp;   /* out: count of matching processes (may be NULL) */
	int * errorp;   /* out: last donice() error (may be NULL) */
};
static int puser_donice_callback(proc_t p, void * arg);
142
143
/* Args and fn for pgrp_iterate callback used in setpriority(PRIO_PGRP) */
struct ppgrp_nice_args {
	proc_t curp;    /* calling process (permission checks in donice) */
	int prio;       /* new nice value to apply */
	int * foundp;   /* out: count of processes visited (may be NULL) */
	int * errorp;   /* out: last donice() error (may be NULL) */
};
static int ppgrp_donice_callback(proc_t p, void * arg);
152
153 /*
154 * Resource controls and accounting.
155 */
/*
 * getpriority system call: return the lowest (most favorable) nice value
 * among the processes selected by (uap->which, uap->who), or for
 * PRIO_DARWIN_THREAD report whether the current thread is backgrounded.
 * Returns EINVAL for a bad selector, ESRCH when nothing matched.
 */
int
getpriority(struct proc *curp, struct getpriority_args *uap, int32_t *retval)
{
	struct proc *p;
	int low = PRIO_MAX + 1;     /* sentinel: no matching process seen yet */
	kauth_cred_t my_cred;

	/* would also test (uap->who < 0), but id_t is unsigned */
	if (uap->who > 0x7fffffff)
		return (EINVAL);

	switch (uap->which) {

	case PRIO_PROCESS:
		/* who == 0 selects the calling process itself */
		if (uap->who == 0) {
			p = curp;
			low = p->p_nice;
		} else {
			p = proc_find(uap->who);
			if (p == 0)
				break;
			low = p->p_nice;
			proc_rele(p);

		}
		break;

	case PRIO_PGRP: {
		struct pgrp *pg = PGRP_NULL;

		if (uap->who == 0) {
			/* returns the pgrp to ref */
			pg = proc_pgrp(curp);
		} else if ((pg = pgfind(uap->who)) == PGRP_NULL) {
			break;
		}
		/* No need for iteration as it is a simple scan */
		pgrp_lock(pg);
		for (p = pg->pg_members.lh_first; p != 0; p = p->p_pglist.le_next) {
			if (p->p_nice < low)
				low = p->p_nice;
		}
		pgrp_unlock(pg);
		pg_rele(pg);
		break;
	}

	case PRIO_USER:
		/* who == 0 means the caller's effective uid */
		if (uap->who == 0)
			uap->who = kauth_cred_getuid(kauth_cred_get());

		proc_list_lock();

		/* scan every process; take a cred ref to read its uid safely */
		for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) {
			my_cred = kauth_cred_proc_ref(p);
			if (kauth_cred_getuid(my_cred) == uap->who &&
			    p->p_nice < low)
				low = p->p_nice;
			kauth_cred_unref(&my_cred);
		}

		proc_list_unlock();

		break;

	case PRIO_DARWIN_THREAD: {
		thread_t thread;
		struct uthread *ut;

		/* we currently only support the current thread */
		if (uap->who != 0) {
			return (EINVAL);
		}

		thread = current_thread();
		ut = get_bsdthread_info(thread);

		/* 1 if the current thread is tagged background, else 0 */
		low = 0;
		if ( (ut->uu_flag & UT_BACKGROUND) != 0 ) {
			low = 1;
		}
		break;
	}

	default:
		return (EINVAL);
	}
	/* sentinel never lowered -> selector matched no process */
	if (low == PRIO_MAX + 1)
		return (ESRCH);
	*retval = low;
	return (0);
}
248
249 /* call back function used for proc iteration in PRIO_USER */
250 static int
251 puser_donice_callback(proc_t p, void * arg)
252 {
253 int error, n;
254 struct puser_nice_args * pun = (struct puser_nice_args *)arg;
255 kauth_cred_t my_cred;
256
257 my_cred = kauth_cred_proc_ref(p);
258 if (kauth_cred_getuid(my_cred) == pun->who) {
259 error = donice(pun->curp, p, pun->prio);
260 if (pun->errorp != NULL)
261 *pun->errorp = error;
262 if (pun->foundp != NULL) {
263 n = *pun->foundp;
264 *pun->foundp = n+1;
265 }
266 }
267 kauth_cred_unref(&my_cred);
268
269 return(PROC_RETURNED);
270 }
271
272 /* call back function used for proc iteration in PRIO_PGRP */
273 static int
274 ppgrp_donice_callback(proc_t p, void * arg)
275 {
276 int error;
277 struct ppgrp_nice_args * pun = (struct ppgrp_nice_args *)arg;
278 int n;
279
280 error = donice(pun->curp, p, pun->prio);
281 if (pun->errorp != NULL)
282 *pun->errorp = error;
283 if (pun->foundp!= NULL) {
284 n = *pun->foundp;
285 *pun->foundp = n+1;
286 }
287
288 return(PROC_RETURNED);
289 }
290
291 /*
292 * Returns: 0 Success
293 * EINVAL
294 * ESRCH
295 * donice:EPERM
296 * donice:EACCES
297 */
298 /* ARGSUSED */
/*
 * setpriority system call: set the nice value (or Darwin background
 * state) of the processes/threads selected by (uap->which, uap->who).
 *
 * Returns:	0			Success
 *		EINVAL
 *		ESRCH			selector matched nothing
 *	donice:EPERM
 *	donice:EACCES
 */
/* ARGSUSED */
int
setpriority(struct proc *curp, struct setpriority_args *uap, __unused int32_t *retval)
{
	struct proc *p;
	int found = 0, error = 0;
	int refheld = 0;    /* nonzero when p carries a proc_find() reference */

	AUDIT_ARG(cmd, uap->which);
	AUDIT_ARG(owner, uap->who, 0);
	AUDIT_ARG(value32, uap->prio);

	/* would also test (uap->who < 0), but id_t is unsigned */
	if (uap->who > 0x7fffffff)
		return (EINVAL);

	switch (uap->which) {

	case PRIO_PROCESS:
		/* who == 0 selects the calling process itself */
		if (uap->who == 0)
			p = curp;
		else {
			p = proc_find(uap->who);
			if (p == 0)
				break;
			refheld = 1;
		}
		error = donice(curp, p, uap->prio);
		found++;
		if (refheld != 0)
			proc_rele(p);
		break;

	case PRIO_PGRP: {
		struct pgrp *pg = PGRP_NULL;
		struct ppgrp_nice_args ppgrp;

		if (uap->who == 0) {
			pg = proc_pgrp(curp);
		 } else if ((pg = pgfind(uap->who)) == PGRP_NULL)
			break;

		ppgrp.curp = curp;
		ppgrp.prio = uap->prio;
		ppgrp.foundp = &found;
		ppgrp.errorp = &error;

		/* PGRP_DROPREF drops the reference on process group */
		pgrp_iterate(pg, PGRP_DROPREF, ppgrp_donice_callback, (void *)&ppgrp, NULL, NULL);

		break;
	}

	case PRIO_USER: {
		struct puser_nice_args punice;

		/* who == 0 means the caller's effective uid */
		if (uap->who == 0)
			uap->who = kauth_cred_getuid(kauth_cred_get());

		punice.curp = curp;
		punice.prio = uap->prio;
		punice.who = uap->who;
		punice.foundp = &found;
		error = 0;
		punice.errorp = &error;
		proc_iterate(PROC_ALLPROCLIST, puser_donice_callback, (void *)&punice, NULL, NULL);

		break;
	}

	case PRIO_DARWIN_THREAD: {
		/* we currently only support the current thread */
		if (uap->who != 0) {
			return (EINVAL);
		}
		/* toggle thread backgrounding, then its sockets to match */
		error = do_background_thread(curp, uap->prio);
		(void) do_background_socket(curp, current_thread(), uap->prio);
		found++;
		break;
	}

	case PRIO_DARWIN_PROCESS: {
		if (uap->who == 0)
			p = curp;
		else {
			p = proc_find(uap->who);
			if (p == 0)
				break;
			refheld = 1;
		}

		/* background the whole task, all its sockets, and disk I/O */
		error = do_background_task(p, uap->prio);
		(void) do_background_socket(p, NULL, uap->prio);

		proc_lock(p);
		p->p_iopol_disk = (uap->prio == PRIO_DARWIN_BG ?
		                       IOPOL_THROTTLE : IOPOL_DEFAULT);
		proc_unlock(p);

		found++;
		if (refheld != 0)
			proc_rele(p);
		break;
	}

	default:
		return (EINVAL);
	}
	if (found == 0)
		return (ESRCH);
	return (error);
}
410
411
412 /*
413 * Returns: 0 Success
414 * EPERM
415 * EACCES
416 * mac_check_proc_sched:???
417 */
/*
 * Apply nice value n (clamped to [PRIO_MIN, PRIO_MAX]) to process chgp
 * on behalf of curp, enforcing the usual permission rules: a non-root
 * caller may only renice processes it owns, and only root may lower a
 * nice value.
 *
 * Returns:	0			Success
 *		EPERM
 *		EACCES
 *	mac_check_proc_sched:???
 */
int
donice(struct proc *curp, struct proc *chgp, int n)
{
	int error = 0;
	kauth_cred_t ucred;
	kauth_cred_t my_cred;

	/* hold refs on both creds so the uid checks are stable */
	ucred = kauth_cred_proc_ref(curp);
	my_cred = kauth_cred_proc_ref(chgp);

	/* non-root with a real uid must match target's uid one way or another */
	if (suser(ucred, NULL) && ucred->cr_ruid &&
	    kauth_cred_getuid(ucred) != kauth_cred_getuid(my_cred) &&
	    ucred->cr_ruid != kauth_cred_getuid(my_cred)) {
		error = EPERM;
		goto out;
	}
	if (n > PRIO_MAX)
		n = PRIO_MAX;
	if (n < PRIO_MIN)
		n = PRIO_MIN;
	/* only the superuser may improve (lower) a nice value */
	if (n < chgp->p_nice && suser(ucred, &curp->p_acflag)) {
		error = EACCES;
		goto out;
	}
#if CONFIG_MACF
	error = mac_proc_check_sched(curp, chgp);
	if (error)
		goto out;
#endif
	proc_lock(chgp);
	chgp->p_nice = n;
	proc_unlock(chgp);
	(void)resetpriority(chgp);
out:
	/* drop both cred references on every path */
	kauth_cred_unref(&ucred);
	kauth_cred_unref(&my_cred);
	return (error);
}
456
457 static int
458 do_background_task(struct proc *p, int priority)
459 {
460 int error = 0;
461 task_category_policy_data_t info;
462
463 if (priority & PRIO_DARWIN_BG) {
464 info.role = TASK_THROTTLE_APPLICATION;
465 } else {
466 info.role = TASK_DEFAULT_APPLICATION;
467 }
468
469 error = task_policy_set(p->task,
470 TASK_CATEGORY_POLICY,
471 (task_policy_t) &info,
472 TASK_CATEGORY_POLICY_COUNT);
473 return (error);
474 }
475
/*
 * Mark (or unmark) every open socket of curp with the background
 * traffic-management flag.  When setting, a non-NULL thread means the
 * tagging is per-thread and handled elsewhere, so the process-wide scan
 * is skipped.  When clearing with a non-NULL thread, only sockets last
 * backgrounded by that thread are cleared.
 */
static void
do_background_socket(struct proc *curp, thread_t thread, int priority)
{
	struct filedesc     *fdp;
	struct fileproc     *fp;
	int                 i;

	if (priority & PRIO_DARWIN_BG) {
		/* enable network throttle process-wide (if no thread is specified) */
		if (thread == NULL) {
			proc_fdlock(curp);
			fdp = curp->p_fd;

			/* walk the fd table under the fd lock, sockets only */
			for (i = 0; i < fdp->fd_nfiles; i++) {
				struct socket       *sockp;

				fp = fdp->fd_ofiles[i];
				if (fp == NULL || (fdp->fd_ofileflags[i] & UF_RESERVED) != 0 ||
				        fp->f_fglob->fg_type != DTYPE_SOCKET) {
					continue;
				}
				sockp = (struct socket *)fp->f_fglob->fg_data;
				sockp->so_traffic_mgt_flags |= TRAFFIC_MGT_SO_BACKGROUND;
				sockp->so_background_thread = NULL;
			}
			proc_fdunlock(curp);
		}

	} else {
		/* disable networking IO throttle.
		 * NOTE - It is a known limitation of the current design that we
		 * could potentially clear TRAFFIC_MGT_SO_BACKGROUND bit for
		 * sockets created by other threads within this process.
		 */
		proc_fdlock(curp);
		fdp = curp->p_fd;
		for ( i = 0; i < fdp->fd_nfiles; i++ ) {
			struct socket       *sockp;

			fp = fdp->fd_ofiles[ i ];
			if ( fp == NULL || (fdp->fd_ofileflags[ i ] & UF_RESERVED) != 0 ||
			        fp->f_fglob->fg_type != DTYPE_SOCKET ) {
				continue;
			}
			sockp = (struct socket *)fp->f_fglob->fg_data;
			/* skip if only clearing this thread's sockets */
			if ((thread) && (sockp->so_background_thread != thread)) {
				continue;
			}
			sockp->so_traffic_mgt_flags &= ~TRAFFIC_MGT_SO_BACKGROUND;
			sockp->so_background_thread = NULL;
		}
		proc_fdunlock(curp);
	}
}
531
532
533 /*
534 * do_background_thread
535 * Returns: 0 Success
536 * XXX - todo - does this need a MACF hook?
537 */
/*
 * do_background_thread
 *
 * Tag or untag the CURRENT thread as background depending on
 * PRIO_DARWIN_BG in priority: toggles UT_BACKGROUND, switches the
 * per-thread disk I/O policy, and adjusts scheduling importance.
 *
 * Returns: 0		Success
 * XXX - todo - does this need a MACF hook?
 */
static int
do_background_thread(struct proc *curp __unused, int priority)
{
	thread_t                            thread;
	struct uthread                      *ut;
	thread_precedence_policy_data_t     policy;

	thread = current_thread();
	ut = get_bsdthread_info(thread);

	if ( (priority & PRIO_DARWIN_BG) == 0 ) {
		/* turn off backgrounding of thread */
		if ( (ut->uu_flag & UT_BACKGROUND) == 0 ) {
			/* already off */
			return(0);
		}

		/* clear background bit in thread and disable disk IO throttle */
		ut->uu_flag &= ~UT_BACKGROUND;
		ut->uu_iopol_disk = IOPOL_NORMAL;

		/* reset thread priority (we did not save previous value) */
		policy.importance = 0;
		thread_policy_set( thread, THREAD_PRECEDENCE_POLICY,
		                   (thread_policy_t)&policy,
		                   THREAD_PRECEDENCE_POLICY_COUNT );
		return(0);
	}

	/* background this thread */
	if ( (ut->uu_flag & UT_BACKGROUND) != 0 ) {
		/* already backgrounded */
		return(0);
	}

	/* tag thread as background and throttle disk IO */
	ut->uu_flag |= UT_BACKGROUND;
	ut->uu_iopol_disk = IOPOL_THROTTLE;

	/* lowest possible importance within the precedence policy */
	policy.importance = INT_MIN;
	thread_policy_set( thread, THREAD_PRECEDENCE_POLICY,
	                   (thread_policy_t)&policy,
	                   THREAD_PRECEDENCE_POLICY_COUNT );

	/* throttle networking IO happens in socket( ) syscall.
	 * If UT_BACKGROUND is set in the current thread then
	 * TRAFFIC_MGT_SO_BACKGROUND socket option is set.
	 */
	return(0);
}
588
589
590 /*
591 * Returns: 0 Success
592 * copyin:EFAULT
593 * dosetrlimit:
594 */
595 /* ARGSUSED */
596 int
597 setrlimit(struct proc *p, struct setrlimit_args *uap, __unused int32_t *retval)
598 {
599 struct rlimit alim;
600 int error;
601
602 if ((error = copyin(uap->rlp, (caddr_t)&alim,
603 sizeof (struct rlimit))))
604 return (error);
605
606 return (dosetrlimit(p, uap->which, &alim));
607 }
608
609 /*
610 * Returns: 0 Success
611 * EINVAL
612 * ENOMEM Cannot copy limit structure
613 * suser:EPERM
614 *
615 * Notes: EINVAL is returned both for invalid arguments, and in the
616 * case that the current usage (e.g. RLIMIT_STACK) is already
617 * in excess of the requested limit.
618 */
/*
 * Validate and apply a new resource limit to process p.  `which` may
 * carry _RLIMIT_POSIX_FLAG to request strict POSIX behavior (EINVAL on
 * out-of-range values instead of silent clamping).
 *
 * Returns:	0			Success
 *		EINVAL
 *		ENOMEM			Cannot copy limit structure
 *	suser:EPERM
 *
 * Notes:	EINVAL is returned both for invalid arguments, and in the
 *		case that the current usage (e.g. RLIMIT_STACK) is already
 *		in excess of the requested limit.
 */
int
dosetrlimit(struct proc *p, u_int which, struct rlimit *limp)
{
	struct rlimit *alimp;
	int error;
	kern_return_t	kr;
	int posix = (which & _RLIMIT_POSIX_FLAG) ? 1 : 0;

	/* Mask out POSIX flag, saved above */
	which &= ~_RLIMIT_POSIX_FLAG;

	if (which >= RLIM_NLIMITS)
		return (EINVAL);

	alimp = &p->p_rlimit[which];
	if (limp->rlim_cur > limp->rlim_max)
		return EINVAL;

	/* raising either value above the current hard limit needs root */
	if (limp->rlim_cur > alimp->rlim_max ||
	    limp->rlim_max > alimp->rlim_max)
		if ((error = suser(kauth_cred_get(), &p->p_acflag))) {
			return (error);
	}

	/* serialize limit changes against other threads of this process */
	proc_limitblock(p);

	/* give p a private copy of its limits if they are still shared */
	if ((error = proc_limitreplace(p)) != 0) {
		proc_limitunblock(p);
		return(error);
	}

	/* re-fetch: proc_limitreplace() may have swapped in a new plimit */
	alimp = &p->p_rlimit[which];

	switch (which) {

	case RLIMIT_CPU:
		if (limp->rlim_cur == RLIM_INFINITY) {
			/* no CPU limit: stop the virtual timer */
			task_vtimer_clear(p->task, TASK_VTIMER_RLIM);
			timerclear(&p->p_rlim_cpu);
		}
		else {
			task_absolutetime_info_data_t	tinfo;
			mach_msg_type_number_t		count;
			struct timeval			ttv, tv;
			clock_sec_t			tv_sec;
			clock_usec_t			tv_usec;

			/* compute CPU already consumed (user + system) */
			count = TASK_ABSOLUTETIME_INFO_COUNT;
			task_info(p->task, TASK_ABSOLUTETIME_INFO,
					  	(task_info_t)&tinfo, &count);
			absolutetime_to_microtime(tinfo.total_user + tinfo.total_system,
									  &tv_sec, &tv_usec);
			ttv.tv_sec = tv_sec;
			ttv.tv_usec = tv_usec;

			/* remaining budget = new limit - already consumed */
			tv.tv_sec = (limp->rlim_cur > __INT_MAX__ ? __INT_MAX__ : limp->rlim_cur);
			tv.tv_usec = 0;
			timersub(&tv, &ttv, &p->p_rlim_cpu);

			timerclear(&tv);
			if (timercmp(&p->p_rlim_cpu, &tv, >))
				task_vtimer_set(p->task, TASK_VTIMER_RLIM);
			else {
				/* limit already exceeded: signal immediately */
				task_vtimer_clear(p->task, TASK_VTIMER_RLIM);

				timerclear(&p->p_rlim_cpu);

				psignal(p, SIGXCPU);
			}
		}
		break;

	case RLIMIT_DATA:
		/* clamp both values to the system-wide data ceiling */
		if (limp->rlim_cur > maxdmap)
			limp->rlim_cur = maxdmap;
		if (limp->rlim_max > maxdmap)
			limp->rlim_max = maxdmap;
		break;

	case RLIMIT_STACK:
		/* Disallow illegal stack size instead of clipping */
		if (limp->rlim_cur > maxsmap ||
		    limp->rlim_max > maxsmap) {
			if (posix) {
				error = EINVAL;
				goto out;
			}
			else {
				/* 
				 * 4797860 - workaround poorly written installers by 
				 * doing previous implementation (< 10.5) when caller 
				 * is non-POSIX conforming.
				 */
				if (limp->rlim_cur > maxsmap) 
					limp->rlim_cur = maxsmap;
				if (limp->rlim_max > maxsmap) 
					limp->rlim_max = maxsmap;
			}
		}

		/*
		 * Stack is allocated to the max at exec time with only
		 * "rlim_cur" bytes accessible.  If stack limit is going
		 * up make more accessible, if going down make inaccessible.
		 */
		if (limp->rlim_cur > alimp->rlim_cur) {
			user_addr_t addr;
			user_size_t size;
			
				/* grow stack */
				size = round_page_64(limp->rlim_cur);
				size -= round_page_64(alimp->rlim_cur);

#if STACK_GROWTH_UP
				/* go to top of current stack */
				addr = p->user_stack + round_page_64(alimp->rlim_cur);
#else	/* STACK_GROWTH_UP */
				addr = p->user_stack - round_page_64(limp->rlim_cur);
#endif /* STACK_GROWTH_UP */
				/* make the newly covered range accessible */
				kr = mach_vm_protect(current_map(), 
						     addr, size, 
						     FALSE, VM_PROT_DEFAULT);
				if (kr != KERN_SUCCESS) {
					error = EINVAL;
					goto out;
				}
		} else if (limp->rlim_cur < alimp->rlim_cur) {
			user_addr_t addr;
			user_size_t size;
			user_addr_t cur_sp;

				/* shrink stack */

				/*
				 * First check if new stack limit would agree
				 * with current stack usage.
				 * Get the current thread's stack pointer...
				 */
				cur_sp = thread_adjuserstack(current_thread(),
							     0);
#if STACK_GROWTH_UP
				if (cur_sp >= p->user_stack &&
				    cur_sp < (p->user_stack +
					      round_page_64(alimp->rlim_cur))) {
					/* current stack pointer is in main stack */
					if (cur_sp >= (p->user_stack +
						       round_page_64(limp->rlim_cur))) {
						/*
						 * New limit would cause
						 * current usage to be invalid:
						 * reject new limit.
						 */
						error =  EINVAL;
						goto out;
					}
				} else {
					/* not on the main stack: reject */
					error =  EINVAL;
					goto out;
				}
				 
#else	/* STACK_GROWTH_UP */
				if (cur_sp <= p->user_stack &&
				    cur_sp > (p->user_stack -
					      round_page_64(alimp->rlim_cur))) {
					/* stack pointer is in main stack */
					if (cur_sp <= (p->user_stack -
						       round_page_64(limp->rlim_cur))) {
						/*
						 * New limit would cause
						 * current usage to be invalid:
						 * reject new limit.
						 */
						error =  EINVAL;
						goto out;
					}
				} else {
					/* not on the main stack: reject */
					error =  EINVAL;
					goto out;
				}
#endif /* STACK_GROWTH_UP */
				
				size = round_page_64(alimp->rlim_cur);
				size -= round_page_64(limp->rlim_cur);

#if STACK_GROWTH_UP
				addr = p->user_stack + round_page_64(limp->rlim_cur);
#else	/* STACK_GROWTH_UP */
				addr = p->user_stack - round_page_64(alimp->rlim_cur);
#endif /* STACK_GROWTH_UP */

				/* make the newly uncovered range inaccessible */
				kr = mach_vm_protect(current_map(),
						     addr, size,
						     FALSE, VM_PROT_NONE);
				if (kr != KERN_SUCCESS) {
					error = EINVAL;
					goto out;
				}
		} else {
			/* no change ... */
		}
		break;

	case RLIMIT_NOFILE:
		/* 
		 * Only root can set the maxfiles limits, as it is
		 * systemwide resource. If we are expecting POSIX behavior,
		 * instead of clamping the value, return EINVAL.  We do this
		 * because historically, people have been able to attempt to
		 * set RLIM_INFINITY to get "whatever the maximum is".
		*/
		if ( is_suser() ) {
			if (limp->rlim_cur != alimp->rlim_cur &&
			    limp->rlim_cur > (rlim_t)maxfiles) {
			    	if (posix) {
					error =  EINVAL;
					goto out;
				}
				limp->rlim_cur = maxfiles;
			}
			if (limp->rlim_max != alimp->rlim_max &&
			    limp->rlim_max > (rlim_t)maxfiles)
				limp->rlim_max = maxfiles;
		}
		else {
			if (limp->rlim_cur != alimp->rlim_cur &&
			    limp->rlim_cur > (rlim_t)maxfilesperproc) {
			    	if (posix) {
					error =  EINVAL;
					goto out;
				}
				limp->rlim_cur = maxfilesperproc;
			}
			if (limp->rlim_max != alimp->rlim_max &&
			    limp->rlim_max > (rlim_t)maxfilesperproc)
				limp->rlim_max = maxfilesperproc;
		}
		break;

	case RLIMIT_NPROC:
		/* 
		 * Only root can set to the maxproc limits, as it is
		 * systemwide resource; all others are limited to
		 * maxprocperuid (presumably less than maxproc).
		 */
		if ( is_suser() ) {
			if (limp->rlim_cur > (rlim_t)maxproc)
				limp->rlim_cur = maxproc;
			if (limp->rlim_max > (rlim_t)maxproc)
				limp->rlim_max = maxproc;
		} 
		else {
			if (limp->rlim_cur > (rlim_t)maxprocperuid)
				limp->rlim_cur = maxprocperuid;
			if (limp->rlim_max > (rlim_t)maxprocperuid)
				limp->rlim_max = maxprocperuid;
		}
		break;

	case RLIMIT_MEMLOCK:
		/*
		 * Tell the Mach VM layer about the new limit value.
		 */

		vm_map_set_user_wire_limit(current_map(), limp->rlim_cur);
		break;
		
	} /* switch... */
	/* commit the (possibly clamped) new values */
	proc_lock(p);
	*alimp = *limp;
	proc_unlock(p);
	error = 0;
out:
	proc_limitunblock(p);
	return (error);
}
896
897 /* ARGSUSED */
898 int
899 getrlimit(struct proc *p, struct getrlimit_args *uap, __unused int32_t *retval)
900 {
901 struct rlimit lim;
902
903 /*
904 * Take out flag now in case we need to use it to trigger variant
905 * behaviour later.
906 */
907 uap->which &= ~_RLIMIT_POSIX_FLAG;
908
909 if (uap->which >= RLIM_NLIMITS)
910 return (EINVAL);
911 proc_limitget(p, uap->which, &lim);
912 return (copyout((caddr_t)&lim,
913 uap->rlp, sizeof (struct rlimit)));
914 }
915
916 /*
917 * Transform the running time and tick information in proc p into user,
918 * system, and interrupt time usage.
919 */
920 /* No lock on proc is held for this.. */
/*
 * Transform the running time and tick information in proc p into user,
 * system, and interrupt time usage.  Accumulates both terminated-thread
 * times (task basic info) and live-thread times (thread times info)
 * into *up / *sp, and refreshes selected p_stats->p_ru fields as a
 * side effect.  ip, when non-NULL, is only cleared (no interrupt time
 * accounting here).
 */
/* No lock on proc is held for this.. */
void
calcru(struct proc *p, struct timeval *up, struct timeval *sp, struct timeval *ip)
{
	task_t			task;

	timerclear(up);
	timerclear(sp);
	if (ip != NULL)
		timerclear(ip);

	task = p->task;
	if (task) {
		task_basic_info_32_data_t tinfo;
		task_thread_times_info_data_t ttimesinfo;
		task_events_info_data_t teventsinfo;
		mach_msg_type_number_t task_info_count, task_ttimes_count;
		mach_msg_type_number_t task_events_count;
		struct timeval ut,st;

		/* times accumulated from already-terminated threads */
		task_info_count	= TASK_BASIC_INFO_32_COUNT;
		task_info(task, TASK_BASIC2_INFO_32,
			  (task_info_t)&tinfo, &task_info_count);
		ut.tv_sec = tinfo.user_time.seconds;
		ut.tv_usec = tinfo.user_time.microseconds;
		st.tv_sec = tinfo.system_time.seconds;
		st.tv_usec = tinfo.system_time.microseconds;
		timeradd(&ut, up, up);
		timeradd(&st, sp, sp);

		/* times from the task's currently-live threads */
		task_ttimes_count = TASK_THREAD_TIMES_INFO_COUNT;
		task_info(task, TASK_THREAD_TIMES_INFO,
			  (task_info_t)&ttimesinfo, &task_ttimes_count);

		ut.tv_sec = ttimesinfo.user_time.seconds;
		ut.tv_usec = ttimesinfo.user_time.microseconds;
		st.tv_sec = ttimesinfo.system_time.seconds;
		st.tv_usec = ttimesinfo.system_time.microseconds;
		timeradd(&ut, up, up);
		timeradd(&st, sp, sp);

		/* fault / context-switch counters for rusage */
		task_events_count = TASK_EVENTS_INFO_COUNT;
		task_info(task, TASK_EVENTS_INFO,
			  (task_info_t)&teventsinfo, &task_events_count);

		/*
		 * No need to lock "p":  this does not need to be
		 * completely consistent, right ?
		 */
		p->p_stats->p_ru.ru_minflt = (teventsinfo.faults -
					      teventsinfo.pageins);
		p->p_stats->p_ru.ru_majflt = teventsinfo.pageins;
		p->p_stats->p_ru.ru_nivcsw = (teventsinfo.csw -
					      p->p_stats->p_ru.ru_nvcsw);
		/* clamp: csw can lag behind voluntary switch count */
		if (p->p_stats->p_ru.ru_nivcsw < 0)
			p->p_stats->p_ru.ru_nivcsw = 0;

		p->p_stats->p_ru.ru_maxrss = tinfo.resident_size;
	}
}
980
981 __private_extern__ void munge_user64_rusage(struct rusage *a_rusage_p, struct user64_rusage *a_user_rusage_p);
982 __private_extern__ void munge_user32_rusage(struct rusage *a_rusage_p, struct user32_rusage *a_user_rusage_p);
983
984 /* ARGSUSED */
/*
 * getrusage system call: snapshot resource usage for the calling
 * process (RUSAGE_SELF, times recomputed via calcru) or its reaped
 * children (RUSAGE_CHILDREN), then copy it out in the 32- or 64-bit
 * user layout depending on the process ABI.
 */
/* ARGSUSED */
int
getrusage(struct proc *p, struct getrusage_args *uap, __unused int32_t *retval)
{
	struct rusage *rup, rubuf;
	struct user64_rusage rubuf64;
	struct user32_rusage rubuf32;
	size_t retsize = sizeof(rubuf);			/* default: 32 bits */
	caddr_t retbuf = (caddr_t)&rubuf;		/* default: 32 bits */
	struct timeval utime;
	struct timeval stime;


	switch (uap->who) {
	case RUSAGE_SELF:
		/* refresh times before taking the snapshot under the lock */
		calcru(p, &utime, &stime, NULL);
		proc_lock(p);
		rup = &p->p_stats->p_ru;
		rup->ru_utime = utime;
		rup->ru_stime = stime;

		rubuf = *rup;
		proc_unlock(p);

		break;

	case RUSAGE_CHILDREN:
		proc_lock(p);
		rup = &p->p_stats->p_cru;
		rubuf = *rup;
		proc_unlock(p);
		break;

	default:
		return (EINVAL);
	}
	/* marshal into the layout matching the caller's ABI */
	if (IS_64BIT_PROCESS(p)) {
		retsize = sizeof(rubuf64);
		retbuf = (caddr_t)&rubuf64;
		munge_user64_rusage(&rubuf, &rubuf64);
	} else {
		retsize = sizeof(rubuf32);
		retbuf = (caddr_t)&rubuf32;
		munge_user32_rusage(&rubuf, &rubuf32);
	}

	return (copyout(retbuf, uap->rusage, retsize));
}
1032
1033 void
1034 ruadd(struct rusage *ru, struct rusage *ru2)
1035 {
1036 long *ip, *ip2;
1037 long i;
1038
1039 timeradd(&ru->ru_utime, &ru2->ru_utime, &ru->ru_utime);
1040 timeradd(&ru->ru_stime, &ru2->ru_stime, &ru->ru_stime);
1041 if (ru->ru_maxrss < ru2->ru_maxrss)
1042 ru->ru_maxrss = ru2->ru_maxrss;
1043 ip = &ru->ru_first; ip2 = &ru2->ru_first;
1044 for (i = &ru->ru_last - &ru->ru_first; i >= 0; i--)
1045 *ip++ += *ip2++;
1046 }
1047
1048 void
1049 proc_limitget(proc_t p, int which, struct rlimit * limp)
1050 {
1051 proc_list_lock();
1052 limp->rlim_cur = p->p_rlimit[which].rlim_cur;
1053 limp->rlim_max = p->p_rlimit[which].rlim_max;
1054 proc_list_unlock();
1055 }
1056
1057
/*
 * Drop p's references on its limit structures.  The current plimit
 * always loses one reference; the "old" plimit (kept alive by
 * proc_limitreplace) is only dropped when the process is exiting.
 * Structures whose refcount hits zero are freed after the lock is
 * released.
 */
void
proc_limitdrop(proc_t p, int exiting)
{
	struct plimit * freelim = NULL;
	struct plimit * freeoldlim = NULL;

	proc_list_lock();

	if (--p->p_limit->pl_refcnt == 0) { 
		freelim = p->p_limit;
		p->p_limit = NULL;
	}
	if ((exiting != 0) && (p->p_olimit != NULL) && (--p->p_olimit->pl_refcnt  == 0)) {
		freeoldlim =  p->p_olimit;
		p->p_olimit = NULL;
	}

	proc_list_unlock();
	/* free outside the lock to avoid holding it across the zone free */
	if (freelim != NULL)
		FREE_ZONE(freelim, sizeof *p->p_limit, M_PLIMIT);
	if (freeoldlim != NULL)
		FREE_ZONE(freeoldlim,  sizeof *p->p_olimit, M_PLIMIT);
}
1081
1082
1083 void
1084 proc_limitfork(proc_t parent, proc_t child)
1085 {
1086 proc_list_lock();
1087 child->p_limit = parent->p_limit;
1088 child->p_limit->pl_refcnt++;
1089 child->p_olimit = NULL;
1090 proc_list_unlock();
1091 }
1092
/*
 * Serialize limit changes for p: sleep until no other thread has a
 * limit change in progress, then claim the P_LLIMCHANGE "token".
 * Paired with proc_limitunblock(); sleepers wait on &p->p_olimit.
 */
void
proc_limitblock(proc_t p)
{
	proc_lock(p);
	while (p->p_lflag & P_LLIMCHANGE) {
		p->p_lflag |= P_LLIMWAIT;
		msleep(&p->p_olimit, &p->p_mlock, 0, "proc_limitblock", NULL);
	}
	p->p_lflag |= P_LLIMCHANGE;
	proc_unlock(p);

}
1105
1106
/*
 * Release the limit-change "token" taken by proc_limitblock() and wake
 * any threads that were waiting for it.
 */
void
proc_limitunblock(proc_t p)
{
	proc_lock(p);
	p->p_lflag &= ~P_LLIMCHANGE;
	if (p->p_lflag & P_LLIMWAIT) {
		p->p_lflag &= ~P_LLIMWAIT;
		wakeup(&p->p_olimit);
	}
	proc_unlock(p);
}
1118
/* This is called behind serialization provided by proc_limitblock/unblock */
/*
 * Give p a private copy of its (possibly shared) plimit before a limit
 * is modified — the copy-on-write step for limits shared across fork.
 * Returns 0 on success or ENOMEM if the copy cannot be allocated.
 * The old structure is parked in p_olimit until the process exits.
 */
int
proc_limitreplace(proc_t p)
{
	struct plimit *copy;


	proc_list_lock();

	/* sole owner already: nothing to copy */
	if (p->p_limit->pl_refcnt == 1) {
		proc_list_unlock();
		return(0);
	}
		
	proc_list_unlock();

	/* allocate outside the lock; M_WAITOK may block */
	MALLOC_ZONE(copy, struct plimit *,
			sizeof(struct plimit), M_PLIMIT, M_WAITOK);
	if (copy == NULL) {
		return(ENOMEM);
	}

	proc_list_lock();
	bcopy(p->p_limit->pl_rlimit, copy->pl_rlimit,
	    sizeof(struct rlimit) * RLIM_NLIMITS);
	copy->pl_refcnt = 1;
	/* hang on to reference to old till process exits */
	p->p_olimit = p->p_limit;
	p->p_limit = copy;
	proc_list_unlock();

	return(0);
}
1152
1153
1154 /*
1155 * iopolicysys
1156 *
1157 * Description: System call MUX for use in manipulating I/O policy attributes of the current process or thread
1158 *
1159 * Parameters: cmd Policy command
1160 * arg Pointer to policy arguments
1161 *
1162 * Returns: 0 Success
1163 * EINVAL Invalid command or invalid policy arguments
1164 *
1165 */
/*
 * iopolicysys
 *
 * Description:	System call MUX for use in manipulating I/O policy attributes of the current process or thread
 *
 * Parameters:	cmd				Policy command
 *		arg				Pointer to policy arguments
 *
 * Returns:	0				Success
 *		EINVAL				Invalid command or invalid policy arguments
 *
 */
int
iopolicysys(__unused struct proc *p, __unused struct iopolicysys_args *uap, __unused int32_t *retval)
{
	int	error = 0;
	thread_t thread = THREAD_NULL;
	int *policy;    /* points at either the proc- or thread-level policy slot */
	struct uthread	*ut = NULL;
	struct _iopol_param_t iop_param;

	if ((error = copyin(uap->arg, &iop_param, sizeof(iop_param))) != 0)
		goto exit;

	/* only the disk I/O policy type is implemented here */
	if (iop_param.iop_iotype != IOPOL_TYPE_DISK) {
		error = EINVAL;
		goto exit;
	}

	switch (iop_param.iop_scope) {
	case IOPOL_SCOPE_PROCESS:
		policy = &p->p_iopol_disk;
		break;
	case IOPOL_SCOPE_THREAD:
		thread = current_thread();
		ut = get_bsdthread_info(thread);
		policy = &ut->uu_iopol_disk;
		break;
	default:
		error = EINVAL;
		goto exit;
	}
	
	switch(uap->cmd) {
	case IOPOL_CMD_SET:
		switch (iop_param.iop_policy) {
		case IOPOL_DEFAULT:
		case IOPOL_NORMAL:
		case IOPOL_THROTTLE:
		case IOPOL_PASSIVE:
			/* proc lock taken even for thread scope; guards the write */
			proc_lock(p);
			*policy = iop_param.iop_policy;
			proc_unlock(p);
			break;
		default:
			error = EINVAL;
			goto exit;
		}
		break;
	case IOPOL_CMD_GET:
		switch (*policy) {
		case IOPOL_DEFAULT:
		case IOPOL_NORMAL:
		case IOPOL_THROTTLE:
		case IOPOL_PASSIVE:
			iop_param.iop_policy = *policy;
			break;
		default: // in-kernel 
			// this should never happen
			printf("%s: unknown I/O policy %d\n", __func__, *policy);
			// restore to default value
			*policy = IOPOL_DEFAULT;
			iop_param.iop_policy = *policy;
		}
		
		error = copyout((caddr_t)&iop_param, uap->arg, sizeof(iop_param));
		break;
	default:
		error = EINVAL; // unknown command
		break;
	}

  exit:
	/* NOTE(review): retval mirrors error even on failure — longstanding quirk */
	*retval = error;
	return (error);
}
1240
1241
1242 boolean_t thread_is_io_throttled(void);
1243
1244 boolean_t
1245 thread_is_io_throttled(void) {
1246
1247 int policy;
1248 struct uthread *ut;
1249
1250 policy = current_proc()->p_iopol_disk;
1251
1252 ut = get_bsdthread_info(current_thread());
1253
1254 if (ut->uu_iopol_disk != IOPOL_DEFAULT)
1255 policy = ut->uu_iopol_disk;
1256
1257 if (policy == IOPOL_THROTTLE)
1258 return TRUE;
1259
1260 return FALSE;
1261 }