/* apple/xnu: bsd/kern/kern_resource.c (xnu-3789.1.32) */
/*
 * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1995, 1997 Apple Computer, Inc. All Rights Reserved */
/*-
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_resource.c	8.5 (Berkeley) 1/21/94
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
 * support for mandatory and extensible security protections.  This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysctl.h>
#include <sys/kernel.h>
#include <sys/file_internal.h>
#include <sys/resourcevar.h>
#include <sys/malloc.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <machine/spl.h>

#include <sys/mount_internal.h>
#include <sys/sysproto.h>

#include <security/audit/audit.h>

#include <machine/vmparam.h>

#include <mach/mach_types.h>
#include <mach/time_value.h>
#include <mach/task.h>
#include <mach/task_info.h>
#include <mach/vm_map.h>
#include <mach/mach_vm.h>
#include <mach/thread_act.h>	/* for thread_policy_set() */
#include <kern/thread.h>
#include <kern/policy_internal.h>

#include <kern/task.h>
#include <kern/clock.h>		/* for absolutetime_to_microtime() */
#include <netinet/in.h>		/* for TRAFFIC_MGT_SO_* */
#include <sys/socketvar.h>	/* for struct socket */

#include <vm/vm_map.h>

#include <kern/assert.h>
#include <sys/resource.h>
#include <sys/priv.h>
#include <IOKit/IOBSD.h>

int	donice(struct proc *curp, struct proc *chgp, int n);
int	dosetrlimit(struct proc *p, u_int which, struct rlimit *limp);
int	uthread_get_background_state(uthread_t);
static void do_background_socket(struct proc *p, thread_t thread);
static int do_background_thread(thread_t thread, int priority);
static int do_background_proc(struct proc *curp, struct proc *targetp, int priority);
static int set_gpudeny_proc(struct proc *curp, struct proc *targetp, int priority);
static int proc_set_darwin_role(proc_t curp, proc_t targetp, int priority);
static int proc_get_darwin_role(proc_t curp, proc_t targetp, int *priority);
static int get_background_proc(struct proc *curp, struct proc *targetp, int *priority);
int proc_pid_rusage(int pid, int flavor, user_addr_t buf, int32_t *retval);
void gather_rusage_info(proc_t p, rusage_info_current *ru, int flavor);
int fill_task_rusage(task_t task, rusage_info_current *ri);
void fill_task_billed_usage(task_t task, rusage_info_current *ri);
int fill_task_io_rusage(task_t task, rusage_info_current *ri);
int fill_task_qos_rusage(task_t task, rusage_info_current *ri);
static void rusage_info_conversion(rusage_info_t ri_info, rusage_info_current *ri_current, int flavor);

int proc_get_rusage(proc_t p, int flavor, user_addr_t buffer, __unused int is_zombie);

rlim_t maxdmap = MAXDSIZ;		/* XXX */
rlim_t maxsmap = MAXSSIZ - PAGE_MAX_SIZE;	/* XXX */

/*
 * Limits on the number of open files per process, and the number
 * of child processes per process.
 *
 * Note: would be in kern/subr_param.c in FreeBSD.
 */
__private_extern__ int maxfilesperproc = OPEN_MAX;	/* per-proc open files limit */

SYSCTL_INT(_kern, KERN_MAXPROCPERUID, maxprocperuid, CTLFLAG_RW | CTLFLAG_LOCKED,
	&maxprocperuid, 0, "Maximum processes allowed per userid");

SYSCTL_INT(_kern, KERN_MAXFILESPERPROC, maxfilesperproc, CTLFLAG_RW | CTLFLAG_LOCKED,
	&maxfilesperproc, 0, "Maximum files allowed open per process");

/* Args and fn for proc_iteration callback used in setpriority */
struct puser_nice_args {
	proc_t curp;
	int prio;
	id_t who;
	int *foundp;
	int *errorp;
};
static int puser_donice_callback(proc_t p, void *arg);

/* Args and fn for proc_iteration callback used in setpriority */
struct ppgrp_nice_args {
	proc_t curp;
	int prio;
	int *foundp;
	int *errorp;
};
static int ppgrp_donice_callback(proc_t p, void *arg);

/*
 * Resource controls and accounting.
 */
int
getpriority(struct proc *curp, struct getpriority_args *uap, int32_t *retval)
{
	struct proc *p;
	int low = PRIO_MAX + 1;
	kauth_cred_t my_cred;
	int refheld = 0;
	int error = 0;

	/* would also test (uap->who < 0), but id_t is unsigned */
	if (uap->who > 0x7fffffff)
		return (EINVAL);

	switch (uap->which) {

	case PRIO_PROCESS:
		if (uap->who == 0) {
			p = curp;
			low = p->p_nice;
		} else {
			p = proc_find(uap->who);
			if (p == 0)
				break;
			low = p->p_nice;
			proc_rele(p);
		}
		break;

	case PRIO_PGRP: {
		struct pgrp *pg = PGRP_NULL;

		if (uap->who == 0) {
			/* returns the pgrp to ref */
			pg = proc_pgrp(curp);
		} else if ((pg = pgfind(uap->who)) == PGRP_NULL) {
			break;
		}
		/* No need for iteration as it is a simple scan */
		pgrp_lock(pg);
		PGMEMBERS_FOREACH(pg, p) {
			if (p->p_nice < low)
				low = p->p_nice;
		}
		pgrp_unlock(pg);
		pg_rele(pg);
		break;
	}

	case PRIO_USER:
		if (uap->who == 0)
			uap->who = kauth_cred_getuid(kauth_cred_get());

		proc_list_lock();

		for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) {
			my_cred = kauth_cred_proc_ref(p);
			if (kauth_cred_getuid(my_cred) == uap->who &&
			    p->p_nice < low)
				low = p->p_nice;
			kauth_cred_unref(&my_cred);
		}

		proc_list_unlock();

		break;

	case PRIO_DARWIN_THREAD:
		/* we currently only support the current thread */
		if (uap->who != 0)
			return (EINVAL);

		low = proc_get_thread_policy(current_thread(), TASK_POLICY_INTERNAL, TASK_POLICY_DARWIN_BG);

		break;

	case PRIO_DARWIN_PROCESS:
		if (uap->who == 0) {
			p = curp;
		} else {
			p = proc_find(uap->who);
			if (p == PROC_NULL)
				break;
			refheld = 1;
		}

		error = get_background_proc(curp, p, &low);

		if (refheld)
			proc_rele(p);
		if (error)
			return (error);
		break;

	case PRIO_DARWIN_ROLE:
		if (uap->who == 0) {
			p = curp;
		} else {
			p = proc_find(uap->who);
			if (p == PROC_NULL)
				break;
			refheld = 1;
		}

		error = proc_get_darwin_role(curp, p, &low);

		if (refheld)
			proc_rele(p);
		if (error)
			return (error);
		break;

	default:
		return (EINVAL);
	}
	if (low == PRIO_MAX + 1)
		return (ESRCH);
	*retval = low;
	return (0);
}
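
/*
 * Illustrative userspace sketch (not part of this file): querying the
 * lowest nice value with getpriority(2).  Because getpriority() can
 * legitimately return -1, a caller must clear and check errno rather
 * than testing the return value alone:
 *
 *	#include <sys/resource.h>
 *	#include <errno.h>
 *	#include <stdio.h>
 *
 *	errno = 0;
 *	int prio = getpriority(PRIO_PROCESS, 0);   // who == 0: current process
 *	if (prio == -1 && errno != 0)
 *		perror("getpriority");
 */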

/* call back function used for proc iteration in PRIO_USER */
static int
puser_donice_callback(proc_t p, void *arg)
{
	int error, n;
	struct puser_nice_args *pun = (struct puser_nice_args *)arg;
	kauth_cred_t my_cred;

	my_cred = kauth_cred_proc_ref(p);
	if (kauth_cred_getuid(my_cred) == pun->who) {
		error = donice(pun->curp, p, pun->prio);
		if (pun->errorp != NULL)
			*pun->errorp = error;
		if (pun->foundp != NULL) {
			n = *pun->foundp;
			*pun->foundp = n + 1;
		}
	}
	kauth_cred_unref(&my_cred);

	return (PROC_RETURNED);
}

/* call back function used for proc iteration in PRIO_PGRP */
static int
ppgrp_donice_callback(proc_t p, void *arg)
{
	int error;
	struct ppgrp_nice_args *pun = (struct ppgrp_nice_args *)arg;
	int n;

	error = donice(pun->curp, p, pun->prio);
	if (pun->errorp != NULL)
		*pun->errorp = error;
	if (pun->foundp != NULL) {
		n = *pun->foundp;
		*pun->foundp = n + 1;
	}

	return (PROC_RETURNED);
}

/*
 * Returns:	0			Success
 *		EINVAL
 *		ESRCH
 *	donice:EPERM
 *	donice:EACCES
 */
/* ARGSUSED */
int
setpriority(struct proc *curp, struct setpriority_args *uap, int32_t *retval)
{
	struct proc *p;
	int found = 0, error = 0;
	int refheld = 0;

	AUDIT_ARG(cmd, uap->which);
	AUDIT_ARG(owner, uap->who, 0);
	AUDIT_ARG(value32, uap->prio);

	/* would also test (uap->who < 0), but id_t is unsigned */
	if (uap->who > 0x7fffffff)
		return (EINVAL);

	switch (uap->which) {

	case PRIO_PROCESS:
		if (uap->who == 0)
			p = curp;
		else {
			p = proc_find(uap->who);
			if (p == 0)
				break;
			refheld = 1;
		}
		error = donice(curp, p, uap->prio);
		found++;
		if (refheld != 0)
			proc_rele(p);
		break;

	case PRIO_PGRP: {
		struct pgrp *pg = PGRP_NULL;
		struct ppgrp_nice_args ppgrp;

		if (uap->who == 0) {
			pg = proc_pgrp(curp);
		} else if ((pg = pgfind(uap->who)) == PGRP_NULL)
			break;

		ppgrp.curp = curp;
		ppgrp.prio = uap->prio;
		ppgrp.foundp = &found;
		ppgrp.errorp = &error;

		/* PGRP_DROPREF drops the reference on process group */
		pgrp_iterate(pg, PGRP_DROPREF, ppgrp_donice_callback, (void *)&ppgrp, NULL, NULL);

		break;
	}

	case PRIO_USER: {
		struct puser_nice_args punice;

		if (uap->who == 0)
			uap->who = kauth_cred_getuid(kauth_cred_get());

		punice.curp = curp;
		punice.prio = uap->prio;
		punice.who = uap->who;
		punice.foundp = &found;
		error = 0;
		punice.errorp = &error;
		proc_iterate(PROC_ALLPROCLIST, puser_donice_callback, (void *)&punice, NULL, NULL);

		break;
	}

	case PRIO_DARWIN_THREAD: {
		/* we currently only support the current thread */
		if (uap->who != 0)
			return (EINVAL);

		error = do_background_thread(current_thread(), uap->prio);
		found++;
		break;
	}

	case PRIO_DARWIN_PROCESS: {
		if (uap->who == 0)
			p = curp;
		else {
			p = proc_find(uap->who);
			if (p == 0)
				break;
			refheld = 1;
		}

		error = do_background_proc(curp, p, uap->prio);

		found++;
		if (refheld != 0)
			proc_rele(p);
		break;
	}

	case PRIO_DARWIN_GPU: {
		if (uap->who == 0)
			return (EINVAL);

		p = proc_find(uap->who);
		if (p == PROC_NULL)
			break;

		error = set_gpudeny_proc(curp, p, uap->prio);

		found++;
		proc_rele(p);
		break;
	}

	case PRIO_DARWIN_ROLE: {
		if (uap->who == 0) {
			p = curp;
		} else {
			p = proc_find(uap->who);
			if (p == PROC_NULL)
				break;
			refheld = 1;
		}

		error = proc_set_darwin_role(curp, p, uap->prio);

		found++;
		if (refheld != 0)
			proc_rele(p);
		break;
	}

	default:
		return (EINVAL);
	}
	if (found == 0)
		return (ESRCH);
	if (error == EIDRM) {
		*retval = -2;
		error = 0;
	}
	return (error);
}
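
/*
 * Illustrative userspace sketch (not part of this file): moving a
 * process in and out of the Darwin background band via the
 * PRIO_DARWIN_PROCESS extension handled above.
 *
 *	#include <sys/resource.h>
 *
 *	// Throttle the current process (who == 0 means "self"):
 *	setpriority(PRIO_DARWIN_PROCESS, 0, PRIO_DARWIN_BG);
 *
 *	// Restore normal scheduling and I/O behavior:
 *	setpriority(PRIO_DARWIN_PROCESS, 0, 0);
 */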

/*
 * Returns:	0			Success
 *		EPERM
 *		EACCES
 *	mac_check_proc_sched:???
 */
int
donice(struct proc *curp, struct proc *chgp, int n)
{
	int error = 0;
	kauth_cred_t ucred;
	kauth_cred_t my_cred;

	ucred = kauth_cred_proc_ref(curp);
	my_cred = kauth_cred_proc_ref(chgp);

	if (suser(ucred, NULL) && kauth_cred_getruid(ucred) &&
	    kauth_cred_getuid(ucred) != kauth_cred_getuid(my_cred) &&
	    kauth_cred_getruid(ucred) != kauth_cred_getuid(my_cred)) {
		error = EPERM;
		goto out;
	}
	if (n > PRIO_MAX)
		n = PRIO_MAX;
	if (n < PRIO_MIN)
		n = PRIO_MIN;
	if (n < chgp->p_nice && suser(ucred, &curp->p_acflag)) {
		error = EACCES;
		goto out;
	}
#if CONFIG_MACF
	error = mac_proc_check_sched(curp, chgp);
	if (error)
		goto out;
#endif
	proc_lock(chgp);
	chgp->p_nice = n;
	proc_unlock(chgp);
	(void)resetpriority(chgp);
out:
	kauth_cred_unref(&ucred);
	kauth_cred_unref(&my_cred);
	return (error);
}

static int
set_gpudeny_proc(struct proc *curp, struct proc *targetp, int priority)
{
	int error = 0;
	kauth_cred_t ucred;
	kauth_cred_t target_cred;

	ucred = kauth_cred_get();
	target_cred = kauth_cred_proc_ref(targetp);

	/* TODO: Entitlement instead of uid check */

	if (!kauth_cred_issuser(ucred) && kauth_cred_getruid(ucred) &&
	    kauth_cred_getuid(ucred) != kauth_cred_getuid(target_cred) &&
	    kauth_cred_getruid(ucred) != kauth_cred_getuid(target_cred)) {
		error = EPERM;
		goto out;
	}

	if (curp == targetp) {
		error = EPERM;
		goto out;
	}

#if CONFIG_MACF
	error = mac_proc_check_sched(curp, targetp);
	if (error)
		goto out;
#endif

	switch (priority) {
	case PRIO_DARWIN_GPU_DENY:
		task_set_gpu_denied(proc_task(targetp), TRUE);
		break;
	case PRIO_DARWIN_GPU_ALLOW:
		task_set_gpu_denied(proc_task(targetp), FALSE);
		break;
	default:
		error = EINVAL;
		goto out;
	}

out:
	kauth_cred_unref(&target_cred);
	return (error);
}

static int
proc_set_darwin_role(proc_t curp, proc_t targetp, int priority)
{
	int error = 0;
	uint32_t flagsp;

	kauth_cred_t ucred, target_cred;

	ucred = kauth_cred_get();
	target_cred = kauth_cred_proc_ref(targetp);

	if (!kauth_cred_issuser(ucred) && kauth_cred_getruid(ucred) &&
	    kauth_cred_getuid(ucred) != kauth_cred_getuid(target_cred) &&
	    kauth_cred_getruid(ucred) != kauth_cred_getuid(target_cred)) {
		if (priv_check_cred(ucred, PRIV_SETPRIORITY_DARWIN_ROLE, 0) != 0) {
			error = EPERM;
			goto out;
		}
	}

	if (curp != targetp) {
#if CONFIG_MACF
		if ((error = mac_proc_check_sched(curp, targetp)))
			goto out;
#endif
	}

	proc_get_darwinbgstate(proc_task(targetp), &flagsp);
	if ((flagsp & PROC_FLAG_APPLICATION) != PROC_FLAG_APPLICATION) {
		error = ENOTSUP;
		goto out;
	}

	integer_t role = 0;

	if ((error = proc_darwin_role_to_task_role(priority, &role)))
		goto out;

	proc_set_task_policy(proc_task(targetp), TASK_POLICY_ATTRIBUTE,
	    TASK_POLICY_ROLE, role);

out:
	kauth_cred_unref(&target_cred);
	return (error);
}

static int
proc_get_darwin_role(proc_t curp, proc_t targetp, int *priority)
{
	int error = 0;
	int role = 0;

	kauth_cred_t ucred, target_cred;

	ucred = kauth_cred_get();
	target_cred = kauth_cred_proc_ref(targetp);

	if (!kauth_cred_issuser(ucred) && kauth_cred_getruid(ucred) &&
	    kauth_cred_getuid(ucred) != kauth_cred_getuid(target_cred) &&
	    kauth_cred_getruid(ucred) != kauth_cred_getuid(target_cred)) {
		error = EPERM;
		goto out;
	}

	if (curp != targetp) {
#if CONFIG_MACF
		if ((error = mac_proc_check_sched(curp, targetp)))
			goto out;
#endif
	}

	role = proc_get_task_policy(proc_task(targetp), TASK_POLICY_ATTRIBUTE, TASK_POLICY_ROLE);

	*priority = proc_task_role_to_darwin_role(role);

out:
	kauth_cred_unref(&target_cred);
	return (error);
}

static int
get_background_proc(struct proc *curp, struct proc *targetp, int *priority)
{
	int external = 0;
	int error = 0;
	kauth_cred_t ucred, target_cred;

	ucred = kauth_cred_get();
	target_cred = kauth_cred_proc_ref(targetp);

	if (!kauth_cred_issuser(ucred) && kauth_cred_getruid(ucred) &&
	    kauth_cred_getuid(ucred) != kauth_cred_getuid(target_cred) &&
	    kauth_cred_getruid(ucred) != kauth_cred_getuid(target_cred)) {
		error = EPERM;
		goto out;
	}

	external = (curp == targetp) ? TASK_POLICY_INTERNAL : TASK_POLICY_EXTERNAL;

	*priority = proc_get_task_policy(current_task(), external, TASK_POLICY_DARWIN_BG);

out:
	kauth_cred_unref(&target_cred);
	return (error);
}

static int
do_background_proc(struct proc *curp, struct proc *targetp, int priority)
{
#if !CONFIG_MACF
#pragma unused(curp)
#endif
	int error = 0;
	kauth_cred_t ucred;
	kauth_cred_t target_cred;
	int external;
	int enable;

	ucred = kauth_cred_get();
	target_cred = kauth_cred_proc_ref(targetp);

	if (!kauth_cred_issuser(ucred) && kauth_cred_getruid(ucred) &&
	    kauth_cred_getuid(ucred) != kauth_cred_getuid(target_cred) &&
	    kauth_cred_getruid(ucred) != kauth_cred_getuid(target_cred)) {
		error = EPERM;
		goto out;
	}

#if CONFIG_MACF
	error = mac_proc_check_sched(curp, targetp);
	if (error)
		goto out;
#endif

	external = (curp == targetp) ? TASK_POLICY_INTERNAL : TASK_POLICY_EXTERNAL;

	switch (priority) {
	case PRIO_DARWIN_BG:
		enable = TASK_POLICY_ENABLE;
		break;
	case PRIO_DARWIN_NONUI:
		/* ignored for compatibility */
		goto out;
	default:
		/* TODO: EINVAL if priority != 0 */
		enable = TASK_POLICY_DISABLE;
		break;
	}

	proc_set_task_policy(proc_task(targetp), external, TASK_POLICY_DARWIN_BG, enable);

out:
	kauth_cred_unref(&target_cred);
	return (error);
}

static void
do_background_socket(struct proc *p, thread_t thread)
{
#if SOCKETS
	struct filedesc *fdp;
	struct fileproc *fp;
	int i, background;

	proc_fdlock(p);

	if (thread != THREAD_NULL)
		background = proc_get_effective_thread_policy(thread, TASK_POLICY_ALL_SOCKETS_BG);
	else
		background = proc_get_effective_task_policy(proc_task(p), TASK_POLICY_ALL_SOCKETS_BG);

	if (background) {
		/*
		 * For PRIO_DARWIN_PROCESS (thread is NULL), simply mark
		 * the sockets with the background flag.  There's nothing
		 * to do here for the PRIO_DARWIN_THREAD case.
		 */
		if (thread == THREAD_NULL) {
			fdp = p->p_fd;

			for (i = 0; i < fdp->fd_nfiles; i++) {
				struct socket *sockp;

				fp = fdp->fd_ofiles[i];
				if (fp == NULL || (fdp->fd_ofileflags[i] & UF_RESERVED) != 0 ||
				    FILEGLOB_DTYPE(fp->f_fglob) != DTYPE_SOCKET) {
					continue;
				}
				sockp = (struct socket *)fp->f_fglob->fg_data;
				socket_set_traffic_mgt_flags(sockp, TRAFFIC_MGT_SO_BACKGROUND);
				sockp->so_background_thread = NULL;
			}
		}
	} else {
		/*
		 * Disable networking IO throttle.
		 * NOTE - It is a known limitation of the current design that we
		 * could potentially clear TRAFFIC_MGT_SO_BACKGROUND bit for
		 * sockets created by other threads within this process.
		 */
		fdp = p->p_fd;
		for (i = 0; i < fdp->fd_nfiles; i++) {
			struct socket *sockp;

			fp = fdp->fd_ofiles[i];
			if (fp == NULL || (fdp->fd_ofileflags[i] & UF_RESERVED) != 0 ||
			    FILEGLOB_DTYPE(fp->f_fglob) != DTYPE_SOCKET) {
				continue;
			}
			sockp = (struct socket *)fp->f_fglob->fg_data;
			/* skip if only clearing this thread's sockets */
			if ((thread) && (sockp->so_background_thread != thread)) {
				continue;
			}
			socket_clear_traffic_mgt_flags(sockp, TRAFFIC_MGT_SO_BACKGROUND);
			sockp->so_background_thread = NULL;
		}
	}

	proc_fdunlock(p);
#else
#pragma unused(p, thread)
#endif
}

/*
 * do_background_thread
 *
 * Requires:	thread reference
 *
 * Returns:	0			Success
 *		EPERM			Tried to background while in vfork
 *
 * XXX - todo - does this need a MACF hook?
 */
static int
do_background_thread(thread_t thread, int priority)
{
	struct uthread *ut;
	int enable, external;
	int rv = 0;

	ut = get_bsdthread_info(thread);

	/* Backgrounding is unsupported for threads in vfork */
	if ((ut->uu_flag & UT_VFORK) != 0)
		return (EPERM);

	/* Backgrounding is unsupported for workq threads */
	if (thread_is_static_param(thread)) {
		return (EPERM);
	}

	/* Not allowed to combine QoS and DARWIN_BG, doing so strips the QoS */
	if (thread_has_qos_policy(thread)) {
		thread_remove_qos_policy(thread);
		rv = EIDRM;
	}

	/* TODO: Fail if someone passes something besides 0 or PRIO_DARWIN_BG */
	enable = (priority == PRIO_DARWIN_BG) ? TASK_POLICY_ENABLE : TASK_POLICY_DISABLE;
	external = (current_thread() == thread) ? TASK_POLICY_INTERNAL : TASK_POLICY_EXTERNAL;

	proc_set_thread_policy(thread, external, TASK_POLICY_DARWIN_BG, enable);

	return rv;
}
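
/*
 * Illustrative userspace sketch (not part of this file): backgrounding
 * only the calling thread.  As implemented above, who must be 0 (only
 * the current thread is supported), and if the thread carried a QoS
 * policy the kernel strips it and reports the EIDRM case back through
 * setpriority()'s return value of -2.
 *
 *	#include <sys/resource.h>
 *
 *	int ret = setpriority(PRIO_DARWIN_THREAD, 0, PRIO_DARWIN_BG);
 *	if (ret == -2) {
 *		// the thread had a QoS class; it has been removed
 *	}
 */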

/*
 * Returns:	0			Success
 *	copyin:EFAULT
 *	dosetrlimit:
 */
/* ARGSUSED */
int
setrlimit(struct proc *p, struct setrlimit_args *uap, __unused int32_t *retval)
{
	struct rlimit alim;
	int error;

	if ((error = copyin(uap->rlp, (caddr_t)&alim,
	    sizeof (struct rlimit))))
		return (error);

	return (dosetrlimit(p, uap->which, &alim));
}

/*
 * Returns:	0			Success
 *		EINVAL
 *		ENOMEM			Cannot copy limit structure
 *	suser:EPERM
 *
 * Notes:	EINVAL is returned both for invalid arguments, and in the
 *		case that the current usage (e.g. RLIMIT_STACK) is already
 *		in excess of the requested limit.
 */
int
dosetrlimit(struct proc *p, u_int which, struct rlimit *limp)
{
	struct rlimit *alimp;
	int error;
	kern_return_t kr;
	int posix = (which & _RLIMIT_POSIX_FLAG) ? 1 : 0;

	/* Mask out POSIX flag, saved above */
	which &= ~_RLIMIT_POSIX_FLAG;

	if (which >= RLIM_NLIMITS)
		return (EINVAL);

	alimp = &p->p_rlimit[which];
	if (limp->rlim_cur > limp->rlim_max)
		return (EINVAL);

	if (limp->rlim_cur > alimp->rlim_max ||
	    limp->rlim_max > alimp->rlim_max)
		if ((error = suser(kauth_cred_get(), &p->p_acflag))) {
			return (error);
		}

	proc_limitblock(p);

	if ((error = proc_limitreplace(p)) != 0) {
		proc_limitunblock(p);
		return (error);
	}

	alimp = &p->p_rlimit[which];

	switch (which) {

	case RLIMIT_CPU:
		if (limp->rlim_cur == RLIM_INFINITY) {
			task_vtimer_clear(p->task, TASK_VTIMER_RLIM);
			timerclear(&p->p_rlim_cpu);
		} else {
			task_absolutetime_info_data_t tinfo;
			mach_msg_type_number_t count;
			struct timeval ttv, tv;
			clock_sec_t tv_sec;
			clock_usec_t tv_usec;

			count = TASK_ABSOLUTETIME_INFO_COUNT;
			task_info(p->task, TASK_ABSOLUTETIME_INFO,
			    (task_info_t)&tinfo, &count);
			absolutetime_to_microtime(tinfo.total_user + tinfo.total_system,
			    &tv_sec, &tv_usec);
			ttv.tv_sec = tv_sec;
			ttv.tv_usec = tv_usec;

			tv.tv_sec = (limp->rlim_cur > __INT_MAX__ ? __INT_MAX__ : limp->rlim_cur);
			tv.tv_usec = 0;
			timersub(&tv, &ttv, &p->p_rlim_cpu);

			timerclear(&tv);
			if (timercmp(&p->p_rlim_cpu, &tv, >)) {
				task_vtimer_set(p->task, TASK_VTIMER_RLIM);
			} else {
				task_vtimer_clear(p->task, TASK_VTIMER_RLIM);

				timerclear(&p->p_rlim_cpu);

				psignal(p, SIGXCPU);
			}
		}
		break;

	case RLIMIT_DATA:
		if (limp->rlim_cur > maxdmap)
			limp->rlim_cur = maxdmap;
		if (limp->rlim_max > maxdmap)
			limp->rlim_max = maxdmap;
		break;

	case RLIMIT_STACK:
		/* Disallow illegal stack size instead of clipping */
		if (limp->rlim_cur > maxsmap ||
		    limp->rlim_max > maxsmap) {
			if (posix) {
				error = EINVAL;
				goto out;
			} else {
				/*
				 * 4797860 - workaround poorly written installers by
				 * doing previous implementation (< 10.5) when caller
				 * is non-POSIX conforming.
				 */
				if (limp->rlim_cur > maxsmap)
					limp->rlim_cur = maxsmap;
				if (limp->rlim_max > maxsmap)
					limp->rlim_max = maxsmap;
			}
		}

		/*
		 * Stack is allocated to the max at exec time with only
		 * "rlim_cur" bytes accessible.  If stack limit is going
		 * up make more accessible, if going down make inaccessible.
		 */
		if (limp->rlim_cur > alimp->rlim_cur) {
			user_addr_t addr;
			user_size_t size;

			/* grow stack */
			size = round_page_64(limp->rlim_cur);
			size -= round_page_64(alimp->rlim_cur);

			addr = p->user_stack - round_page_64(limp->rlim_cur);
			kr = mach_vm_protect(current_map(),
			    addr, size,
			    FALSE, VM_PROT_DEFAULT);
			if (kr != KERN_SUCCESS) {
				error = EINVAL;
				goto out;
			}
		} else if (limp->rlim_cur < alimp->rlim_cur) {
			user_addr_t addr;
			user_size_t size;
			user_addr_t cur_sp;

			/* shrink stack */

			/*
			 * First check if new stack limit would agree
			 * with current stack usage.
			 * Get the current thread's stack pointer...
			 */
			cur_sp = thread_adjuserstack(current_thread(), 0);
			if (cur_sp <= p->user_stack &&
			    cur_sp > (p->user_stack -
			    round_page_64(alimp->rlim_cur))) {
				/* stack pointer is in main stack */
				if (cur_sp <= (p->user_stack -
				    round_page_64(limp->rlim_cur))) {
					/*
					 * New limit would cause
					 * current usage to be invalid:
					 * reject new limit.
					 */
					error = EINVAL;
					goto out;
				}
			} else {
				/* not on the main stack: reject */
				error = EINVAL;
				goto out;
			}

			size = round_page_64(alimp->rlim_cur);
			size -= round_page_64(limp->rlim_cur);

			addr = p->user_stack - round_page_64(alimp->rlim_cur);

			kr = mach_vm_protect(current_map(),
			    addr, size,
			    FALSE, VM_PROT_NONE);
			if (kr != KERN_SUCCESS) {
				error = EINVAL;
				goto out;
			}
		} else {
			/* no change ... */
		}
		break;

	case RLIMIT_NOFILE:
		/*
		 * Only root can set the maxfiles limits, as it is a
		 * systemwide resource.  If we are expecting POSIX behavior,
		 * instead of clamping the value, return EINVAL.  We do this
		 * because historically, people have been able to attempt to
		 * set RLIM_INFINITY to get "whatever the maximum is".
		 */
		if (kauth_cred_issuser(kauth_cred_get())) {
			if (limp->rlim_cur != alimp->rlim_cur &&
			    limp->rlim_cur > (rlim_t)maxfiles) {
				if (posix) {
					error = EINVAL;
					goto out;
				}
				limp->rlim_cur = maxfiles;
			}
			if (limp->rlim_max != alimp->rlim_max &&
			    limp->rlim_max > (rlim_t)maxfiles)
				limp->rlim_max = maxfiles;
		} else {
			if (limp->rlim_cur != alimp->rlim_cur &&
			    limp->rlim_cur > (rlim_t)maxfilesperproc) {
				if (posix) {
					error = EINVAL;
					goto out;
				}
				limp->rlim_cur = maxfilesperproc;
			}
			if (limp->rlim_max != alimp->rlim_max &&
			    limp->rlim_max > (rlim_t)maxfilesperproc)
				limp->rlim_max = maxfilesperproc;
		}
		break;

	case RLIMIT_NPROC:
		/*
		 * Only root can set to the maxproc limits, as it is a
		 * systemwide resource; all others are limited to
		 * maxprocperuid (presumably less than maxproc).
		 */
		if (kauth_cred_issuser(kauth_cred_get())) {
			if (limp->rlim_cur > (rlim_t)maxproc)
				limp->rlim_cur = maxproc;
			if (limp->rlim_max > (rlim_t)maxproc)
				limp->rlim_max = maxproc;
		} else {
			if (limp->rlim_cur > (rlim_t)maxprocperuid)
				limp->rlim_cur = maxprocperuid;
			if (limp->rlim_max > (rlim_t)maxprocperuid)
				limp->rlim_max = maxprocperuid;
		}
		break;

	case RLIMIT_MEMLOCK:
		/*
		 * Tell the Mach VM layer about the new limit value.
		 */
		vm_map_set_user_wire_limit(current_map(), limp->rlim_cur);
		break;

	} /* switch... */
	proc_lock(p);
	*alimp = *limp;
	proc_unlock(p);
	error = 0;
out:
	proc_limitunblock(p);
	return (error);
}
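
/*
 * Illustrative userspace sketch (not part of this file).  Per the
 * RLIMIT_CPU case above, the kernel arms a task vtimer
 * (TASK_VTIMER_RLIM) for the remaining CPU allowance, or posts SIGXCPU
 * immediately if the soft limit has already been consumed.  A process
 * that wants a warning before exhausting its allowance might do:
 *
 *	#include <sys/resource.h>
 *	#include <signal.h>
 *
 *	static void on_xcpu(int sig) { ... }   // checkpoint or wind down
 *
 *	signal(SIGXCPU, on_xcpu);
 *	struct rlimit rl = { .rlim_cur = 5, .rlim_max = 10 };  // seconds
 *	setrlimit(RLIMIT_CPU, &rl);
 */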

/* ARGSUSED */
int
getrlimit(struct proc *p, struct getrlimit_args *uap, __unused int32_t *retval)
{
	struct rlimit lim;

	/*
	 * Take out flag now in case we need to use it to trigger variant
	 * behaviour later.
	 */
	uap->which &= ~_RLIMIT_POSIX_FLAG;

	if (uap->which >= RLIM_NLIMITS)
		return (EINVAL);
	proc_limitget(p, uap->which, &lim);
	return (copyout((caddr_t)&lim,
	    uap->rlp, sizeof (struct rlimit)));
}
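
/*
 * Illustrative userspace sketch (not part of this file): raising the
 * open-file limit.  Per the RLIMIT_NOFILE case in dosetrlimit() above,
 * an over-limit request from a non-root caller is silently clamped to
 * maxfilesperproc, unless the POSIX-conforming entry point was used,
 * in which case it fails with EINVAL instead.
 *
 *	#include <sys/resource.h>
 *	#include <stdio.h>
 *
 *	struct rlimit rl;
 *	getrlimit(RLIMIT_NOFILE, &rl);
 *	rl.rlim_cur = rl.rlim_max;        // raise soft limit to hard limit
 *	if (setrlimit(RLIMIT_NOFILE, &rl) != 0)
 *		perror("setrlimit");
 */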

/*
 * Transform the running time and tick information in proc p into user,
 * system, and interrupt time usage.
 *
 * No lock on proc is held for this.
 */
void
calcru(struct proc *p, struct timeval *up, struct timeval *sp, struct timeval *ip)
{
	task_t task;

	timerclear(up);
	timerclear(sp);
	if (ip != NULL)
		timerclear(ip);

	task = p->task;
	if (task) {
		mach_task_basic_info_data_t tinfo;
		task_thread_times_info_data_t ttimesinfo;
		task_events_info_data_t teventsinfo;
		mach_msg_type_number_t task_info_count, task_ttimes_count;
		mach_msg_type_number_t task_events_count;
		struct timeval ut, st;

		task_info_count = MACH_TASK_BASIC_INFO_COUNT;
		task_info(task, MACH_TASK_BASIC_INFO,
		    (task_info_t)&tinfo, &task_info_count);
		ut.tv_sec = tinfo.user_time.seconds;
		ut.tv_usec = tinfo.user_time.microseconds;
		st.tv_sec = tinfo.system_time.seconds;
		st.tv_usec = tinfo.system_time.microseconds;
		timeradd(&ut, up, up);
		timeradd(&st, sp, sp);

		task_ttimes_count = TASK_THREAD_TIMES_INFO_COUNT;
		task_info(task, TASK_THREAD_TIMES_INFO,
		    (task_info_t)&ttimesinfo, &task_ttimes_count);

		ut.tv_sec = ttimesinfo.user_time.seconds;
		ut.tv_usec = ttimesinfo.user_time.microseconds;
		st.tv_sec = ttimesinfo.system_time.seconds;
		st.tv_usec = ttimesinfo.system_time.microseconds;
		timeradd(&ut, up, up);
		timeradd(&st, sp, sp);

		task_events_count = TASK_EVENTS_INFO_COUNT;
		task_info(task, TASK_EVENTS_INFO,
		    (task_info_t)&teventsinfo, &task_events_count);

		/*
		 * No need to lock "p": this does not need to be
		 * completely consistent, right?
		 */
		p->p_stats->p_ru.ru_minflt = (teventsinfo.faults -
		    teventsinfo.pageins);
		p->p_stats->p_ru.ru_majflt = teventsinfo.pageins;
		p->p_stats->p_ru.ru_nivcsw = (teventsinfo.csw -
		    p->p_stats->p_ru.ru_nvcsw);
		if (p->p_stats->p_ru.ru_nivcsw < 0)
			p->p_stats->p_ru.ru_nivcsw = 0;

		p->p_stats->p_ru.ru_maxrss = tinfo.resident_size_max;
	}
}

__private_extern__ void munge_user64_rusage(struct rusage *a_rusage_p, struct user64_rusage *a_user_rusage_p);
__private_extern__ void munge_user32_rusage(struct rusage *a_rusage_p, struct user32_rusage *a_user_rusage_p);

/* ARGSUSED */
int
getrusage(struct proc *p, struct getrusage_args *uap, __unused int32_t *retval)
{
	struct rusage *rup, rubuf;
	struct user64_rusage rubuf64;
	struct user32_rusage rubuf32;
	size_t retsize = sizeof(rubuf);		/* default: 32 bits */
	caddr_t retbuf = (caddr_t)&rubuf;	/* default: 32 bits */
	struct timeval utime;
	struct timeval stime;

	switch (uap->who) {
	case RUSAGE_SELF:
		calcru(p, &utime, &stime, NULL);
		proc_lock(p);
		rup = &p->p_stats->p_ru;
		rup->ru_utime = utime;
		rup->ru_stime = stime;

		rubuf = *rup;
		proc_unlock(p);

		break;

	case RUSAGE_CHILDREN:
		proc_lock(p);
		rup = &p->p_stats->p_cru;
		rubuf = *rup;
		proc_unlock(p);
		break;

	default:
		return (EINVAL);
	}
	if (IS_64BIT_PROCESS(p)) {
		retsize = sizeof(rubuf64);
		retbuf = (caddr_t)&rubuf64;
		munge_user64_rusage(&rubuf, &rubuf64);
	} else {
		retsize = sizeof(rubuf32);
		retbuf = (caddr_t)&rubuf32;
		munge_user32_rusage(&rubuf, &rubuf32);
	}

	return (copyout(retbuf, uap->rusage, retsize));
}
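
/*
 * Illustrative userspace sketch (not part of this file): reading the
 * usage statistics assembled above.
 *
 *	#include <sys/resource.h>
 *	#include <stdio.h>
 *
 *	struct rusage ru;
 *	if (getrusage(RUSAGE_SELF, &ru) == 0)
 *		printf("user %ld.%06d  sys %ld.%06d  maxrss %ld\n",
 *		    (long)ru.ru_utime.tv_sec, (int)ru.ru_utime.tv_usec,
 *		    (long)ru.ru_stime.tv_sec, (int)ru.ru_stime.tv_usec,
 *		    ru.ru_maxrss);
 */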

void
ruadd(struct rusage *ru, struct rusage *ru2)
{
	long *ip, *ip2;
	long i;

	timeradd(&ru->ru_utime, &ru2->ru_utime, &ru->ru_utime);
	timeradd(&ru->ru_stime, &ru2->ru_stime, &ru->ru_stime);
	if (ru->ru_maxrss < ru2->ru_maxrss)
		ru->ru_maxrss = ru2->ru_maxrss;
	ip = &ru->ru_first; ip2 = &ru2->ru_first;
	for (i = &ru->ru_last - &ru->ru_first; i >= 0; i--)
		*ip++ += *ip2++;
}

/*
 * Add the rusage stats of child in parent.
 *
 * It adds rusage statistics of child process and statistics of all its
 * children to its parent.
 *
 * Note: proc lock of parent should be held while calling this function.
 */
void
update_rusage_info_child(struct rusage_info_child *ri, rusage_info_current *ri_current)
{
	ri->ri_child_user_time += (ri_current->ri_user_time +
	    ri_current->ri_child_user_time);
	ri->ri_child_system_time += (ri_current->ri_system_time +
	    ri_current->ri_child_system_time);
	ri->ri_child_pkg_idle_wkups += (ri_current->ri_pkg_idle_wkups +
	    ri_current->ri_child_pkg_idle_wkups);
	ri->ri_child_interrupt_wkups += (ri_current->ri_interrupt_wkups +
	    ri_current->ri_child_interrupt_wkups);
	ri->ri_child_pageins += (ri_current->ri_pageins +
	    ri_current->ri_child_pageins);
	ri->ri_child_elapsed_abstime += ((ri_current->ri_proc_exit_abstime -
	    ri_current->ri_proc_start_abstime) + ri_current->ri_child_elapsed_abstime);
}

void
proc_limitget(proc_t p, int which, struct rlimit *limp)
{
	proc_list_lock();
	limp->rlim_cur = p->p_rlimit[which].rlim_cur;
	limp->rlim_max = p->p_rlimit[which].rlim_max;
	proc_list_unlock();
}

void
proc_limitdrop(proc_t p, int exiting)
{
	struct plimit *freelim = NULL;
	struct plimit *freeoldlim = NULL;

	proc_list_lock();

	if (--p->p_limit->pl_refcnt == 0) {
		freelim = p->p_limit;
		p->p_limit = NULL;
	}
	if ((exiting != 0) && (p->p_olimit != NULL) && (--p->p_olimit->pl_refcnt == 0)) {
		freeoldlim = p->p_olimit;
		p->p_olimit = NULL;
	}

	proc_list_unlock();
	if (freelim != NULL)
		FREE_ZONE(freelim, sizeof *p->p_limit, M_PLIMIT);
	if (freeoldlim != NULL)
		FREE_ZONE(freeoldlim, sizeof *p->p_olimit, M_PLIMIT);
}

void
proc_limitfork(proc_t parent, proc_t child)
{
	proc_list_lock();
	child->p_limit = parent->p_limit;
	child->p_limit->pl_refcnt++;
	child->p_olimit = NULL;
	proc_list_unlock();
}

void
proc_limitblock(proc_t p)
{
	proc_lock(p);
	while (p->p_lflag & P_LLIMCHANGE) {
		p->p_lflag |= P_LLIMWAIT;
		msleep(&p->p_olimit, &p->p_mlock, 0, "proc_limitblock", NULL);
	}
	p->p_lflag |= P_LLIMCHANGE;
	proc_unlock(p);
}

void
proc_limitunblock(proc_t p)
{
	proc_lock(p);
	p->p_lflag &= ~P_LLIMCHANGE;
	if (p->p_lflag & P_LLIMWAIT) {
		p->p_lflag &= ~P_LLIMWAIT;
		wakeup(&p->p_olimit);
	}
	proc_unlock(p);
}

/* This is called behind serialization provided by proc_limitblock/unblock */
int
proc_limitreplace(proc_t p)
{
	struct plimit *copy;

	proc_list_lock();

	if (p->p_limit->pl_refcnt == 1) {
		proc_list_unlock();
		return (0);
	}

	proc_list_unlock();

	MALLOC_ZONE(copy, struct plimit *,
	    sizeof(struct plimit), M_PLIMIT, M_WAITOK);
	if (copy == NULL) {
		return (ENOMEM);
	}

	proc_list_lock();
	bcopy(p->p_limit->pl_rlimit, copy->pl_rlimit,
	    sizeof(struct rlimit) * RLIM_NLIMITS);
	copy->pl_refcnt = 1;
	/* hang on to reference to old till process exits */
	p->p_olimit = p->p_limit;
	p->p_limit = copy;
	proc_list_unlock();

	return (0);
}

/*
 * iopolicysys
 *
 * Description:	System call MUX for use in manipulating I/O policy attributes of the current process or thread
 *
 * Parameters:	cmd			Policy command
 *		arg			Pointer to policy arguments
 *
 * Returns:	0			Success
 *		EINVAL			Invalid command or invalid policy arguments
 */

static int
iopolicysys_disk(struct proc *p, int cmd, int scope, int policy, struct _iopol_param_t *iop_param);
static int
iopolicysys_vfs(struct proc *p, int cmd, int scope, int policy, struct _iopol_param_t *iop_param);

int
iopolicysys(struct proc *p, struct iopolicysys_args *uap, int32_t *retval)
{
	int error = 0;
	struct _iopol_param_t iop_param;

	if ((error = copyin(uap->arg, &iop_param, sizeof(iop_param))) != 0)
		goto out;

	switch (iop_param.iop_iotype) {
	case IOPOL_TYPE_DISK:
		error = iopolicysys_disk(p, uap->cmd, iop_param.iop_scope, iop_param.iop_policy, &iop_param);
		if (error == EIDRM) {
			*retval = -2;
			error = 0;
		}
		if (error)
			goto out;
		break;
	case IOPOL_TYPE_VFS_HFS_CASE_SENSITIVITY:
		error = iopolicysys_vfs(p, uap->cmd, iop_param.iop_scope, iop_param.iop_policy, &iop_param);
		if (error)
			goto out;
		break;
	default:
		error = EINVAL;
		goto out;
	}

	/* Individual iotype handlers are expected to update iop_param, if requested with a GET command */
	if (uap->cmd == IOPOL_CMD_GET) {
		error = copyout((caddr_t)&iop_param, uap->arg, sizeof(iop_param));
		if (error)
			goto out;
	}

out:
	return (error);
}

static int
iopolicysys_disk(struct proc *p __unused, int cmd, int scope, int policy, struct _iopol_param_t *iop_param)
{
	int error = 0;
	thread_t thread;
	int policy_flavor;

	/* Validate scope */
	switch (scope) {
	case IOPOL_SCOPE_PROCESS:
		thread = THREAD_NULL;
		policy_flavor = TASK_POLICY_IOPOL;
		break;

	case IOPOL_SCOPE_THREAD:
		thread = current_thread();
		policy_flavor = TASK_POLICY_IOPOL;

		/* Not allowed to combine QoS and (non-PASSIVE) IO policy, doing so strips the QoS */
		if (cmd == IOPOL_CMD_SET && thread_has_qos_policy(thread)) {
			switch (policy) {
			case IOPOL_DEFAULT:
			case IOPOL_PASSIVE:
				break;
			case IOPOL_UTILITY:
			case IOPOL_THROTTLE:
			case IOPOL_IMPORTANT:
			case IOPOL_STANDARD:
				if (!thread_is_static_param(thread)) {
					thread_remove_qos_policy(thread);
					/*
					 * This is not an error case, this is to return a marker to user-space that
					 * we stripped the thread of its QoS class.
					 */
					error = EIDRM;
					break;
				}
				/* otherwise, fall through to the error case. */
			default:
				error = EINVAL;
				goto out;
			}
		}
		break;

	case IOPOL_SCOPE_DARWIN_BG:
		thread = THREAD_NULL;
		policy_flavor = TASK_POLICY_DARWIN_BG_IOPOL;
		break;

	default:
		error = EINVAL;
		goto out;
	}

	/* Validate policy */
	if (cmd == IOPOL_CMD_SET) {
		switch (policy) {
		case IOPOL_DEFAULT:
			if (scope == IOPOL_SCOPE_DARWIN_BG) {
				/* the current default BG throttle level is UTILITY */
				policy = IOPOL_UTILITY;
			} else {
				policy = IOPOL_IMPORTANT;
			}
			break;
		case IOPOL_UTILITY:
			/* fall-through */
		case IOPOL_THROTTLE:
			/* These levels are OK */
			break;
		case IOPOL_IMPORTANT:
			/* fall-through */
		case IOPOL_STANDARD:
			/* fall-through */
		case IOPOL_PASSIVE:
			if (scope == IOPOL_SCOPE_DARWIN_BG) {
				/* These levels are invalid for BG */
				error = EINVAL;
				goto out;
			} else {
				/* OK for other scopes */
			}
			break;
		default:
			error = EINVAL;
			goto out;
		}
	}

	/* Perform command */
	switch (cmd) {
	case IOPOL_CMD_SET:
		if (thread != THREAD_NULL)
			proc_set_thread_policy(thread, TASK_POLICY_INTERNAL, policy_flavor, policy);
		else
			proc_set_task_policy(current_task(), TASK_POLICY_INTERNAL, policy_flavor, policy);
		break;
	case IOPOL_CMD_GET:
		if (thread != THREAD_NULL)
			policy = proc_get_thread_policy(thread, TASK_POLICY_INTERNAL, policy_flavor);
		else
			policy = proc_get_task_policy(current_task(), TASK_POLICY_INTERNAL, policy_flavor);
		iop_param->iop_policy = policy;
		break;
	default:
		error = EINVAL;	/* unknown command */
		break;
	}

out:
	return (error);
}

static int
iopolicysys_vfs(struct proc *p, int cmd, int scope, int policy, struct _iopol_param_t *iop_param)
{
	int error = 0;

	/* Validate scope */
	switch (scope) {
	case IOPOL_SCOPE_PROCESS:
		/* Only process OK */
		break;
	default:
		error = EINVAL;
		goto out;
	}

	/* Validate policy */
	if (cmd == IOPOL_CMD_SET) {
		switch (policy) {
		case IOPOL_VFS_HFS_CASE_SENSITIVITY_DEFAULT:
			/* fall-through */
		case IOPOL_VFS_HFS_CASE_SENSITIVITY_FORCE_CASE_SENSITIVE:
			/* These policies are OK */
			break;
		default:
			error = EINVAL;
			goto out;
		}
	}

	/* Perform command */
	switch (cmd) {
	case IOPOL_CMD_SET:
		if (0 == kauth_cred_issuser(kauth_cred_get())) {
			/* If it's a non-root process, it needs to have the entitlement to set the policy */
			boolean_t entitled = FALSE;
			entitled = IOTaskHasEntitlement(current_task(), "com.apple.private.iopol.case_sensitivity");
			if (!entitled) {
				error = EPERM;
				goto out;
			}
		}

		switch (policy) {
		case IOPOL_VFS_HFS_CASE_SENSITIVITY_DEFAULT:
			OSBitAndAtomic16(~((uint32_t)P_VFS_IOPOLICY_FORCE_HFS_CASE_SENSITIVITY), &p->p_vfs_iopolicy);
			break;
		case IOPOL_VFS_HFS_CASE_SENSITIVITY_FORCE_CASE_SENSITIVE:
			OSBitOrAtomic16((uint32_t)P_VFS_IOPOLICY_FORCE_HFS_CASE_SENSITIVITY, &p->p_vfs_iopolicy);
			break;
		default:
			error = EINVAL;
			goto out;
		}

		break;
	case IOPOL_CMD_GET:
		iop_param->iop_policy = (p->p_vfs_iopolicy & P_VFS_IOPOLICY_FORCE_HFS_CASE_SENSITIVITY)
		    ? IOPOL_VFS_HFS_CASE_SENSITIVITY_FORCE_CASE_SENSITIVE
		    : IOPOL_VFS_HFS_CASE_SENSITIVITY_DEFAULT;
		break;
	default:
		error = EINVAL;	/* unknown command */
		break;
	}

out:
	return (error);
}
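
/*
 * Illustrative userspace sketch (not part of this file).  Assumption:
 * the usual entry points into iopolicysys() are the libc wrappers
 * getiopolicy_np()/setiopolicy_np() declared in <sys/resource.h>.
 *
 *	#include <sys/resource.h>
 *
 *	// Throttle only the calling thread's disk I/O:
 *	setiopolicy_np(IOPOL_TYPE_DISK, IOPOL_SCOPE_THREAD, IOPOL_THROTTLE);
 *
 *	// Query the process-wide disk I/O policy:
 *	int pol = getiopolicy_np(IOPOL_TYPE_DISK, IOPOL_SCOPE_PROCESS);
 */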

/* BSD call back function for task_policy networking changes */
void
proc_apply_task_networkbg(void *bsd_info, thread_t thread)
{
	assert(bsd_info != PROC_NULL);

	pid_t pid = proc_pid((proc_t)bsd_info);

	proc_t p = proc_find(pid);

	if (p != PROC_NULL) {
		assert(p == (proc_t)bsd_info);

		do_background_socket(p, thread);
		proc_rele(p);
	}
}

void
gather_rusage_info(proc_t p, rusage_info_current *ru, int flavor)
{
	struct rusage_info_child *ri_child;

	assert(p->p_stats != NULL);
	switch (flavor) {

	case RUSAGE_INFO_V3:
		fill_task_qos_rusage(p->task, ru);
		fill_task_billed_usage(p->task, ru);
		/* fall through */

	case RUSAGE_INFO_V2:
		fill_task_io_rusage(p->task, ru);
		/* fall through */

	case RUSAGE_INFO_V1:
		/*
		 * p->p_stats->ri_child statistics are protected under proc lock.
		 */
		proc_lock(p);

		ri_child = &(p->p_stats->ri_child);
		ru->ri_child_user_time = ri_child->ri_child_user_time;
		ru->ri_child_system_time = ri_child->ri_child_system_time;
		ru->ri_child_pkg_idle_wkups = ri_child->ri_child_pkg_idle_wkups;
		ru->ri_child_interrupt_wkups = ri_child->ri_child_interrupt_wkups;
		ru->ri_child_pageins = ri_child->ri_child_pageins;
		ru->ri_child_elapsed_abstime = ri_child->ri_child_elapsed_abstime;

		proc_unlock(p);
		/* fall through */

	case RUSAGE_INFO_V0:
		proc_getexecutableuuid(p, (unsigned char *)&ru->ri_uuid, sizeof (ru->ri_uuid));
		fill_task_rusage(p->task, ru);
		ru->ri_proc_start_abstime = p->p_stats->ps_start;
	}
}

static void
rusage_info_conversion(rusage_info_t ri_info, rusage_info_current *ri_current, int flavor)
{
	struct rusage_info_v0 *ri_v0;
	struct rusage_info_v1 *ri_v1;
	struct rusage_info_v2 *ri_v2;

	switch (flavor) {

	case RUSAGE_INFO_V2:
		ri_v2 = (struct rusage_info_v2 *)ri_info;
		ri_v2->ri_diskio_bytesread = ri_current->ri_diskio_bytesread;
		ri_v2->ri_diskio_byteswritten = ri_current->ri_diskio_byteswritten;
		/* fall through */

	case RUSAGE_INFO_V1:
		ri_v1 = (struct rusage_info_v1 *)ri_info;
		ri_v1->ri_child_user_time = ri_current->ri_child_user_time;
		ri_v1->ri_child_system_time = ri_current->ri_child_system_time;
		ri_v1->ri_child_pkg_idle_wkups = ri_current->ri_child_pkg_idle_wkups;
		ri_v1->ri_child_interrupt_wkups = ri_current->ri_child_interrupt_wkups;
		ri_v1->ri_child_pageins = ri_current->ri_child_pageins;
		ri_v1->ri_child_elapsed_abstime = ri_current->ri_child_elapsed_abstime;
		/* fall through */

	case RUSAGE_INFO_V0:
		ri_v0 = (struct rusage_info_v0 *)ri_info;
		memcpy(&ri_v0->ri_uuid[0], &ri_current->ri_uuid[0], sizeof(ri_v0->ri_uuid));
		ri_v0->ri_user_time = ri_current->ri_user_time;
		ri_v0->ri_system_time = ri_current->ri_system_time;
		ri_v0->ri_pkg_idle_wkups = ri_current->ri_pkg_idle_wkups;
		ri_v0->ri_interrupt_wkups = ri_current->ri_interrupt_wkups;
		ri_v0->ri_pageins = ri_current->ri_pageins;
		ri_v0->ri_wired_size = ri_current->ri_wired_size;
		ri_v0->ri_resident_size = ri_current->ri_resident_size;
		ri_v0->ri_phys_footprint = ri_current->ri_phys_footprint;
		ri_v0->ri_proc_start_abstime = ri_current->ri_proc_start_abstime;
		ri_v0->ri_proc_exit_abstime = ri_current->ri_proc_exit_abstime;
		break;

	default:
		break;
	}
}

int
proc_get_rusage(proc_t p, int flavor, user_addr_t buffer, __unused int is_zombie)
{
	struct rusage_info_v0 ri_v0;
	struct rusage_info_v1 ri_v1;
	struct rusage_info_v2 ri_v2;
	struct rusage_info_v3 ri_v3;

	rusage_info_current ri_current;

	int error = 0;

	switch (flavor) {
	case RUSAGE_INFO_V0:
		/*
		 * If task is still alive, collect info from the live task itself.
		 * Otherwise, look to the cached info in the zombie proc.
		 */
		if (p->p_ru == NULL) {
			gather_rusage_info(p, &ri_current, flavor);
			ri_current.ri_proc_exit_abstime = 0;
			rusage_info_conversion(&ri_v0, &ri_current, flavor);
		} else {
			rusage_info_conversion(&ri_v0, &p->p_ru->ri, flavor);
		}
		error = copyout(&ri_v0, buffer, sizeof (ri_v0));
		break;

	case RUSAGE_INFO_V1:
		/*
		 * If task is still alive, collect info from the live task itself.
		 * Otherwise, look to the cached info in the zombie proc.
		 */
		if (p->p_ru == NULL) {
			gather_rusage_info(p, &ri_current, flavor);
			ri_current.ri_proc_exit_abstime = 0;
			rusage_info_conversion(&ri_v1, &ri_current, flavor);
		} else {
			rusage_info_conversion(&ri_v1, &p->p_ru->ri, flavor);
		}
		error = copyout(&ri_v1, buffer, sizeof (ri_v1));
		break;

	case RUSAGE_INFO_V2:
		/*
		 * If task is still alive, collect info from the live task itself.
		 * Otherwise, look to the cached info in the zombie proc.
		 */
		if (p->p_ru == NULL) {
			gather_rusage_info(p, &ri_current, flavor);
			ri_current.ri_proc_exit_abstime = 0;
			rusage_info_conversion(&ri_v2, &ri_current, flavor);
		} else {
			rusage_info_conversion(&ri_v2, &p->p_ru->ri, flavor);
		}
		error = copyout(&ri_v2, buffer, sizeof (ri_v2));
		break;

	case RUSAGE_INFO_V3:
		/*
		 * If task is still alive, collect info from the live task itself.
		 * Otherwise, look to the cached info in the zombie proc.
		 */
		if (p->p_ru == NULL) {
			gather_rusage_info(p, &ri_v3, flavor);
			ri_v3.ri_proc_exit_abstime = 0;
		} else {
			ri_v3 = p->p_ru->ri;
		}
		error = copyout(&ri_v3, buffer, sizeof (ri_v3));
		break;

	default:
		error = EINVAL;
		break;
	}

	return (error);
}
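
/*
 * Illustrative userspace sketch (not part of this file).  Assumption:
 * the rusage_info flavors handled above are reachable from userspace
 * through libproc's proc_pid_rusage() wrapper.
 *
 *	#include <libproc.h>
 *	#include <stdio.h>
 *	#include <unistd.h>
 *
 *	rusage_info_current ri;
 *	if (proc_pid_rusage(getpid(), RUSAGE_INFO_CURRENT,
 *	        (rusage_info_t *)&ri) == 0)
 *		printf("phys footprint: %llu bytes\n", ri.ri_phys_footprint);
 */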

static int
mach_to_bsd_rv(int mach_rv)
{
	int bsd_rv = 0;

	switch (mach_rv) {
	case KERN_SUCCESS:
		bsd_rv = 0;
		break;
	case KERN_INVALID_ARGUMENT:
		bsd_rv = EINVAL;
		break;
	default:
		panic("unknown error %#x", mach_rv);
	}

	return bsd_rv;
}

/*
 * Resource limit controls
 *
 * uap->flavor available flavors:
 *
 *	RLIMIT_WAKEUPS_MONITOR
 *	RLIMIT_CPU_USAGE_MONITOR
 *	RLIMIT_THREAD_CPULIMITS
 */
int
proc_rlimit_control(__unused struct proc *p, struct proc_rlimit_control_args *uap, __unused int32_t *retval)
{
	proc_t targetp;
	int error = 0;
	struct proc_rlimit_control_wakeupmon wakeupmon_args;
	uint32_t cpumon_flags;
	uint32_t cpulimits_flags;
	kauth_cred_t my_cred, target_cred;

	/* -1 implicitly means our own process (perhaps even the current thread for per-thread attributes) */
	if (uap->pid == -1) {
		targetp = proc_self();
	} else {
		targetp = proc_find(uap->pid);
	}

	/* proc_self() can return NULL for an exiting process */
	if (targetp == PROC_NULL) {
		return (ESRCH);
	}

	my_cred = kauth_cred_get();
	target_cred = kauth_cred_proc_ref(targetp);

	if (!kauth_cred_issuser(my_cred) && kauth_cred_getruid(my_cred) &&
	    kauth_cred_getuid(my_cred) != kauth_cred_getuid(target_cred) &&
	    kauth_cred_getruid(my_cred) != kauth_cred_getuid(target_cred)) {
		proc_rele(targetp);
		kauth_cred_unref(&target_cred);
		return (EACCES);
	}

	switch (uap->flavor) {
	case RLIMIT_WAKEUPS_MONITOR:
		if ((error = copyin(uap->arg, &wakeupmon_args, sizeof (wakeupmon_args))) != 0) {
			break;
		}
		if ((error = mach_to_bsd_rv(task_wakeups_monitor_ctl(targetp->task, &wakeupmon_args.wm_flags,
		    &wakeupmon_args.wm_rate))) != 0) {
			break;
		}
		error = copyout(&wakeupmon_args, uap->arg, sizeof (wakeupmon_args));
		break;
	case RLIMIT_CPU_USAGE_MONITOR:
		cpumon_flags = uap->arg;	// XXX temporarily stashing flags in argp (12592127)
		error = mach_to_bsd_rv(task_cpu_usage_monitor_ctl(targetp->task, &cpumon_flags));
		break;
	case RLIMIT_THREAD_CPULIMITS:
		cpulimits_flags = (uint32_t)uap->arg;	// only need a limited set of bits, pass in void * argument

		if (uap->pid != -1) {
			error = EINVAL;
			break;
		}

		uint8_t percent = 0;
		uint32_t ms_refill = 0;
		uint64_t ns_refill;

		percent = (uint8_t)(cpulimits_flags & 0xffU);		/* low 8 bits for percent */
		ms_refill = (cpulimits_flags >> 8) & 0xffffff;		/* next 24 bits represent ms refill value */
		if (percent >= 100) {
			error = EINVAL;
			break;
		}

		ns_refill = ((uint64_t)ms_refill) * NSEC_PER_MSEC;

		error = mach_to_bsd_rv(thread_set_cpulimit(THREAD_CPULIMIT_BLOCK, percent, ns_refill));
		break;
	default:
		error = EINVAL;
		break;
	}

	proc_rele(targetp);
	kauth_cred_unref(&target_cred);

	/*
	 * Return value from this function becomes errno to userland caller.
	 */
	return (error);
}
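
/*
 * Illustrative sketch (hypothetical helper, not a public interface).
 * The RLIMIT_THREAD_CPULIMITS case above unpacks its parameters from
 * the low 32 bits of uap->arg: percent in bits 0..7, refill period in
 * milliseconds in bits 8..31.  A caller would encode them like so:
 *
 *	static uint32_t
 *	encode_cpulimit(uint8_t percent, uint32_t ms_refill)
 *	{
 *		// percent must be < 100; ms_refill is truncated to 24 bits
 *		return ((ms_refill & 0xffffffU) << 8) | percent;
 *	}
 */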

/*
 * Return the current amount of CPU time consumed by this thread
 * (in either user or kernel mode).
 */
int
thread_selfusage(struct proc *p __unused, struct thread_selfusage_args *uap __unused, uint64_t *retval)
{
	uint64_t runtime;

	runtime = thread_get_runtime_self();
	*retval = runtime;

	return (0);
}