bsd/kern/kern_resource.c

   1 /*
   2  * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28 /* Copyright (c) 1995, 1997 Apple Computer, Inc. All Rights Reserved */
  29 /*-
  30  * Copyright (c) 1982, 1986, 1991, 1993
  31  *      The Regents of the University of California.  All rights reserved.
  32  * (c) UNIX System Laboratories, Inc.
  33  * All or some portions of this file are derived from material licensed
  34  * to the University of California by American Telephone and Telegraph
  35  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  36  * the permission of UNIX System Laboratories, Inc.
  37  *
  38  * Redistribution and use in source and binary forms, with or without
  39  * modification, are permitted provided that the following conditions
  40  * are met:
  41  * 1. Redistributions of source code must retain the above copyright
  42  *    notice, this list of conditions and the following disclaimer.
  43  * 2. Redistributions in binary form must reproduce the above copyright
  44  *    notice, this list of conditions and the following disclaimer in the
  45  *    documentation and/or other materials provided with the distribution.
  46  * 3. All advertising materials mentioning features or use of this software
  47  *    must display the following acknowledgement:
  48  *      This product includes software developed by the University of
  49  *      California, Berkeley and its contributors.
  50  * 4. Neither the name of the University nor the names of its contributors
  51  *    may be used to endorse or promote products derived from this software
  52  *    without specific prior written permission.
  53  *
  54  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  55  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  56  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  57  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  58  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  59  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  60  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  61  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  62  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  63  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  64  * SUCH DAMAGE.
  65  *
  66  *      @(#)kern_resource.c     8.5 (Berkeley) 1/21/94
  67  */
  68 /*
  69  * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
  70  * support for mandatory and extensible security protections.  This notice
  71  * is included in support of clause 2.2 (b) of the Apple Public License,
  72  * Version 2.0.
  73  */
  74
  75 #include <sys/param.h>
  76 #include <sys/systm.h>
  77 #include <sys/sysctl.h>
  78 #include <sys/kernel.h>
  79 #include <sys/file_internal.h>
  80 #include <sys/resourcevar.h>
  81 #include <sys/malloc.h>
  82 #include <sys/proc_internal.h>
  83 #include <sys/kauth.h>
  84 #include <machine/spl.h>
  85
  86 #include <sys/mount_internal.h>
  87 #include <sys/sysproto.h>
  88
  89 #include <security/audit/audit.h>
  90
  91 #include <machine/vmparam.h>
  92
  93 #include <mach/mach_types.h>
  94 #include <mach/time_value.h>
  95 #include <mach/task.h>
  96 #include <mach/task_info.h>
  97 #include <mach/vm_map.h>
  98 #include <mach/mach_vm.h>
  99 #include <mach/thread_act.h>  /* for thread_policy_set( ) */
 100 #include <kern/lock.h>
 101 #include <kern/thread.h>
 102
 103 #include <kern/task.h>
 104 #include <kern/clock.h>         /* for absolutetime_to_microtime() */
 105 #include <netinet/in.h>         /* for TRAFFIC_MGT_SO_* */
 106 #include <sys/socketvar.h>      /* for struct socket */
 107
 108 #include <vm/vm_map.h>
 109
  110 int     donice(struct proc *curp, struct proc *chgp, int n);	/* set chgp's nice value on behalf of curp; defined below */
  111 int     dosetrlimit(struct proc *p, u_int which, struct rlimit *limp);	/* worker called by setrlimit() after copyin */
  112 int     uthread_get_background_state(uthread_t);	/* only defined in this file when CONFIG_EMBEDDED is set */
  113 static void do_background_socket(struct proc *p, thread_t thread, int priority);	/* set/clear TRAFFIC_MGT_SO_BACKGROUND on p's sockets */
  114 static int do_background_thread(struct proc *curp, thread_t thread, int priority);	/* PRIO_DARWIN_THREAD worker */
  115 static int do_background_proc(struct proc *curp, struct proc *targetp, int priority);	/* PRIO_DARWIN_PROCESS worker */
  116 void proc_apply_task_networkbg_internal(proc_t);	/* NOTE(review): neither defined nor called in the reviewed portion -- confirm where it lives */
 117
  118 rlim_t maxdmap = MAXDSIZ;       /* XXX: ceiling that dosetrlimit() clamps RLIMIT_DATA cur/max to */
  119 rlim_t maxsmap = MAXSSIZ - PAGE_SIZE;   /* XXX: largest RLIMIT_STACK cur/max that dosetrlimit() checks against */
 120
  121 /*
  122  * Limits on the number of open files per process, and the number
  123  * of processes per user ID.
  124  *
  125  * Note: would be in kern/subr_param.c in FreeBSD.
  126  */
  127 __private_extern__ int maxfilesperproc = OPEN_MAX;              /* per-proc open files limit */
  128
      /*
       * Read/write sysctl for maxprocperuid.  The variable itself is not
       * declared in this part of the file (presumably in a header -- confirm).
       */
  129 SYSCTL_INT(_kern, KERN_MAXPROCPERUID, maxprocperuid, CTLFLAG_RW | CTLFLAG_LOCKED,
  130                 &maxprocperuid, 0, "Maximum processes allowed per userid" );
  131
      /* Read/write sysctl for the maxfilesperproc variable defined above. */
  132 SYSCTL_INT(_kern, KERN_MAXFILESPERPROC, maxfilesperproc, CTLFLAG_RW | CTLFLAG_LOCKED,
  133                 &maxfilesperproc, 0, "Maximum files allowed open per process" );
 134
  135 /* Args and callback for the proc_iterate() walk done by setpriority(PRIO_USER) */
  136 struct puser_nice_args {
  137         proc_t curp;		/* calling process, passed to donice() as the requester */
  138         int     prio;		/* requested nice value, passed to donice() */
  139         id_t    who;		/* uid to match against each process's credential */
  140         int *   foundp;		/* if non-NULL, incremented for every matching process */
  141         int *   errorp;		/* if non-NULL, overwritten with each donice() result (last one wins) */
  142 };
  143 static int puser_donice_callback(proc_t p, void * arg);
 144
 145
  146 /* Args and callback for the pgrp_iterate() walk done by setpriority(PRIO_PGRP) */
  147 struct ppgrp_nice_args {
  148         proc_t curp;		/* calling process, passed to donice() as the requester */
  149         int     prio;		/* requested nice value, passed to donice() */
  150         int *   foundp;		/* if non-NULL, incremented for every process visited */
  151         int *   errorp;		/* if non-NULL, overwritten with each donice() result (last one wins) */
  152 };
  153 static int ppgrp_donice_callback(proc_t p, void * arg);
 154
 155 /*
 156  * Resource controls and accounting.
 157  */
 158 int
 159 getpriority(struct proc *curp, struct getpriority_args *uap, int32_t *retval)
 160 {
 161         struct proc *p;
 162         int low = PRIO_MAX + 1;
 163         kauth_cred_t my_cred;
 164
 165         /* would also test (uap->who < 0), but id_t is unsigned */
 166         if (uap->who > 0x7fffffff)
 167                 return (EINVAL);
 168
 169         switch (uap->which) {
 170
 171         case PRIO_PROCESS:
 172                 if (uap->who == 0) {
 173                         p = curp;
 174                         low = p->p_nice;
 175                 } else {
 176                         p = proc_find(uap->who);
 177                         if (p == 0)
 178                                 break;
 179                         low = p->p_nice;
 180                         proc_rele(p);
 181
 182                 }
 183                 break;
 184
 185         case PRIO_PGRP: {
 186                 struct pgrp *pg = PGRP_NULL;
 187
 188                 if (uap->who == 0) {
 189                         /* returns the pgrp to ref */
 190                         pg = proc_pgrp(curp);
 191                  } else if ((pg = pgfind(uap->who)) == PGRP_NULL) {
 192                         break;
 193                 }
 194                 /* No need for iteration as it is a simple scan */
 195                 pgrp_lock(pg);
 196                 for (p = pg->pg_members.lh_first; p != 0; p = p->p_pglist.le_next) {
 197                         if (p->p_nice < low)
 198                                 low = p->p_nice;
 199                 }
 200                 pgrp_unlock(pg);
 201                 pg_rele(pg);
 202                 break;
 203         }
 204
 205         case PRIO_USER:
 206                 if (uap->who == 0)
 207                         uap->who = kauth_cred_getuid(kauth_cred_get());
 208
 209                 proc_list_lock();
 210
 211                 for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) {
 212                         my_cred = kauth_cred_proc_ref(p);
 213                         if (kauth_cred_getuid(my_cred) == uap->who &&
 214                             p->p_nice < low)
 215                                 low = p->p_nice;
 216                         kauth_cred_unref(&my_cred);
 217                 }
 218
 219                 proc_list_unlock();
 220
 221                 break;
 222
 223         case PRIO_DARWIN_THREAD: {
 224                 thread_t                        thread;
 225                 struct uthread          *ut;
 226
 227                 /* we currently only support the current thread */
 228                 if (uap->who != 0) {
 229                         return (EINVAL);
 230                 }
 231
 232                 thread = current_thread();
 233                 ut = get_bsdthread_info(thread);
 234
 235                 low = 0;
 236                 if ( (ut->uu_flag & UT_BACKGROUND_TRAFFIC_MGT) != 0 ) {
 237                         low = 1;
 238                 }
 239                 break;
 240         }
 241
 242         default:
 243                 return (EINVAL);
 244         }
 245         if (low == PRIO_MAX + 1)
 246                 return (ESRCH);
 247         *retval = low;
 248         return (0);
 249 }
 250
 251 /* call back function used for proc iteration in PRIO_USER */
 252 static int
 253 puser_donice_callback(proc_t p, void * arg)
 254 {
 255         int error, n;
 256         struct puser_nice_args * pun = (struct puser_nice_args *)arg;
 257         kauth_cred_t my_cred;
 258
 259         my_cred = kauth_cred_proc_ref(p);
 260         if (kauth_cred_getuid(my_cred) == pun->who) {
 261                 error = donice(pun->curp, p, pun->prio);
 262                 if (pun->errorp != NULL)
 263                         *pun->errorp = error;
 264                 if (pun->foundp != NULL) {
 265                         n = *pun->foundp;
 266                         *pun->foundp = n+1;
 267                 }
 268         }
 269         kauth_cred_unref(&my_cred);
 270
 271         return(PROC_RETURNED);
 272 }
 273
 274 /* call back function used for proc iteration in PRIO_PGRP */
 275 static int
 276 ppgrp_donice_callback(proc_t p, void * arg)
 277 {
 278         int error;
 279         struct ppgrp_nice_args * pun = (struct ppgrp_nice_args *)arg;
 280         int n;
 281
 282         error = donice(pun->curp, p, pun->prio);
 283         if (pun->errorp != NULL)
 284                 *pun->errorp = error;
 285         if (pun->foundp!= NULL) {
 286                 n = *pun->foundp;
 287                 *pun->foundp = n+1;
 288         }
 289
 290         return(PROC_RETURNED);
 291 }
 292
 293 /*
 294  * Returns:     0                       Success
 295  *              EINVAL
 296  *              ESRCH
 297  *      donice:EPERM
 298  *      donice:EACCES
 299  */
 300 /* ARGSUSED */
 301 int
 302 setpriority(struct proc *curp, struct setpriority_args *uap, __unused int32_t *retval)
 303 {
 304         struct proc *p;
 305         int found = 0, error = 0;
 306         int refheld = 0;
 307
 308         AUDIT_ARG(cmd, uap->which);
 309         AUDIT_ARG(owner, uap->who, 0);
 310         AUDIT_ARG(value32, uap->prio);
 311
 312         /* would also test (uap->who < 0), but id_t is unsigned */
 313         if (uap->who > 0x7fffffff)
 314                 return (EINVAL);
 315
 316         switch (uap->which) {
 317
 318         case PRIO_PROCESS:
 319                 if (uap->who == 0)
 320                         p = curp;
 321                 else {
 322                         p = proc_find(uap->who);
 323                         if (p == 0)
 324                                 break;
 325                         refheld = 1;
 326                 }
 327                 error = donice(curp, p, uap->prio);
 328                 found++;
 329                 if (refheld != 0)
 330                         proc_rele(p);
 331                 break;
 332
 333         case PRIO_PGRP: {
 334                 struct pgrp *pg = PGRP_NULL;
 335                 struct ppgrp_nice_args ppgrp;
 336
 337                 if (uap->who == 0) {
 338                         pg = proc_pgrp(curp);
 339                  } else if ((pg = pgfind(uap->who)) == PGRP_NULL)
 340                         break;
 341
 342                 ppgrp.curp = curp;
 343                 ppgrp.prio = uap->prio;
 344                 ppgrp.foundp = &found;
 345                 ppgrp.errorp = &error;
 346
 347                 /* PGRP_DROPREF drops the reference on process group */
 348                 pgrp_iterate(pg, PGRP_DROPREF, ppgrp_donice_callback, (void *)&ppgrp, NULL, NULL);
 349
 350                 break;
 351         }
 352
 353         case PRIO_USER: {
 354                 struct puser_nice_args punice;
 355
 356                 if (uap->who == 0)
 357                         uap->who = kauth_cred_getuid(kauth_cred_get());
 358
 359                 punice.curp = curp;
 360                 punice.prio = uap->prio;
 361                 punice.who = uap->who;
 362                 punice.foundp = &found;
 363                 error = 0;
 364                 punice.errorp = &error;
 365                 proc_iterate(PROC_ALLPROCLIST, puser_donice_callback, (void *)&punice, NULL, NULL);
 366
 367                 break;
 368         }
 369
 370         case PRIO_DARWIN_THREAD: {
 371                 /* we currently only support the current thread */
 372                 if (uap->who != 0) {
 373                         return (EINVAL);
 374                 }
 375                 error = do_background_thread(curp, current_thread(), uap->prio);
 376                 if (!error) {
 377                         (void) do_background_socket(curp, current_thread(), uap->prio);
 378                 }
 379                 found++;
 380                 break;
 381         }
 382
 383         case PRIO_DARWIN_PROCESS: {
 384                 if (uap->who == 0)
 385                         p = curp;
 386                 else {
 387                         p = proc_find(uap->who);
 388                         if (p == 0)
 389                                 break;
 390                         refheld = 1;
 391                 }
 392
 393                 error = do_background_proc(curp, p, uap->prio);
 394                 if (!error) {
 395                         (void) do_background_socket(p, NULL, uap->prio);
 396                 }
 397
 398                 found++;
 399                 if (refheld != 0)
 400                         proc_rele(p);
 401                 break;
 402         }
 403
 404         default:
 405                 return (EINVAL);
 406         }
 407         if (found == 0)
 408                 return (ESRCH);
 409         return (error);
 410 }
 411
 412
 413 /*
 414  * Returns:     0                       Success
 415  *              EPERM
 416  *              EACCES
 417  *      mac_check_proc_sched:???
 418  */
 419 int
 420 donice(struct proc *curp, struct proc *chgp, int n)
 421 {
 422         int error = 0;
 423         kauth_cred_t ucred;
 424         kauth_cred_t my_cred;
 425
 426         ucred = kauth_cred_proc_ref(curp);
 427         my_cred = kauth_cred_proc_ref(chgp);
 428
 429         if (suser(ucred, NULL) && kauth_cred_getruid(ucred) &&
 430             kauth_cred_getuid(ucred) != kauth_cred_getuid(my_cred) &&
 431             kauth_cred_getruid(ucred) != kauth_cred_getuid(my_cred)) {
 432                 error = EPERM;
 433                 goto out;
 434         }
 435         if (n > PRIO_MAX)
 436                 n = PRIO_MAX;
 437         if (n < PRIO_MIN)
 438                 n = PRIO_MIN;
 439         if (n < chgp->p_nice && suser(ucred, &curp->p_acflag)) {
 440                 error = EACCES;
 441                 goto out;
 442         }
 443 #if CONFIG_MACF
 444         error = mac_proc_check_sched(curp, chgp);
 445         if (error)
 446                 goto out;
 447 #endif
 448         proc_lock(chgp);
 449         chgp->p_nice = n;
 450         proc_unlock(chgp);
 451         (void)resetpriority(chgp);
 452 out:
 453         kauth_cred_unref(&ucred);
 454         kauth_cred_unref(&my_cred);
 455         return (error);
 456 }
 457
 458 static int
 459 do_background_proc(struct proc *curp, struct proc *targetp, int priority)
 460 {
 461         int error = 0;
 462         kauth_cred_t ucred;
 463         kauth_cred_t target_cred;
 464 #if CONFIG_EMBEDDED
 465         task_category_policy_data_t info;
 466 #endif
 467
 468         ucred = kauth_cred_get();
 469         target_cred = kauth_cred_proc_ref(targetp);
 470
 471         if (!kauth_cred_issuser(ucred) && kauth_cred_getruid(ucred) &&
 472             kauth_cred_getuid(ucred) != kauth_cred_getuid(target_cred) &&
 473             kauth_cred_getruid(ucred) != kauth_cred_getuid(target_cred))
 474         {
 475                 error = EPERM;
 476                 goto out;
 477         }
 478
 479 #if CONFIG_MACF
 480         error = mac_proc_check_sched(curp, targetp);
 481         if (error)
 482                 goto out;
 483 #endif
 484
 485 #if !CONFIG_EMBEDDED
 486         if (priority == PRIO_DARWIN_NONUI)
 487                 error = proc_apply_task_gpuacc(targetp->task, TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_NOACCESS);
 488         else
 489                 error = proc_set1_bgtaskpolicy(targetp->task, priority);
 490         if (error)
 491                 goto out;
 492 #else /* !CONFIG_EMBEDDED */
 493
 494         /* set the max scheduling priority on the task */
 495         if (priority == PRIO_DARWIN_BG) {
 496                 info.role = TASK_THROTTLE_APPLICATION;
 497         }
 498         else if (priority == PRIO_DARWIN_NONUI) {
 499                 info.role = TASK_NONUI_APPLICATION;
 500         }
 501         else {
 502                 info.role = TASK_DEFAULT_APPLICATION;
 503         }
 504
 505         error = task_policy_set(targetp->task,
 506                         TASK_CATEGORY_POLICY,
 507                         (task_policy_t) &info,
 508                         TASK_CATEGORY_POLICY_COUNT);
 509
 510         if (error)
 511                 goto out;
 512
 513         proc_lock(targetp);
 514
 515         /* mark proc structure as backgrounded */
 516         if (priority == PRIO_DARWIN_BG) {
 517                 targetp->p_lflag |= P_LBACKGROUND;
 518         } else {
 519                 targetp->p_lflag &= ~P_LBACKGROUND;
 520         }
 521
 522         /* set or reset the disk I/O priority */
 523         targetp->p_iopol_disk = (priority == PRIO_DARWIN_BG ?
 524                         IOPOL_THROTTLE : IOPOL_DEFAULT);
 525
 526         proc_unlock(targetp);
 527 #endif /* !CONFIG_EMBEDDED */
 528
 529 out:
 530         kauth_cred_unref(&target_cred);
 531         return (error);
 532 }
 533
 534 static void
 535 do_background_socket(struct proc *p, thread_t thread, int priority)
 536 {
 537         struct filedesc                     *fdp;
 538         struct fileproc                     *fp;
 539         int                                 i;
 540
 541         if (priority == PRIO_DARWIN_BG) {
 542                 /*
 543                  * For PRIO_DARWIN_PROCESS (thread is NULL), simply mark
 544                  * the sockets with the background flag.  There's nothing
 545                  * to do here for the PRIO_DARWIN_THREAD case.
 546                  */
 547                 if (thread == NULL) {
 548                         proc_fdlock(p);
 549                         fdp = p->p_fd;
 550
 551                         for (i = 0; i < fdp->fd_nfiles; i++) {
 552                                 struct socket       *sockp;
 553
 554                                 fp = fdp->fd_ofiles[i];
 555                                 if (fp == NULL || (fdp->fd_ofileflags[i] & UF_RESERVED) != 0 ||
 556                                                 fp->f_fglob->fg_type != DTYPE_SOCKET) {
 557                                         continue;
 558                                 }
 559                                 sockp = (struct socket *)fp->f_fglob->fg_data;
 560                                 socket_set_traffic_mgt_flags(sockp, TRAFFIC_MGT_SO_BACKGROUND);
 561                                 sockp->so_background_thread = NULL;
 562                         }
 563                         proc_fdunlock(p);
 564                 }
 565
 566         } else {
 567
 568                 /* disable networking IO throttle.
 569                  * NOTE - It is a known limitation of the current design that we
 570                  * could potentially clear TRAFFIC_MGT_SO_BACKGROUND bit for
 571                  * sockets created by other threads within this process.
 572                  */
 573                 proc_fdlock(p);
 574                 fdp = p->p_fd;
 575                 for ( i = 0; i < fdp->fd_nfiles; i++ ) {
 576                         struct socket       *sockp;
 577
 578                         fp = fdp->fd_ofiles[ i ];
 579                         if ( fp == NULL || (fdp->fd_ofileflags[ i ] & UF_RESERVED) != 0 ||
 580                                         fp->f_fglob->fg_type != DTYPE_SOCKET ) {
 581                                 continue;
 582                         }
 583                         sockp = (struct socket *)fp->f_fglob->fg_data;
 584                         /* skip if only clearing this thread's sockets */
 585                         if ((thread) && (sockp->so_background_thread != thread)) {
 586                                 continue;
 587                         }
 588                         socket_clear_traffic_mgt_flags(sockp, TRAFFIC_MGT_SO_BACKGROUND);
 589                         sockp->so_background_thread = NULL;
 590                 }
 591                 proc_fdunlock(p);
 592         }
 593 }
 594
 595
 596 /*
 597  * do_background_thread
 598  * Returns:     0                       Success
 599  * XXX - todo - does this need a MACF hook?
 600  *
 601  * NOTE: To maintain binary compatibility with PRIO_DARWIN_THREAD with respect
 602  *       to network traffic management, UT_BACKGROUND_TRAFFIC_MGT is set/cleared
 603  *       along with UT_BACKGROUND flag, as the latter alone no longer implies
 604  *       any form of traffic regulation (it simply means that the thread is
 605  *       background.)  With PRIO_DARWIN_PROCESS, any form of network traffic
 606  *       management must be explicitly requested via whatever means appropriate,
 607  *       and only TRAFFIC_MGT_SO_BACKGROUND is set via do_background_socket().
 608  */
 609 static int
 610 do_background_thread(struct proc *curp __unused, thread_t thread, int priority)
 611 {
 612         struct uthread                                          *ut;
 613 #if !CONFIG_EMBEDDED
 614         int error = 0;
 615 #else /* !CONFIG_EMBEDDED */
 616         thread_precedence_policy_data_t         policy;
 617 #endif /* !CONFIG_EMBEDDED */
 618
 619         ut = get_bsdthread_info(thread);
 620
 621         /* Backgrounding is unsupported for threads in vfork */
 622         if ( (ut->uu_flag & UT_VFORK) != 0) {
 623                 return(EPERM);
 624         }
 625
 626 #if !CONFIG_EMBEDDED
 627         error = proc_set1_bgthreadpolicy(curp->task, thread_tid(thread), priority);
 628         return(error);
 629 #else /* !CONFIG_EMBEDDED */
 630         if ( (priority & PRIO_DARWIN_BG) == 0 ) {
 631                 /* turn off backgrounding of thread */
 632                 if ( (ut->uu_flag & UT_BACKGROUND) == 0 ) {
 633                         /* already off */
 634                         return(0);
 635                 }
 636
 637                 /*
 638                  * Clear background bit in thread and disable disk IO
 639                  * throttle as well as network traffic management.
 640                  * The corresponding socket flags for sockets created by
 641                  * this thread will be cleared in do_background_socket().
 642                  */
 643                 ut->uu_flag &= ~(UT_BACKGROUND | UT_BACKGROUND_TRAFFIC_MGT);
 644                 ut->uu_iopol_disk = IOPOL_NORMAL;
 645
 646                 /* reset thread priority (we did not save previous value) */
 647                 policy.importance = 0;
 648                 thread_policy_set( thread, THREAD_PRECEDENCE_POLICY,
 649                                                    (thread_policy_t)&policy,
 650                                                    THREAD_PRECEDENCE_POLICY_COUNT );
 651                 return(0);
 652         }
 653
 654         /* background this thread */
 655         if ( (ut->uu_flag & UT_BACKGROUND) != 0 ) {
 656                 /* already backgrounded */
 657                 return(0);
 658         }
 659
 660         /*
 661          * Tag thread as background and throttle disk IO, as well
 662          * as regulate network traffics.  Future sockets created
 663          * by this thread will have their corresponding socket
 664          * flags set at socket create time.
 665          */
 666         ut->uu_flag |= (UT_BACKGROUND | UT_BACKGROUND_TRAFFIC_MGT);
 667         ut->uu_iopol_disk = IOPOL_THROTTLE;
 668
 669         policy.importance = INT_MIN;
 670         thread_policy_set( thread, THREAD_PRECEDENCE_POLICY,
 671                                            (thread_policy_t)&policy,
 672                                            THREAD_PRECEDENCE_POLICY_COUNT );
 673
 674         /* throttle networking IO happens in socket( ) syscall.
 675          * If UT_{BACKGROUND,BACKGROUND_TRAFFIC_MGT} is set in the current
 676          * thread then TRAFFIC_MGT_SO_{BACKGROUND,BG_REGULATE} is set.
 677          * Existing sockets are taken care of by do_background_socket().
 678          */
 679 #endif /* !CONFIG_EMBEDDED */
 680         return(0);
 681 }
 682
 683 #if CONFIG_EMBEDDED
 684 int mach_do_background_thread(thread_t thread, int prio);
 685
 686 int
 687 mach_do_background_thread(thread_t thread, int prio)
 688 {
 689         int                     error           = 0;
 690         struct proc             *curp           = NULL;
 691         struct proc             *targetp        = NULL;
 692         kauth_cred_t    ucred;
 693
 694         targetp = get_bsdtask_info(get_threadtask(thread));
 695         if (!targetp) {
 696                 return KERN_INVALID_ARGUMENT;
 697         }
 698
 699         curp = proc_self();
 700         if (curp == PROC_NULL) {
 701                 return KERN_FAILURE;
 702         }
 703
 704         ucred = kauth_cred_proc_ref(curp);
 705
 706         if (suser(ucred, NULL) && curp != targetp) {
 707                 error = KERN_PROTECTION_FAILURE;
 708                 goto out;
 709         }
 710
 711         error = do_background_thread(curp, thread, prio);
 712         if (!error) {
 713                 (void) do_background_socket(curp, thread, prio);
 714         } else {
 715                 if (error == EPERM) {
 716                         error = KERN_PROTECTION_FAILURE;
 717                 } else {
 718                         error = KERN_FAILURE;
 719                 }
 720         }
 721
 722 out:
 723         proc_rele(curp);
 724         kauth_cred_unref(&ucred);
 725         return error;
 726 }
 727 #endif /* CONFIG_EMBEDDED */
 728
 729 #if CONFIG_EMBEDDED
 730 /*
 731  * If the thread or its proc has been put into the background
 732  * with setpriority(PRIO_DARWIN_{THREAD,PROCESS}, *, PRIO_DARWIN_BG),
 733  * report that status.
 734  *
 735  * Returns: PRIO_DARWIN_BG if background
 736  *                      0 if foreground
 737  */
 738 int
 739 uthread_get_background_state(uthread_t uth)
 740 {
 741         proc_t p = uth->uu_proc;
 742         if (p && (p->p_lflag & P_LBACKGROUND))
 743                 return PRIO_DARWIN_BG;
 744
 745         if (uth->uu_flag & UT_BACKGROUND)
 746                 return PRIO_DARWIN_BG;
 747
 748         return 0;
 749 }
 750 #endif /* CONFIG_EMBEDDED */
 751
 752 /*
 753  * Returns:     0                       Success
 754  *      copyin:EFAULT
 755  *      dosetrlimit:
 756  */
 757 /* ARGSUSED */
 758 int
 759 setrlimit(struct proc *p, struct setrlimit_args *uap, __unused int32_t *retval)
 760 {
 761         struct rlimit alim;
 762         int error;
 763
 764         if ((error = copyin(uap->rlp, (caddr_t)&alim,
 765             sizeof (struct rlimit))))
 766                 return (error);
 767
 768         return (dosetrlimit(p, uap->which, &alim));
 769 }
 770
 771 /*
 772  * Returns:     0                       Success
 773  *              EINVAL
 774  *              ENOMEM                  Cannot copy limit structure
 775  *      suser:EPERM
 776  *
 777  * Notes:       EINVAL is returned both for invalid arguments, and in the
 778  *              case that the current usage (e.g. RLIMIT_STACK) is already
 779  *              in excess of the requested limit.
 780  */
 781 int
 782 dosetrlimit(struct proc *p, u_int which, struct rlimit *limp)
 783 {
 784         struct rlimit *alimp;
 785         int error;
 786         kern_return_t   kr;
 787         int posix = (which & _RLIMIT_POSIX_FLAG) ? 1 : 0;
 788
 789         /* Mask out POSIX flag, saved above */
 790         which &= ~_RLIMIT_POSIX_FLAG;
 791
 792         if (which >= RLIM_NLIMITS)
 793                 return (EINVAL);
 794
 795         alimp = &p->p_rlimit[which];
 796         if (limp->rlim_cur > limp->rlim_max)
 797                 return EINVAL;
 798
 799         if (limp->rlim_cur > alimp->rlim_max ||
 800             limp->rlim_max > alimp->rlim_max)
 801                 if ((error = suser(kauth_cred_get(), &p->p_acflag))) {
 802                         return (error);
 803         }
 804
 805         proc_limitblock(p);
 806
 807         if ((error = proc_limitreplace(p)) != 0) {
 808                 proc_limitunblock(p);
 809                 return(error);
 810         }
 811
 812         alimp = &p->p_rlimit[which];
 813
 814         switch (which) {
 815
 816         case RLIMIT_CPU:
 817                 if (limp->rlim_cur == RLIM_INFINITY) {
 818                         task_vtimer_clear(p->task, TASK_VTIMER_RLIM);
 819                         timerclear(&p->p_rlim_cpu);
 820                 }
 821                 else {
 822                         task_absolutetime_info_data_t   tinfo;
 823                         mach_msg_type_number_t                  count;
 824                         struct timeval                                  ttv, tv;
 825                         clock_sec_t                                             tv_sec;
 826                         clock_usec_t                                    tv_usec;
 827
 828                         count = TASK_ABSOLUTETIME_INFO_COUNT;
 829                         task_info(p->task, TASK_ABSOLUTETIME_INFO,
 830                                                                 (task_info_t)&tinfo, &count);
 831                         absolutetime_to_microtime(tinfo.total_user + tinfo.total_system,
 832                                                                           &tv_sec, &tv_usec);
 833                         ttv.tv_sec = tv_sec;
 834                         ttv.tv_usec = tv_usec;
 835
 836                         tv.tv_sec = (limp->rlim_cur > __INT_MAX__ ? __INT_MAX__ : limp->rlim_cur);
 837                         tv.tv_usec = 0;
 838                         timersub(&tv, &ttv, &p->p_rlim_cpu);
 839
 840                         timerclear(&tv);
 841                         if (timercmp(&p->p_rlim_cpu, &tv, >))
 842                                 task_vtimer_set(p->task, TASK_VTIMER_RLIM);
 843                         else {
 844                                 task_vtimer_clear(p->task, TASK_VTIMER_RLIM);
 845
 846                                 timerclear(&p->p_rlim_cpu);
 847
 848                                 psignal(p, SIGXCPU);
 849                         }
 850                 }
 851                 break;
 852
 853         case RLIMIT_DATA:
 854                 if (limp->rlim_cur > maxdmap)
 855                         limp->rlim_cur = maxdmap;
 856                 if (limp->rlim_max > maxdmap)
 857                         limp->rlim_max = maxdmap;
 858                 break;
 859
 860         case RLIMIT_STACK:
 861                 /* Disallow illegal stack size instead of clipping */
 862                 if (limp->rlim_cur > maxsmap ||
 863                     limp->rlim_max > maxsmap) {
 864                         if (posix) {
 865                                 error = EINVAL;
 866                                 goto out;
 867                         }
 868                         else {
 869                                 /*
 870                                  * 4797860 - workaround poorly written installers by
 871                                  * doing previous implementation (< 10.5) when caller
 872                                  * is non-POSIX conforming.
 873                                  */
 874                                 if (limp->rlim_cur > maxsmap)
 875                                         limp->rlim_cur = maxsmap;
 876                                 if (limp->rlim_max > maxsmap)
 877                                         limp->rlim_max = maxsmap;
 878                         }
 879                 }
 880
 881                 /*
 882                  * Stack is allocated to the max at exec time with only
 883                  * "rlim_cur" bytes accessible.  If stack limit is going
 884                  * up make more accessible, if going down make inaccessible.
 885                  */
 886                 if (limp->rlim_cur > alimp->rlim_cur) {
 887                         user_addr_t addr;
 888                         user_size_t size;
 889
 890                                 /* grow stack */
 891                                 size = round_page_64(limp->rlim_cur);
 892                                 size -= round_page_64(alimp->rlim_cur);
 893
 894 #if STACK_GROWTH_UP
 895                                 /* go to top of current stack */
 896                         addr = p->user_stack + round_page_64(alimp->rlim_cur);
 897 #else   /* STACK_GROWTH_UP */
 898                         addr = p->user_stack - round_page_64(limp->rlim_cur);
 899 #endif /* STACK_GROWTH_UP */
 900                         kr = mach_vm_protect(current_map(),
 901                                              addr, size,
 902                                              FALSE, VM_PROT_DEFAULT);
 903                         if (kr != KERN_SUCCESS) {
 904                                 error =  EINVAL;
 905                                 goto out;
 906                         }
 907                 } else if (limp->rlim_cur < alimp->rlim_cur) {
 908                         user_addr_t addr;
 909                         user_size_t size;
 910                         user_addr_t cur_sp;
 911
 912                                 /* shrink stack */
 913
 914                         /*
 915                          * First check if new stack limit would agree
 916                          * with current stack usage.
 917                          * Get the current thread's stack pointer...
 918                          */
 919                         cur_sp = thread_adjuserstack(current_thread(),
 920                                                      0);
 921 #if STACK_GROWTH_UP
 922                         if (cur_sp >= p->user_stack &&
 923                             cur_sp < (p->user_stack +
 924                                       round_page_64(alimp->rlim_cur))) {
 925                                 /* current stack pointer is in main stack */
 926                                 if (cur_sp >= (p->user_stack +
 927                                                round_page_64(limp->rlim_cur))) {
 928                                         /*
 929                                          * New limit would cause
 930                                          * current usage to be invalid:
 931                                          * reject new limit.
 932                                          */
 933                                         error =  EINVAL;
 934                                         goto out;
 935                         }
 936                         } else {
 937                                 /* not on the main stack: reject */
 938                                 error =  EINVAL;
 939                                 goto out;
 940                 }
 941
 942 #else   /* STACK_GROWTH_UP */
 943                         if (cur_sp <= p->user_stack &&
 944                             cur_sp > (p->user_stack -
 945                                       round_page_64(alimp->rlim_cur))) {
 946                                 /* stack pointer is in main stack */
 947                                 if (cur_sp <= (p->user_stack -
 948                                                round_page_64(limp->rlim_cur))) {
 949                                         /*
 950                                          * New limit would cause
 951                                          * current usage to be invalid:
 952                                          * reject new limit.
 953                                          */
 954                                         error =  EINVAL;
 955                                         goto out;
 956                                 }
 957                         } else {
 958                                 /* not on the main stack: reject */
 959                                 error =  EINVAL;
 960                                 goto out;
 961                         }
 962 #endif  /* STACK_GROWTH_UP */
 963
 964                         size = round_page_64(alimp->rlim_cur);
 965                         size -= round_page_64(limp->rlim_cur);
 966
 967 #if STACK_GROWTH_UP
 968                         addr = p->user_stack + round_page_64(limp->rlim_cur);
 969 #else   /* STACK_GROWTH_UP */
 970                         addr = p->user_stack - round_page_64(alimp->rlim_cur);
 971 #endif /* STACK_GROWTH_UP */
 972
 973                         kr = mach_vm_protect(current_map(),
 974                                              addr, size,
 975                                              FALSE, VM_PROT_NONE);
 976                         if (kr != KERN_SUCCESS) {
 977                                 error =  EINVAL;
 978                                 goto out;
 979                         }
 980                 } else {
 981                         /* no change ... */
 982                 }
 983                 break;
 984
 985         case RLIMIT_NOFILE:
 986                 /*
 987                  * Only root can set the maxfiles limits, as it is
 988                  * systemwide resource.  If we are expecting POSIX behavior,
 989                  * instead of clamping the value, return EINVAL.  We do this
 990                  * because historically, people have been able to attempt to
 991                  * set RLIM_INFINITY to get "whatever the maximum is".
 992                 */
 993                 if ( is_suser() ) {
 994                         if (limp->rlim_cur != alimp->rlim_cur &&
 995                             limp->rlim_cur > (rlim_t)maxfiles) {
 996                                 if (posix) {
 997                                         error =  EINVAL;
 998                                         goto out;
 999                                 }
1000                                 limp->rlim_cur = maxfiles;
1001                         }
1002                         if (limp->rlim_max != alimp->rlim_max &&
1003                             limp->rlim_max > (rlim_t)maxfiles)
1004                                 limp->rlim_max = maxfiles;
1005                 }
1006                 else {
1007                         if (limp->rlim_cur != alimp->rlim_cur &&
1008                             limp->rlim_cur > (rlim_t)maxfilesperproc) {
1009                                 if (posix) {
1010                                         error =  EINVAL;
1011                                         goto out;
1012                                 }
1013                                 limp->rlim_cur = maxfilesperproc;
1014                         }
1015                         if (limp->rlim_max != alimp->rlim_max &&
1016                             limp->rlim_max > (rlim_t)maxfilesperproc)
1017                                 limp->rlim_max = maxfilesperproc;
1018                 }
1019                 break;
1020
1021         case RLIMIT_NPROC:
1022                 /*
1023                  * Only root can set to the maxproc limits, as it is
1024                  * systemwide resource; all others are limited to
1025                  * maxprocperuid (presumably less than maxproc).
1026                  */
1027                 if ( is_suser() ) {
1028                         if (limp->rlim_cur > (rlim_t)maxproc)
1029                                 limp->rlim_cur = maxproc;
1030                         if (limp->rlim_max > (rlim_t)maxproc)
1031                                 limp->rlim_max = maxproc;
1032                 }
1033                 else {
1034                         if (limp->rlim_cur > (rlim_t)maxprocperuid)
1035                                 limp->rlim_cur = maxprocperuid;
1036                         if (limp->rlim_max > (rlim_t)maxprocperuid)
1037                                 limp->rlim_max = maxprocperuid;
1038                 }
1039                 break;
1040
1041         case RLIMIT_MEMLOCK:
1042                 /*
1043                  * Tell the Mach VM layer about the new limit value.
1044                  */
1045
1046                 vm_map_set_user_wire_limit(current_map(), limp->rlim_cur);
1047                 break;
1048
1049         } /* switch... */
1050         proc_lock(p);
1051         *alimp = *limp;
1052         proc_unlock(p);
1053         error = 0;
1054 out:
1055         proc_limitunblock(p);
1056         return (error);
1057 }
1058
1059 /* ARGSUSED */
1060 int
1061 getrlimit(struct proc *p, struct getrlimit_args *uap, __unused int32_t *retval)
1062 {
1063         struct rlimit lim;
1064
1065         /*
1066          * Take out flag now in case we need to use it to trigger variant
1067          * behaviour later.
1068          */
1069         uap->which &= ~_RLIMIT_POSIX_FLAG;
1070
1071         if (uap->which >= RLIM_NLIMITS)
1072                 return (EINVAL);
1073         proc_limitget(p, uap->which, &lim);
1074         return (copyout((caddr_t)&lim,
1075                         uap->rlp, sizeof (struct rlimit)));
1076 }
1077
1078 /*
1079  * Transform the running time and tick information in proc p into user,
1080  * system, and interrupt time usage.
1081  */
1082 /* No lock on proc is held for this.. */
1083 void
1084 calcru(struct proc *p, struct timeval *up, struct timeval *sp, struct timeval *ip)
1085 {
1086         task_t                  task;
1087
1088         timerclear(up);
1089         timerclear(sp);
1090         if (ip != NULL)
1091                 timerclear(ip);
1092
1093         task = p->task;
1094         if (task) {
1095                 task_basic_info_32_data_t tinfo;
1096                 task_thread_times_info_data_t ttimesinfo;
1097                 task_events_info_data_t teventsinfo;
1098                 mach_msg_type_number_t task_info_count, task_ttimes_count;
1099                 mach_msg_type_number_t task_events_count;
1100                 struct timeval ut,st;
1101
1102                 task_info_count = TASK_BASIC_INFO_32_COUNT;
1103                 task_info(task, TASK_BASIC2_INFO_32,
1104                           (task_info_t)&tinfo, &task_info_count);
1105                 ut.tv_sec = tinfo.user_time.seconds;
1106                 ut.tv_usec = tinfo.user_time.microseconds;
1107                 st.tv_sec = tinfo.system_time.seconds;
1108                 st.tv_usec = tinfo.system_time.microseconds;
1109                 timeradd(&ut, up, up);
1110                 timeradd(&st, sp, sp);
1111
1112                 task_ttimes_count = TASK_THREAD_TIMES_INFO_COUNT;
1113                 task_info(task, TASK_THREAD_TIMES_INFO,
1114                           (task_info_t)&ttimesinfo, &task_ttimes_count);
1115
1116                 ut.tv_sec = ttimesinfo.user_time.seconds;
1117                 ut.tv_usec = ttimesinfo.user_time.microseconds;
1118                 st.tv_sec = ttimesinfo.system_time.seconds;
1119                 st.tv_usec = ttimesinfo.system_time.microseconds;
1120                 timeradd(&ut, up, up);
1121                 timeradd(&st, sp, sp);
1122
1123                 task_events_count = TASK_EVENTS_INFO_COUNT;
1124                 task_info(task, TASK_EVENTS_INFO,
1125                           (task_info_t)&teventsinfo, &task_events_count);
1126
1127                 /*
1128                  * No need to lock "p":  this does not need to be
1129                  * completely consistent, right ?
1130                  */
1131                 p->p_stats->p_ru.ru_minflt = (teventsinfo.faults -
1132                                               teventsinfo.pageins);
1133                 p->p_stats->p_ru.ru_majflt = teventsinfo.pageins;
1134                 p->p_stats->p_ru.ru_nivcsw = (teventsinfo.csw -
1135                                               p->p_stats->p_ru.ru_nvcsw);
1136                 if (p->p_stats->p_ru.ru_nivcsw < 0)
1137                         p->p_stats->p_ru.ru_nivcsw = 0;
1138
1139                 p->p_stats->p_ru.ru_maxrss = tinfo.resident_size;
1140         }
1141 }
1142
1143 __private_extern__ void munge_user64_rusage(struct rusage *a_rusage_p, struct user64_rusage *a_user_rusage_p);
1144 __private_extern__ void munge_user32_rusage(struct rusage *a_rusage_p, struct user32_rusage *a_user_rusage_p);
1145
1146 /* ARGSUSED */
1147 int
1148 getrusage(struct proc *p, struct getrusage_args *uap, __unused int32_t *retval)
1149 {
1150         struct rusage *rup, rubuf;
1151         struct user64_rusage rubuf64;
1152         struct user32_rusage rubuf32;
1153         size_t retsize = sizeof(rubuf);                 /* default: 32 bits */
1154         caddr_t retbuf = (caddr_t)&rubuf;               /* default: 32 bits */
1155         struct timeval utime;
1156         struct timeval stime;
1157
1158
1159         switch (uap->who) {
1160         case RUSAGE_SELF:
1161                 calcru(p, &utime, &stime, NULL);
1162                 proc_lock(p);
1163                 rup = &p->p_stats->p_ru;
1164                 rup->ru_utime = utime;
1165                 rup->ru_stime = stime;
1166
1167                 rubuf = *rup;
1168                 proc_unlock(p);
1169
1170                 break;
1171
1172         case RUSAGE_CHILDREN:
1173                 proc_lock(p);
1174                 rup = &p->p_stats->p_cru;
1175                 rubuf = *rup;
1176                 proc_unlock(p);
1177                 break;
1178
1179         default:
1180                 return (EINVAL);
1181         }
1182         if (IS_64BIT_PROCESS(p)) {
1183                 retsize = sizeof(rubuf64);
1184                 retbuf = (caddr_t)&rubuf64;
1185                 munge_user64_rusage(&rubuf, &rubuf64);
1186         } else {
1187                 retsize = sizeof(rubuf32);
1188                 retbuf = (caddr_t)&rubuf32;
1189                 munge_user32_rusage(&rubuf, &rubuf32);
1190         }
1191
1192         return (copyout(retbuf, uap->rusage, retsize));
1193 }
1194
1195 void
1196 ruadd(struct rusage *ru, struct rusage *ru2)
1197 {
1198         long *ip, *ip2;
1199         long i;
1200
1201         timeradd(&ru->ru_utime, &ru2->ru_utime, &ru->ru_utime);
1202         timeradd(&ru->ru_stime, &ru2->ru_stime, &ru->ru_stime);
1203         if (ru->ru_maxrss < ru2->ru_maxrss)
1204                 ru->ru_maxrss = ru2->ru_maxrss;
1205         ip = &ru->ru_first; ip2 = &ru2->ru_first;
1206         for (i = &ru->ru_last - &ru->ru_first; i >= 0; i--)
1207                 *ip++ += *ip2++;
1208 }
1209
1210 void
1211 proc_limitget(proc_t p, int which, struct rlimit * limp)
1212 {
1213         proc_list_lock();
1214         limp->rlim_cur = p->p_rlimit[which].rlim_cur;
1215         limp->rlim_max = p->p_rlimit[which].rlim_max;
1216         proc_list_unlock();
1217 }
1218
1219
1220 void
1221 proc_limitdrop(proc_t p, int exiting)
1222 {
1223         struct  plimit * freelim = NULL;
1224         struct  plimit * freeoldlim = NULL;
1225
1226         proc_list_lock();
1227
1228         if (--p->p_limit->pl_refcnt == 0) {
1229                 freelim = p->p_limit;
1230                 p->p_limit = NULL;
1231         }
1232         if ((exiting != 0) && (p->p_olimit != NULL) && (--p->p_olimit->pl_refcnt == 0)) {
1233                 freeoldlim =  p->p_olimit;
1234                 p->p_olimit = NULL;
1235         }
1236
1237         proc_list_unlock();
1238         if (freelim != NULL)
1239                 FREE_ZONE(freelim, sizeof *p->p_limit, M_PLIMIT);
1240         if (freeoldlim != NULL)
1241                 FREE_ZONE(freeoldlim, sizeof *p->p_olimit, M_PLIMIT);
1242 }
1243
1244
1245 void
1246 proc_limitfork(proc_t parent, proc_t child)
1247 {
1248         proc_list_lock();
1249         child->p_limit = parent->p_limit;
1250         child->p_limit->pl_refcnt++;
1251         child->p_olimit = NULL;
1252         proc_list_unlock();
1253 }
1254
1255 void
1256 proc_limitblock(proc_t p)
1257 {
1258         proc_lock(p);
1259         while (p->p_lflag & P_LLIMCHANGE) {
1260                 p->p_lflag |= P_LLIMWAIT;
1261                 msleep(&p->p_olimit, &p->p_mlock, 0, "proc_limitblock", NULL);
1262         }
1263         p->p_lflag |= P_LLIMCHANGE;
1264         proc_unlock(p);
1265
1266 }
1267
1268
1269 void
1270 proc_limitunblock(proc_t p)
1271 {
1272         proc_lock(p);
1273         p->p_lflag &= ~P_LLIMCHANGE;
1274         if (p->p_lflag & P_LLIMWAIT) {
1275                 p->p_lflag &= ~P_LLIMWAIT;
1276                 wakeup(&p->p_olimit);
1277         }
1278         proc_unlock(p);
1279 }
1280
1281 /* This is called behind serialization provided by proc_limitblock/unlbock */
1282 int
1283 proc_limitreplace(proc_t p)
1284 {
1285         struct plimit *copy;
1286
1287
1288         proc_list_lock();
1289
1290         if (p->p_limit->pl_refcnt == 1) {
1291                 proc_list_unlock();
1292                 return(0);
1293         }
1294
1295         proc_list_unlock();
1296
1297         MALLOC_ZONE(copy, struct plimit *,
1298                         sizeof(struct plimit), M_PLIMIT, M_WAITOK);
1299         if (copy == NULL) {
1300                 return(ENOMEM);
1301         }
1302
1303         proc_list_lock();
1304         bcopy(p->p_limit->pl_rlimit, copy->pl_rlimit,
1305             sizeof(struct rlimit) * RLIM_NLIMITS);
1306         copy->pl_refcnt = 1;
1307         /* hang on to reference to old till process exits */
1308         p->p_olimit = p->p_limit;
1309         p->p_limit = copy;
1310         proc_list_unlock();
1311
1312         return(0);
1313 }
1314
1315
1316 /*
1317  * iopolicysys
1318  *
1319  * Description: System call MUX for use in manipulating I/O policy attributes of the current process or thread
1320  *
1321  * Parameters:  cmd                             Policy command
1322  *              arg                             Pointer to policy arguments
1323  *
1324  * Returns:     0                               Success
1325  *              EINVAL                          Invalid command or invalid policy arguments
1326  *
1327  */
1328 int
1329 iopolicysys(__unused struct proc *p, __unused struct iopolicysys_args *uap, __unused int32_t *retval)
1330 {
1331         int     error = 0;
1332         struct _iopol_param_t iop_param;
1333 #if !CONFIG_EMBEDDED
1334         int processwide = 0;
1335 #else /* !CONFIG_EMBEDDED */
1336         thread_t thread = THREAD_NULL;
1337         struct uthread  *ut = NULL;
1338         int *policy;
1339 #endif /* !CONFIG_EMBEDDED */
1340
1341         if ((error = copyin(uap->arg, &iop_param, sizeof(iop_param))) != 0)
1342                 goto out;
1343
1344         if (iop_param.iop_iotype != IOPOL_TYPE_DISK) {
1345                 error = EINVAL;
1346                 goto out;
1347         }
1348
1349 #if !CONFIG_EMBEDDED
1350         switch (iop_param.iop_scope) {
1351         case IOPOL_SCOPE_PROCESS:
1352                 processwide = 1;
1353                 break;
1354         case IOPOL_SCOPE_THREAD:
1355                 processwide = 0;
1356                 break;
1357         default:
1358                 error = EINVAL;
1359                 goto out;
1360         }
1361
1362         switch(uap->cmd) {
1363         case IOPOL_CMD_SET:
1364                 switch (iop_param.iop_policy) {
1365                 case IOPOL_DEFAULT:
1366                 case IOPOL_NORMAL:
1367                 case IOPOL_THROTTLE:
1368                 case IOPOL_PASSIVE:
1369                         if(processwide != 0)
1370                                 proc_apply_task_diskacc(current_task(), iop_param.iop_policy);
1371                         else
1372                                 proc_apply_thread_selfdiskacc(iop_param.iop_policy);
1373
1374                         break;
1375                 default:
1376                         error = EINVAL;
1377                         goto out;
1378                 }
1379                 break;
1380
1381         case IOPOL_CMD_GET:
1382                 if(processwide != 0)
1383                         iop_param.iop_policy = proc_get_task_disacc(current_task());
1384                 else
1385                         iop_param.iop_policy = proc_get_thread_selfdiskacc();
1386
1387                 error = copyout((caddr_t)&iop_param, uap->arg, sizeof(iop_param));
1388
1389                 break;
1390         default:
1391                 error = EINVAL; // unknown command
1392                 break;
1393         }
1394
1395 #else /* !CONFIG_EMBEDDED */
1396         switch (iop_param.iop_scope) {
1397         case IOPOL_SCOPE_PROCESS:
1398                 policy = &p->p_iopol_disk;
1399                 break;
1400         case IOPOL_SCOPE_THREAD:
1401                 thread = current_thread();
1402                 ut = get_bsdthread_info(thread);
1403                 policy = &ut->uu_iopol_disk;
1404                 break;
1405         default:
1406                 error = EINVAL;
1407                 goto out;
1408         }
1409
1410         switch(uap->cmd) {
1411         case IOPOL_CMD_SET:
1412                 switch (iop_param.iop_policy) {
1413                 case IOPOL_DEFAULT:
1414                 case IOPOL_NORMAL:
1415                 case IOPOL_THROTTLE:
1416                 case IOPOL_PASSIVE:
1417                         proc_lock(p);
1418                         *policy = iop_param.iop_policy;
1419                         proc_unlock(p);
1420                         break;
1421                 default:
1422                         error = EINVAL;
1423                         goto out;
1424                 }
1425                 break;
1426         case IOPOL_CMD_GET:
1427                 switch (*policy) {
1428                 case IOPOL_DEFAULT:
1429                 case IOPOL_NORMAL:
1430                 case IOPOL_THROTTLE:
1431                 case IOPOL_PASSIVE:
1432                         iop_param.iop_policy = *policy;
1433                         break;
1434                 default: // in-kernel
1435                         // this should never happen
1436                         printf("%s: unknown I/O policy %d\n", __func__, *policy);
1437                         // restore to default value
1438                         *policy = IOPOL_DEFAULT;
1439                         iop_param.iop_policy = *policy;
1440                 }
1441
1442                 error = copyout((caddr_t)&iop_param, uap->arg, sizeof(iop_param));
1443                 break;
1444         default:
1445                 error = EINVAL; // unknown command
1446                 break;
1447         }
1448
1449 #endif /* !CONFIG_EMBEDDED */
1450 out:
1451         *retval = error;
1452         return (error);
1453 }
1454
1455
1456 boolean_t thread_is_io_throttled(void);
1457
1458 boolean_t
1459 thread_is_io_throttled(void)
1460 {
1461
1462 #if !CONFIG_EMBEDDED
1463
1464         return(proc_get_task_selfdiskacc() == IOPOL_THROTTLE);
1465
1466 #else /* !CONFIG_EMBEDDED */
1467         int     policy;
1468         struct uthread  *ut;
1469
1470         ut = get_bsdthread_info(current_thread());
1471
1472         if(ut){
1473                 policy = current_proc()->p_iopol_disk;
1474
1475                 if (ut->uu_iopol_disk != IOPOL_DEFAULT)
1476                         policy = ut->uu_iopol_disk;
1477
1478                 if (policy == IOPOL_THROTTLE)
1479                         return TRUE;
1480         }
1481         return FALSE;
1482 #endif /* !CONFIG_EMBEDDED */
1483 }
1484
1485 void
1486 proc_apply_task_networkbg(void * bsd_info)
1487 {
1488         proc_t p = PROC_NULL;
1489         proc_t curp = (proc_t)bsd_info;
1490         pid_t pid;
1491
1492         pid = curp->p_pid;
1493         p = proc_find(pid);
1494         if (p != PROC_NULL) {
1495                 do_background_socket(p, NULL, PRIO_DARWIN_BG);
1496                 proc_rele(p);
1497         }
1498 }
1499
1500 void
1501 proc_restore_task_networkbg(void * bsd_info)
1502 {
1503         proc_t p = PROC_NULL;
1504         proc_t curp = (proc_t)bsd_info;
1505         pid_t pid;
1506
1507         pid = curp->p_pid;
1508         p = proc_find(pid);
1509         if (p != PROC_NULL) {
1510                 do_background_socket(p, NULL, 0);
1511                 proc_rele(p);
1512         }
1513
1514 }
1515
/*
 * Apply (setbg != 0) or restore (setbg == 0) the background network
 * setting for the process described by bsdinfo.
 */
void
proc_set_task_networkbg(void * bsdinfo, int setbg)
{
	if (setbg == 0) {
		proc_restore_task_networkbg(bsdinfo);
		return;
	}

	proc_apply_task_networkbg(bsdinfo);
}
1524
1525 void
1526 proc_apply_task_networkbg_internal(proc_t p)
1527 {
1528         if (p != PROC_NULL) {
1529                 do_background_socket(p, NULL, PRIO_DARWIN_BG);
1530         }
1531 }
1532