bsd/kern/kern_resource.c

   1 /*
   2  * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28 /* Copyright (c) 1995, 1997 Apple Computer, Inc. All Rights Reserved */
  29 /*-
  30  * Copyright (c) 1982, 1986, 1991, 1993
  31  *      The Regents of the University of California.  All rights reserved.
  32  * (c) UNIX System Laboratories, Inc.
  33  * All or some portions of this file are derived from material licensed
  34  * to the University of California by American Telephone and Telegraph
  35  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  36  * the permission of UNIX System Laboratories, Inc.
  37  *
  38  * Redistribution and use in source and binary forms, with or without
  39  * modification, are permitted provided that the following conditions
  40  * are met:
  41  * 1. Redistributions of source code must retain the above copyright
  42  *    notice, this list of conditions and the following disclaimer.
  43  * 2. Redistributions in binary form must reproduce the above copyright
  44  *    notice, this list of conditions and the following disclaimer in the
  45  *    documentation and/or other materials provided with the distribution.
  46  * 3. All advertising materials mentioning features or use of this software
  47  *    must display the following acknowledgement:
  48  *      This product includes software developed by the University of
  49  *      California, Berkeley and its contributors.
  50  * 4. Neither the name of the University nor the names of its contributors
  51  *    may be used to endorse or promote products derived from this software
  52  *    without specific prior written permission.
  53  *
  54  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  55  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  56  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  57  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  58  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  59  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  60  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  61  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  62  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  63  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  64  * SUCH DAMAGE.
  65  *
  66  *      @(#)kern_resource.c     8.5 (Berkeley) 1/21/94
  67  */
  68 /*
  69  * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
  70  * support for mandatory and extensible security protections.  This notice
  71  * is included in support of clause 2.2 (b) of the Apple Public License,
  72  * Version 2.0.
  73  */
  74
  75 #include <sys/param.h>
  76 #include <sys/systm.h>
  77 #include <sys/sysctl.h>
  78 #include <sys/kernel.h>
  79 #include <sys/file_internal.h>
  80 #include <sys/resourcevar.h>
  81 #include <sys/malloc.h>
  82 #include <sys/proc_internal.h>
  83 #include <sys/kauth.h>
  84 #include <machine/spl.h>
  85
  86 #include <sys/mount_internal.h>
  87 #include <sys/sysproto.h>
  88
  89 #include <bsm/audit_kernel.h>
  90
  91 #include <machine/vmparam.h>
  92
  93 #include <mach/mach_types.h>
  94 #include <mach/time_value.h>
  95 #include <mach/task.h>
  96 #include <mach/task_info.h>
  97 #include <mach/vm_map.h>
  98 #include <mach/mach_vm.h>
  99 #include <mach/thread_act.h>  /* for thread_policy_set( ) */
 100 #include <kern/lock.h>
 101 #include <kern/thread.h>
 102
 103 #include <kern/task.h>
 104 #include <kern/clock.h>         /* for absolutetime_to_microtime() */
 105 #include <netinet/in.h>         /* for TRAFFIC_MGT_SO_BACKGROUND */
 106 #include <sys/socketvar.h>      /* for struct socket */
 107
 108 #include <vm/vm_map.h>
 109
/*
 * Forward declarations for helpers defined later in this file.
 */
/* Apply nice value n to process chgp on behalf of curp; returns 0 or an errno. */
int     donice(struct proc *curp, struct proc *chgp, int n);
/* Validate and install *limp as the limit for resource `which' of process p. */
int     dosetrlimit(struct proc *p, u_int which, struct rlimit *limp);
/* Switch the calling thread into or out of background mode (PRIO_DARWIN_BG). */
static int do_background_thread(struct proc *curp, int priority);
 113
/* Ceiling that dosetrlimit() clamps RLIMIT_DATA requests to. */
rlim_t maxdmap = MAXDSIZ;       /* XXX */
/* Ceiling that dosetrlimit() enforces for RLIMIT_STACK requests. */
rlim_t maxsmap = MAXSSIZ - PAGE_SIZE;   /* XXX */
 116
/*
 * Limits on the number of open files per process, and the number
 * of child processes per process.
 *
 * maxfilesperproc is the cap dosetrlimit() applies to RLIMIT_NOFILE
 * requests from callers for which is_suser() is false.
 *
 * Note: would be in kern/subr_param.c in FreeBSD.
 */
int maxfilesperproc = OPEN_MAX;         /* per-proc open files limit */

/*
 * Read/write sysctl for maxprocperuid (the variable itself is not defined
 * in this part of the file).
 */
SYSCTL_INT( _kern, KERN_MAXPROCPERUID, maxprocperuid, CTLFLAG_RW,
                &maxprocperuid, 0, "Maximum processes allowed per userid" );

/* Read/write sysctl for maxfilesperproc, defined above. */
SYSCTL_INT( _kern, KERN_MAXFILESPERPROC, maxfilesperproc, CTLFLAG_RW,
                &maxfilesperproc, 0, "Maximum files allowed open per process" );
 130
/*
 * Args and fn for the proc_iterate() callback used by setpriority()
 * in the PRIO_USER case.
 */
struct puser_nice_args {
        proc_t curp;            /* caller; passed to donice() as curp */
        int     prio;           /* nice value to apply */
        id_t    who;            /* uid a process must have to be changed */
        int *   foundp;         /* if non-NULL, incremented per matching process */
        int *   errorp;         /* if non-NULL, receives each donice() result */
};
static int puser_donice_callback(proc_t p, void * arg);
 140
 141
/*
 * Args and fn for the pgrp_iterate() callback used by setpriority()
 * in the PRIO_PGRP case.
 */
struct ppgrp_nice_args {
        proc_t curp;            /* caller; passed to donice() as curp */
        int     prio;           /* nice value to apply */
        int *   foundp;         /* if non-NULL, incremented per visited process */
        int *   errorp;         /* if non-NULL, receives each donice() result */
};
static int ppgrp_donice_callback(proc_t p, void * arg);
 150
 151 /*
 152  * Resource controls and accounting.
 153  */
 154 int
 155 getpriority(struct proc *curp, struct getpriority_args *uap, register_t *retval)
 156 {
 157         struct proc *p;
 158         int low = PRIO_MAX + 1;
 159         kauth_cred_t my_cred;
 160
 161         /* would also test (uap->who < 0), but id_t is unsigned */
 162         if (uap->who > 0x7fffffff)
 163                 return (EINVAL);
 164
 165         switch (uap->which) {
 166
 167         case PRIO_PROCESS:
 168                 if (uap->who == 0) {
 169                         p = curp;
 170                         low = p->p_nice;
 171                 } else {
 172                         p = proc_find(uap->who);
 173                         if (p == 0)
 174                                 break;
 175                         low = p->p_nice;
 176                         proc_rele(p);
 177
 178                 }
 179                 break;
 180
 181         case PRIO_PGRP: {
 182                 struct pgrp *pg = PGRP_NULL;
 183
 184                 if (uap->who == 0) {
 185                         /* returns the pgrp to ref */
 186                         pg = proc_pgrp(curp);
 187                  } else if ((pg = pgfind(uap->who)) == PGRP_NULL) {
 188                         break;
 189                 }
 190                 /* No need for iteration as it is a simple scan */
 191                 pgrp_lock(pg);
 192                 for (p = pg->pg_members.lh_first; p != 0; p = p->p_pglist.le_next) {
 193                         if (p->p_nice < low)
 194                                 low = p->p_nice;
 195                 }
 196                 pgrp_unlock(pg);
 197                 pg_rele(pg);
 198                 break;
 199         }
 200
 201         case PRIO_USER:
 202                 if (uap->who == 0)
 203                         uap->who = kauth_cred_getuid(kauth_cred_get());
 204
 205                 proc_list_lock();
 206
 207                 for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) {
 208                         my_cred = kauth_cred_proc_ref(p);
 209                         if (kauth_cred_getuid(my_cred) == uap->who &&
 210                             p->p_nice < low)
 211                                 low = p->p_nice;
 212                         kauth_cred_unref(&my_cred);
 213                 }
 214
 215                 proc_list_unlock();
 216
 217                 break;
 218
 219         case PRIO_DARWIN_THREAD: {
 220                 thread_t                        thread;
 221                 struct uthread          *ut;
 222
 223                 /* we currently only support the current thread */
 224                 if (uap->who != 0) {
 225                         return (EINVAL);
 226                 }
 227
 228                 thread = current_thread();
 229                 ut = get_bsdthread_info(thread);
 230
 231                 low = 0;
 232                 if ( (ut->uu_flag & UT_BACKGROUND) != 0 ) {
 233                         low = 1;
 234                 }
 235                 break;
 236         }
 237
 238         default:
 239                 return (EINVAL);
 240         }
 241         if (low == PRIO_MAX + 1)
 242                 return (ESRCH);
 243         *retval = low;
 244         return (0);
 245 }
 246
 247 /* call back function used for proc iteration in PRIO_USER */
 248 static int
 249 puser_donice_callback(proc_t p, void * arg)
 250 {
 251         int error, n;
 252         struct puser_nice_args * pun = (struct puser_nice_args *)arg;
 253         kauth_cred_t my_cred;
 254
 255         my_cred = kauth_cred_proc_ref(p);
 256         if (kauth_cred_getuid(my_cred) == pun->who) {
 257                 error = donice(pun->curp, p, pun->prio);
 258                 if (pun->errorp != NULL)
 259                         *pun->errorp = error;
 260                 if (pun->foundp != NULL) {
 261                         n = *pun->foundp;
 262                         *pun->foundp = n+1;
 263                 }
 264         }
 265         kauth_cred_unref(&my_cred);
 266
 267         return(PROC_RETURNED);
 268 }
 269
 270 /* call back function used for proc iteration in PRIO_PGRP */
 271 static int
 272 ppgrp_donice_callback(proc_t p, void * arg)
 273 {
 274         int error;
 275         struct ppgrp_nice_args * pun = (struct ppgrp_nice_args *)arg;
 276         int n;
 277
 278         error = donice(pun->curp, p, pun->prio);
 279         if (pun->errorp != NULL)
 280                 *pun->errorp = error;
 281         if (pun->foundp!= NULL) {
 282                 n = *pun->foundp;
 283                 *pun->foundp = n+1;
 284         }
 285
 286         return(PROC_RETURNED);
 287 }
 288
 289 /*
 290  * Returns:     0                       Success
 291  *              EINVAL
 292  *              ESRCH
 293  *      donice:EPERM
 294  *      donice:EACCES
 295  */
 296 /* ARGSUSED */
 297 int
 298 setpriority(struct proc *curp, struct setpriority_args *uap, __unused register_t *retval)
 299 {
 300         struct proc *p;
 301         int found = 0, error = 0;
 302         int refheld = 0;
 303
 304         AUDIT_ARG(cmd, uap->which);
 305         AUDIT_ARG(owner, uap->who, 0);
 306         AUDIT_ARG(value, uap->prio);
 307
 308         /* would also test (uap->who < 0), but id_t is unsigned */
 309         if (uap->who > 0x7fffffff)
 310                 return (EINVAL);
 311
 312         switch (uap->which) {
 313
 314         case PRIO_PROCESS:
 315                 if (uap->who == 0)
 316                         p = curp;
 317                 else {
 318                         p = proc_find(uap->who);
 319                         if (p == 0)
 320                                 break;
 321                         refheld = 1;
 322                 }
 323                 error = donice(curp, p, uap->prio);
 324                 found++;
 325                 if (refheld != 0)
 326                         proc_rele(p);
 327                 break;
 328
 329         case PRIO_PGRP: {
 330                 struct pgrp *pg = PGRP_NULL;
 331                 struct ppgrp_nice_args ppgrp;
 332
 333                 if (uap->who == 0) {
 334                         pg = proc_pgrp(curp);
 335                  } else if ((pg = pgfind(uap->who)) == PGRP_NULL)
 336                         break;
 337
 338                 ppgrp.curp = curp;
 339                 ppgrp.prio = uap->prio;
 340                 ppgrp.foundp = &found;
 341                 ppgrp.errorp = &error;
 342
 343                 /* PGRP_DROPREF drops the reference on process group */
 344                 pgrp_iterate(pg, PGRP_DROPREF, ppgrp_donice_callback, (void *)&ppgrp, NULL, NULL);
 345
 346                 break;
 347         }
 348
 349         case PRIO_USER: {
 350                 struct puser_nice_args punice;
 351
 352                 if (uap->who == 0)
 353                         uap->who = kauth_cred_getuid(kauth_cred_get());
 354
 355                 punice.curp = curp;
 356                 punice.prio = uap->prio;
 357                 punice.who = uap->who;
 358                 punice.foundp = &found;
 359                 error = 0;
 360                 punice.errorp = &error;
 361                 proc_iterate(PROC_ALLPROCLIST, puser_donice_callback, (void *)&punice, NULL, NULL);
 362
 363                 break;
 364         }
 365
 366         case PRIO_DARWIN_THREAD: {
 367                 /* we currently only support the current thread */
 368                 if (uap->who != 0) {
 369                         return (EINVAL);
 370                 }
 371                 error = do_background_thread(curp, uap->prio);
 372                 found++;
 373                 break;
 374         }
 375
 376         default:
 377                 return (EINVAL);
 378         }
 379         if (found == 0)
 380                 return (ESRCH);
 381         return (error);
 382 }
 383
 384
 385 /*
 386  * Returns:     0                       Success
 387  *              EPERM
 388  *              EACCES
 389  *      mac_check_proc_sched:???
 390  */
 391 int
 392 donice(struct proc *curp, struct proc *chgp, int n)
 393 {
 394         int error = 0;
 395         kauth_cred_t ucred;
 396         kauth_cred_t my_cred;
 397
 398         ucred = kauth_cred_proc_ref(curp);
 399         my_cred = kauth_cred_proc_ref(chgp);
 400
 401         if (suser(ucred, NULL) && ucred->cr_ruid &&
 402             kauth_cred_getuid(ucred) != kauth_cred_getuid(my_cred) &&
 403             ucred->cr_ruid != kauth_cred_getuid(my_cred)) {
 404                 error = EPERM;
 405                 goto out;
 406         }
 407         if (n > PRIO_MAX)
 408                 n = PRIO_MAX;
 409         if (n < PRIO_MIN)
 410                 n = PRIO_MIN;
 411         if (n < chgp->p_nice && suser(ucred, &curp->p_acflag)) {
 412                 error = EACCES;
 413                 goto out;
 414         }
 415 #if CONFIG_MACF
 416         error = mac_proc_check_sched(curp, chgp);
 417         if (error)
 418                 goto out;
 419 #endif
 420         proc_lock(chgp);
 421         chgp->p_nice = n;
 422         proc_unlock(chgp);
 423         (void)resetpriority(chgp);
 424 out:
 425         kauth_cred_unref(&ucred);
 426         kauth_cred_unref(&my_cred);
 427         return (error);
 428 }
 429
 430 /*
 431  * do_background_thread
 432  * Returns:     0                       Success
 433  * XXX - todo - does this need a MACF hook?
 434  */
 435 static int
 436 do_background_thread(struct proc *curp, int priority)
 437 {
 438         int                                                                     i;
 439         thread_t                                                        thread;
 440         struct uthread                                          *ut;
 441         thread_precedence_policy_data_t         policy;
 442         struct filedesc                                         *fdp;
 443         struct fileproc                                         *fp;
 444
 445         thread = current_thread();
 446         ut = get_bsdthread_info(thread);
 447
 448         if ( (priority & PRIO_DARWIN_BG) == 0 ) {
 449                 /* turn off backgrounding of thread */
 450                 if ( (ut->uu_flag & UT_BACKGROUND) == 0 ) {
 451                         /* already off */
 452                         return(0);
 453                 }
 454
 455                 /* clear background bit in thread and disable disk IO throttle */
 456                 ut->uu_flag &= ~UT_BACKGROUND;
 457                 ut->uu_iopol_disk = IOPOL_NORMAL;
 458
 459                 /* reset thread priority (we did not save previous value) */
 460                 policy.importance = 0;
 461                 thread_policy_set( thread, THREAD_PRECEDENCE_POLICY,
 462                                                    (thread_policy_t)&policy,
 463                                                    THREAD_PRECEDENCE_POLICY_COUNT );
 464
 465                 /* disable networking IO throttle.
 466                  * NOTE - It is a known limitation of the current design that we
 467                  * could potentially clear TRAFFIC_MGT_SO_BACKGROUND bit for
 468                  * sockets created by other threads within this process.
 469                  */
 470                 proc_fdlock(curp);
 471                 fdp = curp->p_fd;
 472                 for ( i = 0; i < fdp->fd_nfiles; i++ ) {
 473                         struct socket           *sockp;
 474
 475                         fp = fdp->fd_ofiles[ i ];
 476                         if ( fp == NULL || (fdp->fd_ofileflags[ i ] & UF_RESERVED) != 0 ||
 477                                  fp->f_fglob->fg_type != DTYPE_SOCKET ) {
 478                                 continue;
 479                         }
 480                         sockp = (struct socket *)fp->f_fglob->fg_data;
 481                         if ( sockp->so_background_thread != thread ) {
 482                                 continue;
 483                         }
 484                         sockp->so_traffic_mgt_flags &= ~TRAFFIC_MGT_SO_BACKGROUND;
 485                         sockp->so_background_thread = NULL;
 486                 }
 487                 proc_fdunlock(curp);
 488
 489                 return(0);
 490         }
 491
 492         /* background this thread */
 493         if ( (ut->uu_flag & UT_BACKGROUND) != 0 ) {
 494                 /* already backgrounded */
 495                 return(0);
 496         }
 497
 498         /* tag thread as background and throttle disk IO */
 499         ut->uu_flag |= UT_BACKGROUND;
 500         ut->uu_iopol_disk = IOPOL_THROTTLE;
 501
 502         policy.importance = INT_MIN;
 503         thread_policy_set( thread, THREAD_PRECEDENCE_POLICY,
 504                                            (thread_policy_t)&policy,
 505                                            THREAD_PRECEDENCE_POLICY_COUNT );
 506
 507         /* throttle networking IO happens in socket( ) syscall.
 508          * If UT_BACKGROUND is set in the current thread then
 509          * TRAFFIC_MGT_SO_BACKGROUND socket option is set.
 510          */
 511         return(0);
 512 }
 513
 514
 515 /*
 516  * Returns:     0                       Success
 517  *      copyin:EFAULT
 518  *      dosetrlimit:
 519  */
 520 /* ARGSUSED */
 521 int
 522 setrlimit(struct proc *p, struct setrlimit_args *uap, __unused register_t *retval)
 523 {
 524         struct rlimit alim;
 525         int error;
 526
 527         if ((error = copyin(uap->rlp, (caddr_t)&alim,
 528             sizeof (struct rlimit))))
 529                 return (error);
 530
 531         return (dosetrlimit(p, uap->which, &alim));
 532 }
 533
 534 /*
 535  * Returns:     0                       Success
 536  *              EINVAL
 537  *              ENOMEM                  Cannot copy limit structure
 538  *      suser:EPERM
 539  *
 540  * Notes:       EINVAL is returned both for invalid arguments, and in the
 541  *              case that the current usage (e.g. RLIMIT_STACK) is already
 542  *              in excess of the requested limit.
 543  */
 544 int
 545 dosetrlimit(struct proc *p, u_int which, struct rlimit *limp)
 546 {
 547         struct rlimit *alimp;
 548         int error;
 549         kern_return_t   kr;
 550         int posix = (which & _RLIMIT_POSIX_FLAG) ? 1 : 0;
 551
 552         /* Mask out POSIX flag, saved above */
 553         which &= ~_RLIMIT_POSIX_FLAG;
 554
 555         if (which >= RLIM_NLIMITS)
 556                 return (EINVAL);
 557
 558         alimp = &p->p_rlimit[which];
 559         if (limp->rlim_cur > limp->rlim_max)
 560                 return EINVAL;
 561
 562         if (limp->rlim_cur > alimp->rlim_max ||
 563             limp->rlim_max > alimp->rlim_max)
 564                 if ((error = suser(kauth_cred_get(), &p->p_acflag))) {
 565                         return (error);
 566         }
 567
 568         proc_limitblock(p);
 569
 570         if ((error = proc_limitreplace(p)) != 0) {
 571                 proc_limitunblock(p);
 572                 return(error);
 573         }
 574
 575         alimp = &p->p_rlimit[which];
 576
 577         switch (which) {
 578
 579         case RLIMIT_CPU:
 580                 if (limp->rlim_cur == RLIM_INFINITY) {
 581                         task_vtimer_clear(p->task, TASK_VTIMER_RLIM);
 582                         timerclear(&p->p_rlim_cpu);
 583                 }
 584                 else {
 585                         task_absolutetime_info_data_t   tinfo;
 586                         mach_msg_type_number_t                  count;
 587                         struct timeval                                  ttv, tv;
 588
 589                         count = TASK_ABSOLUTETIME_INFO_COUNT;
 590                         task_info(p->task, TASK_ABSOLUTETIME_INFO,
 591                                                                 (task_info_t)&tinfo, &count);
 592                         absolutetime_to_microtime(tinfo.total_user + tinfo.total_system,
 593                                                                 (uint32_t *) &ttv.tv_sec, (uint32_t *) &ttv.tv_usec);
 594
 595                         tv.tv_sec = (limp->rlim_cur > __INT_MAX__ ? __INT_MAX__ : limp->rlim_cur);
 596                         tv.tv_usec = 0;
 597                         timersub(&tv, &ttv, &p->p_rlim_cpu);
 598
 599                         timerclear(&tv);
 600                         if (timercmp(&p->p_rlim_cpu, &tv, >))
 601                                 task_vtimer_set(p->task, TASK_VTIMER_RLIM);
 602                         else {
 603                                 task_vtimer_clear(p->task, TASK_VTIMER_RLIM);
 604
 605                                 timerclear(&p->p_rlim_cpu);
 606
 607                                 psignal(p, SIGXCPU);
 608                         }
 609                 }
 610                 break;
 611
 612         case RLIMIT_DATA:
 613                 if (limp->rlim_cur > maxdmap)
 614                         limp->rlim_cur = maxdmap;
 615                 if (limp->rlim_max > maxdmap)
 616                         limp->rlim_max = maxdmap;
 617                 break;
 618
 619         case RLIMIT_STACK:
 620                 /* Disallow illegal stack size instead of clipping */
 621                 if (limp->rlim_cur > maxsmap ||
 622                     limp->rlim_max > maxsmap) {
 623                         if (posix) {
 624                                 error = EINVAL;
 625                                 goto out;
 626                         }
 627                         else {
 628                                 /*
 629                                  * 4797860 - workaround poorly written installers by
 630                                  * doing previous implementation (< 10.5) when caller
 631                                  * is non-POSIX conforming.
 632                                  */
 633                                 if (limp->rlim_cur > maxsmap)
 634                                         limp->rlim_cur = maxsmap;
 635                                 if (limp->rlim_max > maxsmap)
 636                                         limp->rlim_max = maxsmap;
 637                         }
 638                 }
 639
 640                 /*
 641                  * Stack is allocated to the max at exec time with only
 642                  * "rlim_cur" bytes accessible.  If stack limit is going
 643                  * up make more accessible, if going down make inaccessible.
 644                  */
 645                 if (limp->rlim_cur > alimp->rlim_cur) {
 646                         user_addr_t addr;
 647                         user_size_t size;
 648
 649                                 /* grow stack */
 650                                 size = round_page_64(limp->rlim_cur);
 651                                 size -= round_page_64(alimp->rlim_cur);
 652
 653 #if STACK_GROWTH_UP
 654                                 /* go to top of current stack */
 655                         addr = p->user_stack + round_page_64(alimp->rlim_cur);
 656 #else   /* STACK_GROWTH_UP */
 657                         addr = p->user_stack - round_page_64(limp->rlim_cur);
 658 #endif /* STACK_GROWTH_UP */
 659                         kr = mach_vm_protect(current_map(),
 660                                              addr, size,
 661                                              FALSE, VM_PROT_DEFAULT);
 662                         if (kr != KERN_SUCCESS) {
 663                                 error =  EINVAL;
 664                                 goto out;
 665                         }
 666                 } else if (limp->rlim_cur < alimp->rlim_cur) {
 667                         user_addr_t addr;
 668                         user_size_t size;
 669                         user_addr_t cur_sp;
 670
 671                                 /* shrink stack */
 672
 673                         /*
 674                          * First check if new stack limit would agree
 675                          * with current stack usage.
 676                          * Get the current thread's stack pointer...
 677                          */
 678                         cur_sp = thread_adjuserstack(current_thread(),
 679                                                      0);
 680 #if STACK_GROWTH_UP
 681                         if (cur_sp >= p->user_stack &&
 682                             cur_sp < (p->user_stack +
 683                                       round_page_64(alimp->rlim_cur))) {
 684                                 /* current stack pointer is in main stack */
 685                                 if (cur_sp >= (p->user_stack +
 686                                                round_page_64(limp->rlim_cur))) {
 687                                         /*
 688                                          * New limit would cause
 689                                          * current usage to be invalid:
 690                                          * reject new limit.
 691                                          */
 692                                         error =  EINVAL;
 693                                         goto out;
 694                         }
 695                         } else {
 696                                 /* not on the main stack: reject */
 697                                 error =  EINVAL;
 698                                 goto out;
 699                 }
 700
 701 #else   /* STACK_GROWTH_UP */
 702                         if (cur_sp <= p->user_stack &&
 703                             cur_sp > (p->user_stack -
 704                                       round_page_64(alimp->rlim_cur))) {
 705                                 /* stack pointer is in main stack */
 706                                 if (cur_sp <= (p->user_stack -
 707                                                round_page_64(limp->rlim_cur))) {
 708                                         /*
 709                                          * New limit would cause
 710                                          * current usage to be invalid:
 711                                          * reject new limit.
 712                                          */
 713                                         error =  EINVAL;
 714                                         goto out;
 715                                 }
 716                         } else {
 717                                 /* not on the main stack: reject */
 718                                 error =  EINVAL;
 719                                 goto out;
 720                         }
 721 #endif  /* STACK_GROWTH_UP */
 722
 723                         size = round_page_64(alimp->rlim_cur);
 724                         size -= round_page_64(limp->rlim_cur);
 725
 726 #if STACK_GROWTH_UP
 727                         addr = p->user_stack + round_page_64(limp->rlim_cur);
 728 #else   /* STACK_GROWTH_UP */
 729                         addr = p->user_stack - round_page_64(alimp->rlim_cur);
 730 #endif /* STACK_GROWTH_UP */
 731
 732                         kr = mach_vm_protect(current_map(),
 733                                              addr, size,
 734                                              FALSE, VM_PROT_NONE);
 735                         if (kr != KERN_SUCCESS) {
 736                                 error =  EINVAL;
 737                                 goto out;
 738                         }
 739                 } else {
 740                         /* no change ... */
 741                 }
 742                 break;
 743
 744         case RLIMIT_NOFILE:
 745                 /*
 746                  * Only root can set the maxfiles limits, as it is
 747                  * systemwide resource.  If we are expecting POSIX behavior,
 748                  * instead of clamping the value, return EINVAL.  We do this
 749                  * because historically, people have been able to attempt to
 750                  * set RLIM_INFINITY to get "whatever the maximum is".
 751                 */
 752                 if ( is_suser() ) {
 753                         if (limp->rlim_cur != alimp->rlim_cur &&
 754                             limp->rlim_cur > (rlim_t)maxfiles) {
 755                                 if (posix) {
 756                                         error =  EINVAL;
 757                                         goto out;
 758                                 }
 759                                 limp->rlim_cur = maxfiles;
 760                         }
 761                         if (limp->rlim_max != alimp->rlim_max &&
 762                             limp->rlim_max > (rlim_t)maxfiles)
 763                                 limp->rlim_max = maxfiles;
 764                 }
 765                 else {
 766                         if (limp->rlim_cur != alimp->rlim_cur &&
 767                             limp->rlim_cur > (rlim_t)maxfilesperproc) {
 768                                 if (posix) {
 769                                         error =  EINVAL;
 770                                         goto out;
 771                                 }
 772                                 limp->rlim_cur = maxfilesperproc;
 773                         }
 774                         if (limp->rlim_max != alimp->rlim_max &&
 775                             limp->rlim_max > (rlim_t)maxfilesperproc)
 776                                 limp->rlim_max = maxfilesperproc;
 777                 }
 778                 break;
 779
 780         case RLIMIT_NPROC:
 781                 /*
 782                  * Only root can set to the maxproc limits, as it is
 783                  * systemwide resource; all others are limited to
 784                  * maxprocperuid (presumably less than maxproc).
 785                  */
 786                 if ( is_suser() ) {
 787                         if (limp->rlim_cur > (rlim_t)maxproc)
 788                                 limp->rlim_cur = maxproc;
 789                         if (limp->rlim_max > (rlim_t)maxproc)
 790                                 limp->rlim_max = maxproc;
 791                 }
 792                 else {
 793                         if (limp->rlim_cur > (rlim_t)maxprocperuid)
 794                                 limp->rlim_cur = maxprocperuid;
 795                         if (limp->rlim_max > (rlim_t)maxprocperuid)
 796                                 limp->rlim_max = maxprocperuid;
 797                 }
 798                 break;
 799
 800         case RLIMIT_MEMLOCK:
 801                 /*
 802                  * Tell the Mach VM layer about the new limit value.
 803                  */
 804
 805                 vm_map_set_user_wire_limit(current_map(), limp->rlim_cur);
 806                 break;
 807
 808         } /* switch... */
 809         proc_lock(p);
 810         *alimp = *limp;
 811         proc_unlock(p);
 812         error = 0;
 813 out:
 814         proc_limitunblock(p);
 815         return (error);
 816 }
 817
 818 /* ARGSUSED */
 819 int
 820 getrlimit(struct proc *p, struct getrlimit_args *uap, __unused register_t *retval)
 821 {
 822         struct rlimit lim;
 823
 824         /*
 825          * Take out flag now in case we need to use it to trigger variant
 826          * behaviour later.
 827          */
 828         uap->which &= ~_RLIMIT_POSIX_FLAG;
 829
 830         if (uap->which >= RLIM_NLIMITS)
 831                 return (EINVAL);
 832         proc_limitget(p, uap->which, &lim);
 833         return (copyout((caddr_t)&lim,
 834                         uap->rlp, sizeof (struct rlimit)));
 835 }
 836
 837 /*
 838  * Transform the running time and tick information in proc p into user,
 839  * system, and interrupt time usage.
 840  */
 841 /* No lock on proc is held for this.. */
 842 void
 843 calcru(struct proc *p, struct timeval *up, struct timeval *sp, struct timeval *ip)
 844 {
 845         task_t                  task;
 846
 847         timerclear(up);
 848         timerclear(sp);
 849         if (ip != NULL)
 850                 timerclear(ip);
 851
 852         task = p->task;
 853         if (task) {
 854                 task_basic_info_data_t tinfo;
 855                 task_thread_times_info_data_t ttimesinfo;
 856                 mach_msg_type_number_t task_info_stuff, task_ttimes_stuff;
 857                 struct timeval ut,st;
 858
 859                 task_info_stuff = TASK_BASIC_INFO_COUNT;
 860                 task_info(task, TASK_BASIC_INFO,
 861                           (task_info_t)&tinfo, &task_info_stuff);
 862                 ut.tv_sec = tinfo.user_time.seconds;
 863                 ut.tv_usec = tinfo.user_time.microseconds;
 864                 st.tv_sec = tinfo.system_time.seconds;
 865                 st.tv_usec = tinfo.system_time.microseconds;
 866                 timeradd(&ut, up, up);
 867                 timeradd(&st, sp, sp);
 868
 869                 task_ttimes_stuff = TASK_THREAD_TIMES_INFO_COUNT;
 870                 task_info(task, TASK_THREAD_TIMES_INFO,
 871                           (task_info_t)&ttimesinfo, &task_ttimes_stuff);
 872
 873                 ut.tv_sec = ttimesinfo.user_time.seconds;
 874                 ut.tv_usec = ttimesinfo.user_time.microseconds;
 875                 st.tv_sec = ttimesinfo.system_time.seconds;
 876                 st.tv_usec = ttimesinfo.system_time.microseconds;
 877                 timeradd(&ut, up, up);
 878                 timeradd(&st, sp, sp);
 879         }
 880 }
 881
 882 __private_extern__ void munge_rusage(struct rusage *a_rusage_p, struct user_rusage *a_user_rusage_p);
 883
 884 /* ARGSUSED */
 885 int
 886 getrusage(struct proc *p, struct getrusage_args *uap, __unused register_t *retval)
 887 {
 888         struct rusage *rup, rubuf;
 889         struct user_rusage rubuf64;
 890         size_t retsize = sizeof(rubuf);                 /* default: 32 bits */
 891         caddr_t retbuf = (caddr_t)&rubuf;               /* default: 32 bits */
 892         struct timeval utime;
 893         struct timeval stime;
 894
 895
 896         switch (uap->who) {
 897         case RUSAGE_SELF:
 898                 calcru(p, &utime, &stime, NULL);
 899                 // LP64todo: proc struct should have 64 bit version of struct
 900                 proc_lock(p);
 901                 rup = &p->p_stats->p_ru;
 902                 rup->ru_utime = utime;
 903                 rup->ru_stime = stime;
 904
 905                 rubuf = *rup;
 906                 proc_unlock(p);
 907
 908                 break;
 909
 910         case RUSAGE_CHILDREN:
 911                 proc_lock(p);
 912                 rup = &p->p_stats->p_cru;
 913                 rubuf = *rup;
 914                 proc_unlock(p);
 915                 break;
 916
 917         default:
 918                 return (EINVAL);
 919         }
 920         if (IS_64BIT_PROCESS(p)) {
 921                 retsize = sizeof(rubuf64);
 922                 retbuf = (caddr_t)&rubuf64;
 923                 munge_rusage(&rubuf, &rubuf64);
 924         }
 925         return (copyout(retbuf, uap->rusage, retsize));
 926 }
 927
 928 void
 929 ruadd(struct rusage *ru, struct rusage *ru2)
 930 {
 931         long *ip, *ip2;
 932         int i;
 933
 934         timeradd(&ru->ru_utime, &ru2->ru_utime, &ru->ru_utime);
 935         timeradd(&ru->ru_stime, &ru2->ru_stime, &ru->ru_stime);
 936         if (ru->ru_maxrss < ru2->ru_maxrss)
 937                 ru->ru_maxrss = ru2->ru_maxrss;
 938         ip = &ru->ru_first; ip2 = &ru2->ru_first;
 939         for (i = &ru->ru_last - &ru->ru_first; i >= 0; i--)
 940                 *ip++ += *ip2++;
 941 }
 942
 943 void
 944 proc_limitget(proc_t p, int which, struct rlimit * limp)
 945 {
 946         proc_list_lock();
 947         limp->rlim_cur = p->p_rlimit[which].rlim_cur;
 948         limp->rlim_max = p->p_rlimit[which].rlim_max;
 949         proc_list_unlock();
 950 }
 951
 952
 953 void
 954 proc_limitdrop(proc_t p, int exiting)
 955 {
 956         struct  plimit * freelim = NULL;
 957         struct  plimit * freeoldlim = NULL;
 958
 959         proc_list_lock();
 960
 961         if (--p->p_limit->pl_refcnt == 0) {
 962                 freelim = p->p_limit;
 963                 p->p_limit = NULL;
 964         }
 965         if ((exiting != 0) && (p->p_olimit != NULL) && (--p->p_olimit->pl_refcnt == 0)) {
 966                 freeoldlim =  p->p_olimit;
 967                 p->p_olimit = NULL;
 968         }
 969
 970         proc_list_unlock();
 971         if (freelim != NULL)
 972                 FREE_ZONE(freelim, sizeof *p->p_limit, M_PLIMIT);
 973         if (freeoldlim != NULL)
 974                 FREE_ZONE(freeoldlim, sizeof *p->p_olimit, M_PLIMIT);
 975 }
 976
 977
 978 void
 979 proc_limitfork(proc_t parent, proc_t child)
 980 {
 981         proc_list_lock();
 982         child->p_limit = parent->p_limit;
 983         child->p_limit->pl_refcnt++;
 984         child->p_olimit = NULL;
 985         proc_list_unlock();
 986 }
 987
 988 void
 989 proc_limitblock(proc_t p)
 990 {
 991         proc_lock(p);
 992         while (p->p_lflag & P_LLIMCHANGE) {
 993                 p->p_lflag |= P_LLIMWAIT;
 994                 msleep(&p->p_olimit, &p->p_mlock, 0, "proc_limitblock", NULL);
 995         }
 996         p->p_lflag |= P_LLIMCHANGE;
 997         proc_unlock(p);
 998
 999 }
1000
1001
1002 void
1003 proc_limitunblock(proc_t p)
1004 {
1005         proc_lock(p);
1006         p->p_lflag &= ~P_LLIMCHANGE;
1007         if (p->p_lflag & P_LLIMWAIT) {
1008                 p->p_lflag &= ~P_LLIMWAIT;
1009                 wakeup(&p->p_olimit);
1010         }
1011         proc_unlock(p);
1012 }
1013
1014 /* This is called behind serialization provided by proc_limitblock/unlbock */
1015 int
1016 proc_limitreplace(proc_t p)
1017 {
1018         struct plimit *copy;
1019
1020
1021         proc_list_lock();
1022
1023         if (p->p_limit->pl_refcnt == 1) {
1024                 proc_list_unlock();
1025                 return(0);
1026         }
1027
1028         proc_list_unlock();
1029
1030         MALLOC_ZONE(copy, struct plimit *,
1031                         sizeof(struct plimit), M_PLIMIT, M_WAITOK);
1032         if (copy == NULL) {
1033                 return(ENOMEM);
1034         }
1035
1036         proc_list_lock();
1037         bcopy(p->p_limit->pl_rlimit, copy->pl_rlimit,
1038             sizeof(struct rlimit) * RLIM_NLIMITS);
1039         copy->pl_refcnt = 1;
1040         /* hang on to reference to old till process exits */
1041         p->p_olimit = p->p_limit;
1042         p->p_limit = copy;
1043         proc_list_unlock();
1044
1045         return(0);
1046 }
1047
1048
1049 /*
1050  * iopolicysys
1051  *
1052  * Description: System call MUX for use in manipulating I/O policy attributes of the current process or thread
1053  *
1054  * Parameters:  cmd                             Policy command
1055  *              arg                             Pointer to policy arguments
1056  *
1057  * Returns:     0                               Success
1058  *              EINVAL                          Invalid command or invalid policy arguments
1059  *
1060  */
1061 int
1062 iopolicysys(__unused struct proc *p, __unused struct iopolicysys_args *uap, __unused register_t *retval)
1063 {
1064         int     error = 0;
1065         thread_t thread = THREAD_NULL;
1066         int *policy;
1067         struct uthread  *ut = NULL;
1068         struct _iopol_param_t iop_param;
1069
1070         if ((error = copyin(uap->arg, &iop_param, sizeof(iop_param))) != 0)
1071                 goto exit;
1072
1073         if (iop_param.iop_iotype != IOPOL_TYPE_DISK) {
1074                 error = EINVAL;
1075                 goto exit;
1076         }
1077
1078         switch (iop_param.iop_scope) {
1079         case IOPOL_SCOPE_PROCESS:
1080                 policy = &p->p_iopol_disk;
1081                 break;
1082         case IOPOL_SCOPE_THREAD:
1083                 thread = current_thread();
1084                 ut = get_bsdthread_info(thread);
1085                 policy = &ut->uu_iopol_disk;
1086                 break;
1087         default:
1088                 error = EINVAL;
1089                 goto exit;
1090         }
1091
1092         switch(uap->cmd) {
1093         case IOPOL_CMD_SET:
1094                 switch (iop_param.iop_policy) {
1095                 case IOPOL_DEFAULT:
1096                 case IOPOL_NORMAL:
1097                 case IOPOL_THROTTLE:
1098                 case IOPOL_PASSIVE:
1099                         proc_lock(p);
1100                         *policy = iop_param.iop_policy;
1101                         proc_unlock(p);
1102                         break;
1103                 default:
1104                         error = EINVAL;
1105                         goto exit;
1106                 }
1107                 break;
1108         case IOPOL_CMD_GET:
1109                 switch (*policy) {
1110                 case IOPOL_DEFAULT:
1111                 case IOPOL_NORMAL:
1112                 case IOPOL_THROTTLE:
1113                 case IOPOL_PASSIVE:
1114                         iop_param.iop_policy = *policy;
1115                         break;
1116                 default: // in-kernel
1117                         // this should never happen
1118                         printf("%s: unknown I/O policy %d\n", __func__, *policy);
1119                         // restore to default value
1120                         *policy = IOPOL_DEFAULT;
1121                         iop_param.iop_policy = *policy;
1122                 }
1123
1124                 error = copyout((caddr_t)&iop_param, uap->arg, sizeof(iop_param));
1125                 break;
1126         default:
1127                 error = EINVAL; // unknown command
1128                 break;
1129         }
1130
1131   exit:
1132         *retval = error;
1133         return (error);
1134 }