osfmk/kern/mk_sp.c

   1 /*
   2  * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
   3  *
   4  * @APPLE_LICENSE_HEADER_START@
   5  *
   6  * Copyright (c) 1999-2003 Apple Computer, Inc.  All Rights Reserved.
   7  *
   8  * This file contains Original Code and/or Modifications of Original Code
   9  * as defined in and that are subject to the Apple Public Source License
  10  * Version 2.0 (the 'License'). You may not use this file except in
  11  * compliance with the License. Please obtain a copy of the License at
  12  * http://www.opensource.apple.com/apsl/ and read it before using this
  13  * file.
  14  *
  15  * The Original Code and all software distributed under the License are
  16  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  17  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  18  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  19  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  20  * Please see the License for the specific language governing rights and
  21  * limitations under the License.
  22  *
  23  * @APPLE_LICENSE_HEADER_END@
  24  */
  25 /*
  26  * @OSF_COPYRIGHT@
  27  *
  28  */
  29
  30 /***
  31  *** ??? The following lines were picked up when code was incorporated
  32  *** into this file from `kern/syscall_subr.c.'  These should be moved
  33  *** with the code if it moves again.  Otherwise, they should be trimmed,
  34  *** based on the files included above.
  35  ***/
  36
  37 #include <mach/boolean.h>
  38 #include <mach/thread_switch.h>
  39 #include <ipc/ipc_port.h>
  40 #include <ipc/ipc_space.h>
  41 #include <kern/ipc_kobject.h>
  42 #include <kern/processor.h>
  43 #include <kern/sched.h>
  44 #include <kern/sched_prim.h>
  45 #include <kern/spl.h>
  46 #include <kern/task.h>
  47 #include <kern/thread.h>
  48 #include <kern/ast.h>
  49 #include <mach/policy.h>
  50
  51 #include <kern/syscall_subr.h>
  52 #include <mach/mach_host_server.h>
  53 #include <mach/mach_syscalls.h>
  54
  55 /***
  56  *** ??? End of lines picked up when code was incorporated
  57  *** into this file from `kern/syscall_subr.c.'
  58  ***/
  59
  60 #include <kern/mk_sp.h>
  61 #include <kern/misc_protos.h>
  62 #include <kern/spl.h>
  63 #include <kern/sched.h>
  64 #include <kern/sched_prim.h>
  65 #include <kern/assert.h>
  66 #include <kern/thread.h>
  67 #include <mach/mach_host_server.h>
  68
  69 /***
  70  *** ??? The next two files supply the prototypes for `thread_set_policy()'
  71  *** and `thread_policy.'  These routines cannot stay here if they are
  72  *** exported Mach system calls.
  73  ***/
  74 #include <mach/thread_act_server.h>
  75 #include <mach/host_priv_server.h>
  76
  77 void
  78 _mk_sp_thread_unblock(
  79         thread_t                        thread)
  80 {
  81         if (thread->state & TH_IDLE)
  82                 return;
  83
  84         if (thread->sched_mode & TH_MODE_REALTIME) {
  85                 thread->realtime.deadline = mach_absolute_time();
  86                 thread->realtime.deadline += thread->realtime.constraint;
  87         }
  88
  89         thread->current_quantum = 0;
  90         thread->computation_metered = 0;
  91         thread->reason = AST_NONE;
  92 }
  93
  94 void
  95 _mk_sp_thread_done(
  96         thread_t                        old_thread,
  97         thread_t                        new_thread,
  98         processor_t                     processor)
  99 {
 100         /*
 101          * A running thread is being taken off a processor:
 102          */
 103         processor->last_dispatch = mach_absolute_time();
 104
 105         if (old_thread->state & TH_IDLE)
 106                 return;
 107
 108         /*
 109          * Compute remainder of current quantum.
 110          */
 111         if (            first_timeslice(processor)                                                      &&
 112                         processor->quantum_end > processor->last_dispatch               )
 113                 old_thread->current_quantum =
 114                         (processor->quantum_end - processor->last_dispatch);
 115         else
 116                 old_thread->current_quantum = 0;
 117
 118         if (old_thread->sched_mode & TH_MODE_REALTIME) {
 119                 /*
 120                  * Cancel the deadline if the thread has
 121                  * consumed the entire quantum.
 122                  */
 123                 if (old_thread->current_quantum == 0) {
 124                         old_thread->realtime.deadline = UINT64_MAX;
 125                         old_thread->reason |= AST_QUANTUM;
 126                 }
 127         }
 128         else {
 129                 /*
 130                  * For non-realtime threads treat a tiny
 131                  * remaining quantum as an expired quantum
 132                  * but include what's left next time.
 133                  */
 134                 if (old_thread->current_quantum < min_std_quantum) {
 135                         old_thread->reason |= AST_QUANTUM;
 136                         old_thread->current_quantum += std_quantum;
 137                 }
 138         }
 139
 140         /*
 141          * If we are doing a direct handoff then
 142          * give the remainder of our quantum to
 143          * the next guy.
 144          */
 145         if ((old_thread->reason & (AST_HANDOFF|AST_QUANTUM)) == AST_HANDOFF) {
 146                 new_thread->current_quantum = old_thread->current_quantum;
 147                 old_thread->reason |= AST_QUANTUM;
 148                 old_thread->current_quantum = 0;
 149         }
 150
 151         old_thread->last_switch = processor->last_dispatch;
 152
 153         old_thread->computation_metered +=
 154                         (old_thread->last_switch - old_thread->computation_epoch);
 155 }
 156
 157 void
 158 _mk_sp_thread_begin(
 159         thread_t                        thread,
 160         processor_t                     processor)
 161 {
 162
 163         /*
 164          * The designated thread is beginning execution:
 165          */
 166         if (thread->state & TH_IDLE) {
 167                 timer_call_cancel(&processor->quantum_timer);
 168                 processor->timeslice = 1;
 169
 170                 return;
 171         }
 172
 173         if (thread->current_quantum == 0)
 174                 thread_quantum_init(thread);
 175
 176         processor->quantum_end =
 177                                 (processor->last_dispatch + thread->current_quantum);
 178         timer_call_enter1(&processor->quantum_timer,
 179                                                         thread, processor->quantum_end);
 180
 181         processor_timeslice_setup(processor, thread);
 182
 183         thread->last_switch = processor->last_dispatch;
 184
 185         thread->computation_epoch = thread->last_switch;
 186 }
 187
 188 void
 189 _mk_sp_thread_dispatch(
 190         thread_t                thread)
 191 {
 192         if (thread->reason & AST_QUANTUM)
 193                 thread_setrun(thread, SCHED_TAILQ);
 194         else
 195         if (thread->reason & AST_PREEMPT)
 196                 thread_setrun(thread, SCHED_HEADQ);
 197         else
 198                 thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ);
 199
 200         thread->reason = AST_NONE;
 201 }
 202
 203 /*
 204  *      thread_policy_common:
 205  *
 206  *      Set scheduling policy & priority for thread.
 207  */
 208 static kern_return_t
 209 thread_policy_common(
 210         thread_t                thread,
 211         integer_t               policy,
 212         integer_t               priority)
 213 {
 214         spl_t                   s;
 215
 216         if (    thread == THREAD_NULL           ||
 217                         invalid_policy(policy)          )
 218                 return(KERN_INVALID_ARGUMENT);
 219
 220         s = splsched();
 221         thread_lock(thread);
 222
 223         if (    !(thread->sched_mode & TH_MODE_REALTIME)        &&
 224                         !(thread->safe_mode & TH_MODE_REALTIME)                 ) {
 225                 if (!(thread->sched_mode & TH_MODE_FAILSAFE)) {
 226                         integer_t       oldmode = (thread->sched_mode & TH_MODE_TIMESHARE);
 227
 228                         if (policy == POLICY_TIMESHARE && !oldmode) {
 229                                 thread->sched_mode |= TH_MODE_TIMESHARE;
 230
 231                                 if (thread->state & TH_RUN)
 232                                         pset_share_incr(thread->processor_set);
 233                         }
 234                         else
 235                         if (policy != POLICY_TIMESHARE && oldmode) {
 236                                 thread->sched_mode &= ~TH_MODE_TIMESHARE;
 237
 238                                 if (thread->state & TH_RUN)
 239                                         pset_share_decr(thread->processor_set);
 240                         }
 241                 }
 242                 else {
 243                         if (policy == POLICY_TIMESHARE)
 244                                 thread->safe_mode |= TH_MODE_TIMESHARE;
 245                         else
 246                                 thread->safe_mode &= ~TH_MODE_TIMESHARE;
 247                 }
 248
 249                 if (priority >= thread->max_priority)
 250                         priority = thread->max_priority - thread->task_priority;
 251                 else
 252                 if (priority >= MINPRI_KERNEL)
 253                         priority -= MINPRI_KERNEL;
 254                 else
 255                 if (priority >= MINPRI_SYSTEM)
 256                         priority -= MINPRI_SYSTEM;
 257                 else
 258                         priority -= BASEPRI_DEFAULT;
 259
 260                 priority += thread->task_priority;
 261
 262                 if (priority > thread->max_priority)
 263                         priority = thread->max_priority;
 264                 else
 265                 if (priority < MINPRI)
 266                         priority = MINPRI;
 267
 268                 thread->importance = priority - thread->task_priority;
 269
 270                 set_priority(thread, priority);
 271         }
 272
 273         thread_unlock(thread);
 274         splx(s);
 275
 276         return (KERN_SUCCESS);
 277 }
 278
 279 /*
 280  *      thread_set_policy
 281  *
 282  *      Set scheduling policy and parameters, both base and limit, for
 283  *      the given thread. Policy can be any policy implemented by the
 284  *      processor set, whether enabled or not.
 285  */
 286 kern_return_t
 287 thread_set_policy(
 288         thread_act_t                    thr_act,
 289         processor_set_t                 pset,
 290         policy_t                                policy,
 291         policy_base_t                   base,
 292         mach_msg_type_number_t  base_count,
 293         policy_limit_t                  limit,
 294         mach_msg_type_number_t  limit_count)
 295 {
 296         thread_t                                thread;
 297         int                                     max, bas;
 298         kern_return_t                   result = KERN_SUCCESS;
 299
 300         if (    thr_act == THR_ACT_NULL                 ||
 301                         pset == PROCESSOR_SET_NULL              )
 302                 return (KERN_INVALID_ARGUMENT);
 303
 304         thread = act_lock_thread(thr_act);
 305         if (thread == THREAD_NULL) {
 306                 act_unlock_thread(thr_act);
 307
 308                 return(KERN_INVALID_ARGUMENT);
 309         }
 310
 311         if (pset != thread->processor_set) {
 312                 act_unlock_thread(thr_act);
 313
 314                 return(KERN_FAILURE);
 315         }
 316
 317         switch (policy) {
 318
 319         case POLICY_RR:
 320         {
 321                 policy_rr_base_t                rr_base = (policy_rr_base_t) base;
 322                 policy_rr_limit_t               rr_limit = (policy_rr_limit_t) limit;
 323
 324                 if (    base_count != POLICY_RR_BASE_COUNT              ||
 325                                 limit_count != POLICY_RR_LIMIT_COUNT            ) {
 326                         result = KERN_INVALID_ARGUMENT;
 327                         break;
 328                 }
 329
 330                 bas = rr_base->base_priority;
 331                 max = rr_limit->max_priority;
 332                 if (invalid_pri(bas) || invalid_pri(max)) {
 333                         result = KERN_INVALID_ARGUMENT;
 334                         break;
 335                 }
 336
 337                 break;
 338         }
 339
 340         case POLICY_FIFO:
 341         {
 342                 policy_fifo_base_t              fifo_base = (policy_fifo_base_t) base;
 343                 policy_fifo_limit_t             fifo_limit = (policy_fifo_limit_t) limit;
 344
 345                 if (    base_count != POLICY_FIFO_BASE_COUNT    ||
 346                                 limit_count != POLICY_FIFO_LIMIT_COUNT)         {
 347                         result = KERN_INVALID_ARGUMENT;
 348                         break;
 349                 }
 350
 351                 bas = fifo_base->base_priority;
 352                 max = fifo_limit->max_priority;
 353                 if (invalid_pri(bas) || invalid_pri(max)) {
 354                         result = KERN_INVALID_ARGUMENT;
 355                         break;
 356                 }
 357
 358                 break;
 359         }
 360
 361         case POLICY_TIMESHARE:
 362         {
 363                 policy_timeshare_base_t         ts_base = (policy_timeshare_base_t) base;
 364                 policy_timeshare_limit_t        ts_limit =
 365                                                 (policy_timeshare_limit_t) limit;
 366
 367                 if (    base_count != POLICY_TIMESHARE_BASE_COUNT               ||
 368                                 limit_count != POLICY_TIMESHARE_LIMIT_COUNT                     ) {
 369                         result = KERN_INVALID_ARGUMENT;
 370                         break;
 371                 }
 372
 373                 bas = ts_base->base_priority;
 374                 max = ts_limit->max_priority;
 375                 if (invalid_pri(bas) || invalid_pri(max)) {
 376                         result = KERN_INVALID_ARGUMENT;
 377                         break;
 378                 }
 379
 380                 break;
 381         }
 382
 383         default:
 384                 result = KERN_INVALID_POLICY;
 385         }
 386
 387         if (result != KERN_SUCCESS) {
 388                 act_unlock_thread(thr_act);
 389
 390                 return(result);
 391         }
 392
 393         result = thread_policy_common(thread, policy, bas);
 394         act_unlock_thread(thr_act);
 395
 396         return(result);
 397 }
 398
 399
 400 /*
 401  *      thread_policy
 402  *
 403  *      Set scheduling policy and parameters, both base and limit, for
 404  *      the given thread. Policy must be a policy which is enabled for the
 405  *      processor set. Change contained threads if requested.
 406  */
 407 kern_return_t
 408 thread_policy(
 409         thread_act_t                    thr_act,
 410         policy_t                                policy,
 411         policy_base_t                   base,
 412         mach_msg_type_number_t  count,
 413         boolean_t                               set_limit)
 414 {
 415         thread_t                                thread;
 416         processor_set_t                 pset;
 417         kern_return_t                   result = KERN_SUCCESS;
 418         policy_limit_t                  limit;
 419         int                                             limcount;
 420         policy_rr_limit_data_t                  rr_limit;
 421         policy_fifo_limit_data_t                fifo_limit;
 422         policy_timeshare_limit_data_t   ts_limit;
 423
 424         if (thr_act == THR_ACT_NULL)
 425                 return (KERN_INVALID_ARGUMENT);
 426
 427         thread = act_lock_thread(thr_act);
 428         pset = thread->processor_set;
 429         if (    thread == THREAD_NULL           ||
 430                         pset == PROCESSOR_SET_NULL              ){
 431                 act_unlock_thread(thr_act);
 432
 433                 return(KERN_INVALID_ARGUMENT);
 434         }
 435
 436         if (    invalid_policy(policy)                                                                                  ||
 437                         ((POLICY_TIMESHARE | POLICY_RR | POLICY_FIFO) & policy) == 0    ) {
 438                 act_unlock_thread(thr_act);
 439
 440                 return(KERN_INVALID_POLICY);
 441         }
 442
 443         if (set_limit) {
 444                 /*
 445                  *      Set scheduling limits to base priority.
 446                  */
 447                 switch (policy) {
 448
 449                 case POLICY_RR:
 450                 {
 451                         policy_rr_base_t rr_base;
 452
 453                         if (count != POLICY_RR_BASE_COUNT) {
 454                                 result = KERN_INVALID_ARGUMENT;
 455                                 break;
 456                         }
 457
 458                         limcount = POLICY_RR_LIMIT_COUNT;
 459                         rr_base = (policy_rr_base_t) base;
 460                         rr_limit.max_priority = rr_base->base_priority;
 461                         limit = (policy_limit_t) &rr_limit;
 462
 463                         break;
 464                 }
 465
 466                 case POLICY_FIFO:
 467                 {
 468                         policy_fifo_base_t fifo_base;
 469
 470                         if (count != POLICY_FIFO_BASE_COUNT) {
 471                                 result = KERN_INVALID_ARGUMENT;
 472                                 break;
 473                         }
 474
 475                         limcount = POLICY_FIFO_LIMIT_COUNT;
 476                         fifo_base = (policy_fifo_base_t) base;
 477                         fifo_limit.max_priority = fifo_base->base_priority;
 478                         limit = (policy_limit_t) &fifo_limit;
 479
 480                         break;
 481                 }
 482
 483                 case POLICY_TIMESHARE:
 484                 {
 485                         policy_timeshare_base_t ts_base;
 486
 487                         if (count != POLICY_TIMESHARE_BASE_COUNT) {
 488                                 result = KERN_INVALID_ARGUMENT;
 489                                 break;
 490                         }
 491
 492                         limcount = POLICY_TIMESHARE_LIMIT_COUNT;
 493                         ts_base = (policy_timeshare_base_t) base;
 494                         ts_limit.max_priority = ts_base->base_priority;
 495                         limit = (policy_limit_t) &ts_limit;
 496
 497                         break;
 498                 }
 499
 500                 default:
 501                         result = KERN_INVALID_POLICY;
 502                         break;
 503                 }
 504
 505         }
 506         else {
 507                 /*
 508                  *      Use current scheduling limits. Ensure that the
 509                  *      new base priority will not exceed current limits.
 510                  */
 511                 switch (policy) {
 512
 513                 case POLICY_RR:
 514                 {
 515                         policy_rr_base_t rr_base;
 516
 517                         if (count != POLICY_RR_BASE_COUNT) {
 518                                 result = KERN_INVALID_ARGUMENT;
 519                                 break;
 520                         }
 521
 522                         limcount = POLICY_RR_LIMIT_COUNT;
 523                         rr_base = (policy_rr_base_t) base;
 524                         if (rr_base->base_priority > thread->max_priority) {
 525                                 result = KERN_POLICY_LIMIT;
 526                                 break;
 527                         }
 528
 529                         rr_limit.max_priority = thread->max_priority;
 530                         limit = (policy_limit_t) &rr_limit;
 531
 532                         break;
 533                 }
 534
 535                 case POLICY_FIFO:
 536                 {
 537                         policy_fifo_base_t fifo_base;
 538
 539                         if (count != POLICY_FIFO_BASE_COUNT) {
 540                                 result = KERN_INVALID_ARGUMENT;
 541                                 break;
 542                         }
 543
 544                         limcount = POLICY_FIFO_LIMIT_COUNT;
 545                         fifo_base = (policy_fifo_base_t) base;
 546                         if (fifo_base->base_priority > thread->max_priority) {
 547                                 result = KERN_POLICY_LIMIT;
 548                                 break;
 549                         }
 550
 551                         fifo_limit.max_priority = thread->max_priority;
 552                         limit = (policy_limit_t) &fifo_limit;
 553
 554                         break;
 555                 }
 556
 557                 case POLICY_TIMESHARE:
 558                 {
 559                         policy_timeshare_base_t ts_base;
 560
 561                         if (count != POLICY_TIMESHARE_BASE_COUNT) {
 562                                 result = KERN_INVALID_ARGUMENT;
 563                                 break;
 564                         }
 565
 566                         limcount = POLICY_TIMESHARE_LIMIT_COUNT;
 567                         ts_base = (policy_timeshare_base_t) base;
 568                         if (ts_base->base_priority > thread->max_priority) {
 569                                 result = KERN_POLICY_LIMIT;
 570                                 break;
 571                         }
 572
 573                         ts_limit.max_priority = thread->max_priority;
 574                         limit = (policy_limit_t) &ts_limit;
 575
 576                         break;
 577                 }
 578
 579                 default:
 580                         result = KERN_INVALID_POLICY;
 581                         break;
 582                 }
 583
 584         }
 585
 586         act_unlock_thread(thr_act);
 587
 588         if (result == KERN_SUCCESS)
 589             result = thread_set_policy(thr_act, pset,
 590                                          policy, base, count, limit, limcount);
 591
 592         return(result);
 593 }
 594
 595 /*
 596  *      Define shifts for simulating (5/8)**n
 597  */
 598
 599 shift_data_t    wait_shift[32] = {
 600         {1,1},{1,3},{1,-3},{2,-7},{3,5},{3,-5},{4,-8},{5,7},
 601         {5,-7},{6,-10},{7,10},{7,-9},{8,-11},{9,12},{9,-11},{10,-13},
 602         {11,14},{11,-13},{12,-15},{13,17},{13,-15},{14,-17},{15,19},{16,18},
 603         {16,-19},{17,22},{18,20},{18,-20},{19,26},{20,22},{20,-22},{21,-27}};
 604
 605 /*
 606  *      do_priority_computation:
 607  *
 608  *      Calculate new priority for thread based on its base priority plus
 609  *      accumulated usage.  PRI_SHIFT and PRI_SHIFT_2 convert from
 610  *      usage to priorities.  SCHED_SHIFT converts for the scaling
 611  *      of the sched_usage field by SCHED_SCALE.  This scaling comes
 612  *      from the multiplication by sched_load (thread_timer_delta)
 613  *      in sched.h.  sched_load is calculated as a scaled overload
 614  *      factor in compute_mach_factor (mach_factor.c).
 615  */
 616 #ifdef  PRI_SHIFT_2
 617 #if     PRI_SHIFT_2 > 0
 618 #define do_priority_computation(thread, pri)                                            \
 619         MACRO_BEGIN                                                                                                             \
 620         (pri) = (thread)->priority              /* start with base priority */  \
 621             - ((thread)->sched_usage >> (PRI_SHIFT + SCHED_SHIFT))              \
 622             - ((thread)->sched_usage >> (PRI_SHIFT_2 + SCHED_SHIFT));   \
 623         if ((pri) < MINPRI_STANDARD)                                                                    \
 624                 (pri) = MINPRI_STANDARD;                                                                        \
 625         else                                                                                                                    \
 626         if ((pri) > MAXPRI_STANDARD)                                                                    \
 627                 (pri) = MAXPRI_STANDARD;                                                                        \
 628         MACRO_END
 629 #else   /* PRI_SHIFT_2 */
 630 #define do_priority_computation(thread, pri)                                            \
 631         MACRO_BEGIN                                                                                                             \
 632         (pri) = (thread)->priority              /* start with base priority */  \
 633             - ((thread)->sched_usage >> (PRI_SHIFT + SCHED_SHIFT))              \
 634             + ((thread)->sched_usage >> (SCHED_SHIFT - PRI_SHIFT_2));   \
 635         if ((pri) < MINPRI_STANDARD)                                                                    \
 636                 (pri) = MINPRI_STANDARD;                                                                        \
 637         else                                                                                                                    \
 638         if ((pri) > MAXPRI_STANDARD)                                                                    \
 639                 (pri) = MAXPRI_STANDARD;                                                                        \
 640         MACRO_END
 641 #endif  /* PRI_SHIFT_2 */
 642 #else   /* defined(PRI_SHIFT_2) */
 643 #define do_priority_computation(thread, pri)                                            \
 644         MACRO_BEGIN                                                                                                             \
 645         (pri) = (thread)->priority              /* start with base priority */  \
 646             - ((thread)->sched_usage >> (PRI_SHIFT + SCHED_SHIFT));             \
 647         if ((pri) < MINPRI_STANDARD)                                                                    \
 648                 (pri) = MINPRI_STANDARD;                                                                        \
 649         else                                                                                                                    \
 650         if ((pri) > MAXPRI_STANDARD)                                                                    \
 651                 (pri) = MAXPRI_STANDARD;                                                                        \
 652         MACRO_END
 653 #endif  /* defined(PRI_SHIFT_2) */
 654
 655 void
 656 set_priority(
 657         register thread_t       thread,
 658         register int            priority)
 659 {
 660         thread->priority = priority;
 661         compute_priority(thread, FALSE);
 662 }
 663
 664 /*
 665  *      compute_priority:
 666  *
 667  *      Reset the current scheduled priority of the
 668  *      thread according to its base priority if the
 669  *      thread has not been promoted or depressed.
 670  *
 671  *      If the thread is timesharing, adjust according
 672  *      to recent cpu usage.
 673  *
 674  *      The thread *must* be locked by the caller.
 675  */
 676 void
 677 compute_priority(
 678         register thread_t       thread,
 679         boolean_t                       override_depress)
 680 {
 681         register int            priority;
 682
 683         if (    !(thread->sched_mode & TH_MODE_PROMOTED)                        &&
 684                         (!(thread->sched_mode & TH_MODE_ISDEPRESSED)    ||
 685                                  override_depress                                                       )               ) {
 686                 if (thread->sched_mode & TH_MODE_TIMESHARE)
 687                         do_priority_computation(thread, priority);
 688                 else
 689                         priority = thread->priority;
 690
 691                 set_sched_pri(thread, priority);
 692         }
 693 }
 694
 695 /*
 696  *      compute_my_priority:
 697  *
 698  *      Version of compute priority for current thread.
 699  *      Caller must     have thread     locked and thread must
 700  *      be timesharing and not depressed.
 701  *
 702  *      Only used for priority updates.
 703  */
 704 void
 705 compute_my_priority(
 706         register thread_t       thread)
 707 {
 708         register int            priority;
 709
 710         do_priority_computation(thread, priority);
 711         assert(thread->runq == RUN_QUEUE_NULL);
 712         thread->sched_pri = priority;
 713 }
 714
 715 /*
 716  *      update_priority
 717  *
 718  *      Cause the priority computation of a thread that has been
 719  *      sleeping or suspended to "catch up" with the system.  Thread
 720  *      *MUST* be locked by caller.  If thread is running, then this
 721  *      can only be called by the thread on itself.
 722  */
 723 void
 724 update_priority(
 725         register thread_t               thread)
 726 {
 727         register unsigned int   ticks;
 728         register shift_t                shiftp;
 729
 730         ticks = sched_tick - thread->sched_stamp;
 731         assert(ticks != 0);
 732
 733         /*
 734          *      If asleep for more than 30 seconds forget all
 735          *      cpu_usage, else catch up on missed aging.
 736          *      5/8 ** n is approximated by the two shifts
 737          *      in the wait_shift array.
 738          */
 739         thread->sched_stamp += ticks;
 740         thread_timer_delta(thread);
 741         if (ticks >  30) {
 742                 thread->cpu_usage = 0;
 743                 thread->sched_usage = 0;
 744         }
 745         else {
 746                 thread->cpu_usage += thread->cpu_delta;
 747                 thread->sched_usage += thread->sched_delta;
 748
 749                 shiftp = &wait_shift[ticks];
 750                 if (shiftp->shift2 > 0) {
 751                     thread->cpu_usage =
 752                                                 (thread->cpu_usage >> shiftp->shift1) +
 753                                                 (thread->cpu_usage >> shiftp->shift2);
 754                     thread->sched_usage =
 755                                                 (thread->sched_usage >> shiftp->shift1) +
 756                                                 (thread->sched_usage >> shiftp->shift2);
 757                 }
 758                 else {
 759                     thread->cpu_usage =
 760                                                 (thread->cpu_usage >> shiftp->shift1) -
 761                                                 (thread->cpu_usage >> -(shiftp->shift2));
 762                     thread->sched_usage =
 763                                                 (thread->sched_usage >> shiftp->shift1) -
 764                                                 (thread->sched_usage >> -(shiftp->shift2));
 765                 }
 766         }
 767
 768         thread->cpu_delta = 0;
 769         thread->sched_delta = 0;
 770
 771         /*
 772          *      Check for fail-safe release.
 773          */
 774         if (    (thread->sched_mode & TH_MODE_FAILSAFE)         &&
 775                         thread->sched_stamp >= thread->safe_release             ) {
 776                 if (!(thread->safe_mode & TH_MODE_TIMESHARE)) {
 777                         if (thread->safe_mode & TH_MODE_REALTIME) {
 778                                 thread->priority = BASEPRI_RTQUEUES;
 779
 780                                 thread->sched_mode |= TH_MODE_REALTIME;
 781                         }
 782
 783                         thread->sched_mode &= ~TH_MODE_TIMESHARE;
 784
 785                         if (thread->state & TH_RUN)
 786                                 pset_share_decr(thread->processor_set);
 787
 788                         if (!(thread->sched_mode & TH_MODE_ISDEPRESSED))
 789                                 set_sched_pri(thread, thread->priority);
 790                 }
 791
 792                 thread->safe_mode = 0;
 793                 thread->sched_mode &= ~TH_MODE_FAILSAFE;
 794         }
 795
 796         /*
 797          *      Recompute scheduled priority if appropriate.
 798          */
 799         if (    (thread->sched_mode & TH_MODE_TIMESHARE)        &&
 800                         !(thread->sched_mode & TH_MODE_PROMOTED)        &&
 801                         !(thread->sched_mode & TH_MODE_ISDEPRESSED)             ) {
 802                 register int            new_pri;
 803
 804                 do_priority_computation(thread, new_pri);
 805                 if (new_pri != thread->sched_pri) {
 806                         run_queue_t             runq;
 807
 808                         runq = run_queue_remove(thread);
 809                         thread->sched_pri = new_pri;
 810                         if (runq != RUN_QUEUE_NULL)
 811                                 thread_setrun(thread, SCHED_TAILQ);
 812                 }
 813         }
 814 }
 815
 816 /*
 817  *      thread_switch_continue:
 818  *
 819  *      Continuation routine for a thread switch.
 820  *
 821  *      Just need to arrange the return value gets sent out correctly and that
 822  *  we cancel the timer or the depression called for by the options to the
 823  *  thread_switch call.
 824  */
 825 void
 826 _mk_sp_thread_switch_continue(void)
 827 {
 828         register thread_t       self = current_thread();
 829         int                                     wait_result = self->wait_result;
 830         int                                     option = self->saved.swtch.option;
 831
 832         if (option == SWITCH_OPTION_WAIT && wait_result != THREAD_TIMED_OUT)
 833                 thread_cancel_timer();
 834         else
 835         if (option == SWITCH_OPTION_DEPRESS)
 836                 _mk_sp_thread_depress_abort(self, FALSE);
 837
 838         thread_syscall_return(KERN_SUCCESS);
 839         /*NOTREACHED*/
 840 }
 841
 842 /*
 843  *      thread_switch:
 844  *
 845  *      Context switch.  User may supply thread hint.
 846  *
 847  *      Fixed priority threads that call this get what they asked for
 848  *      even if that violates priority order.
 849  */
 850 kern_return_t
 851 _mk_sp_thread_switch(
 852         thread_act_t                    hint_act,
 853         int                                             option,
 854         mach_msg_timeout_t              option_time)
 855 {
 856     register thread_t           self = current_thread();
 857         int                                             s;
 858
 859     /*
 860      *  Check and use thr_act hint if appropriate.  It is not
 861      *  appropriate to give a hint that shares the current shuttle.
 862      */
 863         if (hint_act != THR_ACT_NULL) {
 864                 register thread_t               thread = act_lock_thread(hint_act);
 865
 866                 if (            thread != THREAD_NULL                   &&
 867                                         thread != self                                  &&
 868                                         thread->top_act == hint_act                             ) {
 869                         processor_t             processor;
 870
 871                         s = splsched();
 872                         thread_lock(thread);
 873
 874                         /*
 875                          *      Check if the thread is in the right pset,
 876                          *      is not bound to a different processor,
 877                          *      and that realtime is not involved.
 878                          *
 879                          *      Next, pull it off its run queue.  If it
 880                          *      doesn't come, it's not eligible.
 881                          */
 882                         processor = current_processor();
 883                         if (processor->current_pri < BASEPRI_RTQUEUES                   &&
 884                                 thread->sched_pri < BASEPRI_RTQUEUES                            &&
 885                                 thread->processor_set == processor->processor_set       &&
 886                                 (thread->bound_processor == PROCESSOR_NULL      ||
 887                                  thread->bound_processor == processor)                          &&
 888                                         run_queue_remove(thread) != RUN_QUEUE_NULL                      ) {
 889                                 /*
 890                                  *      Hah, got it!!
 891                                  */
 892                                 thread_unlock(thread);
 893
 894                                 act_unlock_thread(hint_act);
 895                                 act_deallocate(hint_act);
 896
 897                                 if (option == SWITCH_OPTION_WAIT)
 898                                         assert_wait_timeout(option_time, THREAD_ABORTSAFE);
 899                                 else
 900                                 if (option == SWITCH_OPTION_DEPRESS)
 901                                         _mk_sp_thread_depress_ms(option_time);
 902
 903                                 self->saved.swtch.option = option;
 904
 905                                 thread_run(self, _mk_sp_thread_switch_continue, thread);
 906                                 /* NOTREACHED */
 907                         }
 908
 909                         thread_unlock(thread);
 910                         splx(s);
 911                 }
 912
 913                 act_unlock_thread(hint_act);
 914                 act_deallocate(hint_act);
 915     }
 916
 917     /*
 918      *  No handoff hint supplied, or hint was wrong.  Call thread_block() in
 919      *  hopes of running something else.  If nothing else is runnable,
 920      *  thread_block will detect this.  WARNING: thread_switch with no
 921      *  option will not do anything useful if the thread calling it is the
 922      *  highest priority thread (can easily happen with a collection
 923      *  of timesharing threads).
 924      */
 925         if (option == SWITCH_OPTION_WAIT)
 926                 assert_wait_timeout(option_time, THREAD_ABORTSAFE);
 927         else
 928         if (option == SWITCH_OPTION_DEPRESS)
 929                 _mk_sp_thread_depress_ms(option_time);
 930
 931         self->saved.swtch.option = option;
 932
 933         thread_block_reason(_mk_sp_thread_switch_continue, AST_YIELD);
 934
 935         if (option == SWITCH_OPTION_WAIT)
 936                 thread_cancel_timer();
 937         else
 938         if (option == SWITCH_OPTION_DEPRESS)
 939                 _mk_sp_thread_depress_abort(self, FALSE);
 940
 941     return (KERN_SUCCESS);
 942 }
 943
 944 /*
 945  * Depress thread's priority to lowest possible for the specified interval,
 946  * with a value of zero resulting in no timeout being scheduled.
 947  */
 948 void
 949 _mk_sp_thread_depress_abstime(
 950         uint64_t                                interval)
 951 {
 952         register thread_t               self = current_thread();
 953         uint64_t                                deadline;
 954     spl_t                                       s;
 955
 956     s = splsched();
 957     thread_lock(self);
 958         if (!(self->sched_mode & TH_MODE_ISDEPRESSED)) {
 959                 processor_t             myprocessor = self->last_processor;
 960
 961                 self->sched_pri = DEPRESSPRI;
 962                 myprocessor->current_pri = self->sched_pri;
 963                 self->sched_mode &= ~TH_MODE_PREEMPT;
 964                 self->sched_mode |= TH_MODE_DEPRESS;
 965
 966                 if (interval != 0) {
 967                         clock_absolutetime_interval_to_deadline(interval, &deadline);
 968                         if (!timer_call_enter(&self->depress_timer, deadline))
 969                                 self->depress_timer_active++;
 970                 }
 971         }
 972         thread_unlock(self);
 973     splx(s);
 974 }
 975
 976 void
 977 _mk_sp_thread_depress_ms(
 978         mach_msg_timeout_t              interval)
 979 {
 980         uint64_t                abstime;
 981
 982         clock_interval_to_absolutetime_interval(
 983                                                         interval, 1000*NSEC_PER_USEC, &abstime);
 984         _mk_sp_thread_depress_abstime(abstime);
 985 }
 986
 987 /*
 988  *      Priority depression expiration.
 989  */
 990 void
 991 thread_depress_expire(
 992         timer_call_param_t              p0,
 993         timer_call_param_t              p1)
 994 {
 995         thread_t                thread = p0;
 996     spl_t                       s;
 997
 998     s = splsched();
 999     thread_lock(thread);
1000         if (--thread->depress_timer_active == 1) {
1001                 thread->sched_mode &= ~TH_MODE_ISDEPRESSED;
1002                 compute_priority(thread, FALSE);
1003         }
1004     thread_unlock(thread);
1005     splx(s);
1006 }
1007
1008 /*
1009  *      Prematurely abort priority depression if there is one.
1010  */
1011 kern_return_t
1012 _mk_sp_thread_depress_abort(
1013         register thread_t               thread,
1014         boolean_t                               abortall)
1015 {
1016     kern_return_t                       result = KERN_NOT_DEPRESSED;
1017     spl_t                                       s;
1018
1019     s = splsched();
1020     thread_lock(thread);
1021         if (abortall || !(thread->sched_mode & TH_MODE_POLLDEPRESS)) {
1022                 if (thread->sched_mode & TH_MODE_ISDEPRESSED) {
1023                         thread->sched_mode &= ~TH_MODE_ISDEPRESSED;
1024                         compute_priority(thread, FALSE);
1025                         result = KERN_SUCCESS;
1026                 }
1027
1028                 if (timer_call_cancel(&thread->depress_timer))
1029                         thread->depress_timer_active--;
1030         }
1031         thread_unlock(thread);
1032     splx(s);
1033
1034     return (result);
1035 }
1036
1037 void
1038 _mk_sp_thread_perhaps_yield(
1039         thread_t                        self)
1040 {
1041         spl_t                   s;
1042
1043         assert(self == current_thread());
1044
1045         s = splsched();
1046         if (!(self->sched_mode & (TH_MODE_REALTIME|TH_MODE_TIMESHARE))) {
1047                 extern uint64_t         max_poll_computation;
1048                 extern int                      sched_poll_yield_shift;
1049                 uint64_t                        total_computation, abstime;
1050
1051                 abstime = mach_absolute_time();
1052                 total_computation = abstime - self->computation_epoch;
1053                 total_computation += self->computation_metered;
1054                 if (total_computation >= max_poll_computation) {
1055                         processor_t             myprocessor = current_processor();
1056                         ast_t                   preempt;
1057
1058                         thread_lock(self);
1059                         if (!(self->sched_mode & TH_MODE_ISDEPRESSED)) {
1060                                 self->sched_pri = DEPRESSPRI;
1061                                 myprocessor->current_pri = self->sched_pri;
1062                                 self->sched_mode &= ~TH_MODE_PREEMPT;
1063                         }
1064                         self->computation_epoch = abstime;
1065                         self->computation_metered = 0;
1066                         self->sched_mode |= TH_MODE_POLLDEPRESS;
1067
1068                         abstime += (total_computation >> sched_poll_yield_shift);
1069                         if (!timer_call_enter(&self->depress_timer, abstime))
1070                                 self->depress_timer_active++;
1071                         thread_unlock(self);
1072
1073                         if ((preempt = csw_check(self, myprocessor)) != AST_NONE)
1074                                 ast_on(preempt);
1075                 }
1076         }
1077         splx(s);
1078 }