/*
 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved.
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 *
 */

/***
 *** ??? The following lines were picked up when code was incorporated
 *** into this file from `kern/syscall_subr.c.'  These should be moved
 *** with the code if it moves again.  Otherwise, they should be trimmed,
 *** based on the files included above.
 ***/

#include <mach/boolean.h>
#include <mach/thread_switch.h>
#include <ipc/ipc_port.h>
#include <ipc/ipc_space.h>
#include <kern/ipc_kobject.h>
#include <kern/processor.h>
#include <kern/sched.h>
#include <kern/sched_prim.h>
#include <kern/spl.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/ast.h>
#include <mach/policy.h>

#include <kern/syscall_subr.h>
#include <mach/mach_host_server.h>
#include <mach/mach_syscalls.h>

/***
 *** ??? End of lines picked up when code was incorporated
 *** into this file from `kern/syscall_subr.c.'
 ***/

#include <kern/mk_sp.h>
#include <kern/misc_protos.h>
#include <kern/spl.h>
#include <kern/sched.h>
#include <kern/sched_prim.h>
#include <kern/assert.h>
#include <kern/thread.h>
#include <mach/mach_host_server.h>

/***
 *** ??? The next two files supply the prototypes for `thread_set_policy()'
 *** and `thread_policy.'  These routines cannot stay here if they are
 *** exported Mach system calls.
 ***/
#include <mach/thread_act_server.h>
#include <mach/host_priv_server.h>
#include <sys/kdebug.h>

void
_mk_sp_thread_unblock(
    thread_t        thread)
{
    thread_setrun(thread, TAIL_Q);

    thread->current_quantum = 0;
    thread->computation_metered = 0;
    thread->reason = AST_NONE;

    KERNEL_DEBUG_CONSTANT(
        MACHDBG_CODE(DBG_MACH_SCHED, MACH_MAKE_RUNNABLE) | DBG_FUNC_NONE,
        (int)thread, (int)thread->sched_pri, 0, 0, 0);
}

void
_mk_sp_thread_done(
    thread_t        old_thread,
    thread_t        new_thread,
    processor_t     processor)
{
    /*
     * A running thread is being taken off a processor:
     */
    clock_get_uptime(&processor->last_dispatch);
    if (!(old_thread->state & TH_IDLE)) {
        /*
         * Compute remainder of current quantum.
         */
        if (first_quantum(processor) &&
            processor->quantum_end > processor->last_dispatch)
            old_thread->current_quantum =
                (processor->quantum_end - processor->last_dispatch);
        else
            old_thread->current_quantum = 0;

        /*
         * For non-realtime threads treat a tiny
         * remaining quantum as an expired quantum
         * but include what's left next time.
         */
        if (!(old_thread->sched_mode & TH_MODE_REALTIME)) {
            if (old_thread->current_quantum < min_std_quantum) {
                old_thread->reason |= AST_QUANTUM;
                old_thread->current_quantum += std_quantum;
            }
        }
        else
        if (old_thread->current_quantum == 0)
            old_thread->reason |= AST_QUANTUM;

        /*
         * If we are doing a direct handoff then
         * give the remainder of our quantum to
         * the next guy.
         */
        if ((old_thread->reason & (AST_HANDOFF|AST_QUANTUM)) == AST_HANDOFF) {
            new_thread->current_quantum = old_thread->current_quantum;
            old_thread->reason |= AST_QUANTUM;
            old_thread->current_quantum = 0;
        }

        old_thread->last_switch = processor->last_dispatch;

        old_thread->computation_metered +=
            (old_thread->last_switch - old_thread->computation_epoch);
    }
}
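
/*
 * Worked example (hypothetical numbers): a timesharing thread is
 * switched out with 20 us of a 10 ms quantum remaining.  Because the
 * remainder is below min_std_quantum, the thread is charged a full
 * quantum expiration (AST_QUANTUM), but its next quantum is credited
 * std_quantum + 20 us, so the leftover is not lost.
 */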

void
_mk_sp_thread_begin(
    thread_t        thread,
    processor_t     processor)
{
    /*
     * The designated thread is beginning execution:
     */
    if (!(thread->state & TH_IDLE)) {
        if (thread->current_quantum == 0)
            thread->current_quantum =
                (thread->sched_mode & TH_MODE_REALTIME)?
                    thread->realtime.computation: std_quantum;

        processor->quantum_end =
            (processor->last_dispatch + thread->current_quantum);
        timer_call_enter1(&processor->quantum_timer,
            thread, processor->quantum_end);

        processor->slice_quanta =
            (thread->sched_mode & TH_MODE_TIMESHARE)?
                processor->processor_set->set_quanta: 1;

        thread->last_switch = processor->last_dispatch;

        thread->computation_epoch = thread->last_switch;
    }
    else {
        timer_call_cancel(&processor->quantum_timer);

        processor->slice_quanta = 1;
    }
}

void
_mk_sp_thread_dispatch(
    thread_t        thread)
{
    if (thread->reason & AST_QUANTUM)
        thread_setrun(thread, TAIL_Q);
    else
        thread_setrun(thread, HEAD_Q);

    thread->reason = AST_NONE;
}

/*
 * thread_policy_common:
 *
 * Set scheduling policy & priority for thread.
 */
static kern_return_t
thread_policy_common(
    thread_t        thread,
    integer_t       policy,
    integer_t       priority)
{
    spl_t           s;

    if (thread == THREAD_NULL ||
        invalid_policy(policy))
        return (KERN_INVALID_ARGUMENT);

    s = splsched();
    thread_lock(thread);

    if (!(thread->sched_mode & TH_MODE_REALTIME) &&
        !(thread->safe_mode & TH_MODE_REALTIME)) {
        if (!(thread->sched_mode & TH_MODE_FAILSAFE)) {
            if (policy == POLICY_TIMESHARE)
                thread->sched_mode |= TH_MODE_TIMESHARE;
            else
                thread->sched_mode &= ~TH_MODE_TIMESHARE;
        }
        else {
            if (policy == POLICY_TIMESHARE)
                thread->safe_mode |= TH_MODE_TIMESHARE;
            else
                thread->safe_mode &= ~TH_MODE_TIMESHARE;
        }

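        /*
         * Interpret the requested priority as an offset within its
         * band (thread maximum, kernel, system, or default), rebase
         * that offset onto the task's priority, and clamp the result
         * to [MINPRI, max_priority].
         */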
        if (priority >= thread->max_priority)
            priority = thread->max_priority - thread->task_priority;
        else
        if (priority >= MINPRI_KERNEL)
            priority -= MINPRI_KERNEL;
        else
        if (priority >= MINPRI_SYSTEM)
            priority -= MINPRI_SYSTEM;
        else
            priority -= BASEPRI_DEFAULT;

        priority += thread->task_priority;

        if (priority > thread->max_priority)
            priority = thread->max_priority;
        else
        if (priority < MINPRI)
            priority = MINPRI;

        thread->importance = priority - thread->task_priority;

        set_priority(thread, priority);
    }

    thread_unlock(thread);
    splx(s);

    return (KERN_SUCCESS);
}

/*
 * thread_set_policy
 *
 * Set scheduling policy and parameters, both base and limit, for
 * the given thread.  Policy can be any policy implemented by the
 * processor set, whether enabled or not.
 */
kern_return_t
thread_set_policy(
    thread_act_t            thr_act,
    processor_set_t         pset,
    policy_t                policy,
    policy_base_t           base,
    mach_msg_type_number_t  base_count,
    policy_limit_t          limit,
    mach_msg_type_number_t  limit_count)
{
    thread_t                thread;
    int                     max, bas;
    kern_return_t           result = KERN_SUCCESS;

    if (thr_act == THR_ACT_NULL ||
        pset == PROCESSOR_SET_NULL)
        return (KERN_INVALID_ARGUMENT);

    thread = act_lock_thread(thr_act);
    if (thread == THREAD_NULL) {
        act_unlock_thread(thr_act);

        return (KERN_INVALID_ARGUMENT);
    }

    if (pset != thread->processor_set) {
        act_unlock_thread(thr_act);

        return (KERN_FAILURE);
    }

    switch (policy) {

    case POLICY_RR:
    {
        policy_rr_base_t    rr_base = (policy_rr_base_t) base;
        policy_rr_limit_t   rr_limit = (policy_rr_limit_t) limit;

        if (base_count != POLICY_RR_BASE_COUNT ||
            limit_count != POLICY_RR_LIMIT_COUNT) {
            result = KERN_INVALID_ARGUMENT;
            break;
        }

        bas = rr_base->base_priority;
        max = rr_limit->max_priority;
        if (invalid_pri(bas) || invalid_pri(max)) {
            result = KERN_INVALID_ARGUMENT;
            break;
        }

        break;
    }

    case POLICY_FIFO:
    {
        policy_fifo_base_t  fifo_base = (policy_fifo_base_t) base;
        policy_fifo_limit_t fifo_limit = (policy_fifo_limit_t) limit;

        if (base_count != POLICY_FIFO_BASE_COUNT ||
            limit_count != POLICY_FIFO_LIMIT_COUNT) {
            result = KERN_INVALID_ARGUMENT;
            break;
        }

        bas = fifo_base->base_priority;
        max = fifo_limit->max_priority;
        if (invalid_pri(bas) || invalid_pri(max)) {
            result = KERN_INVALID_ARGUMENT;
            break;
        }

        break;
    }

    case POLICY_TIMESHARE:
    {
        policy_timeshare_base_t     ts_base = (policy_timeshare_base_t) base;
        policy_timeshare_limit_t    ts_limit =
                                        (policy_timeshare_limit_t) limit;

        if (base_count != POLICY_TIMESHARE_BASE_COUNT ||
            limit_count != POLICY_TIMESHARE_LIMIT_COUNT) {
            result = KERN_INVALID_ARGUMENT;
            break;
        }

        bas = ts_base->base_priority;
        max = ts_limit->max_priority;
        if (invalid_pri(bas) || invalid_pri(max)) {
            result = KERN_INVALID_ARGUMENT;
            break;
        }

        break;
    }

    default:
        result = KERN_INVALID_POLICY;
    }

    if (result != KERN_SUCCESS) {
        act_unlock_thread(thr_act);

        return (result);
    }

    result = thread_policy_common(thread, policy, bas);
    act_unlock_thread(thr_act);

    return (result);
}


/*
 * thread_policy
 *
 * Set scheduling policy and parameters, both base and limit, for
 * the given thread.  Policy must be a policy which is enabled for the
 * processor set.  Change contained threads if requested.
 */
kern_return_t
thread_policy(
    thread_act_t            thr_act,
    policy_t                policy,
    policy_base_t           base,
    mach_msg_type_number_t  count,
    boolean_t               set_limit)
{
    thread_t                thread;
    processor_set_t         pset;
    kern_return_t           result = KERN_SUCCESS;
    policy_limit_t          limit;
    int                     limcount;
    policy_rr_limit_data_t          rr_limit;
    policy_fifo_limit_data_t        fifo_limit;
    policy_timeshare_limit_data_t   ts_limit;

    if (thr_act == THR_ACT_NULL)
        return (KERN_INVALID_ARGUMENT);

    thread = act_lock_thread(thr_act);
    if (thread == THREAD_NULL) {
        act_unlock_thread(thr_act);

        return (KERN_INVALID_ARGUMENT);
    }

    /* fetch the pset only after the THREAD_NULL check above */
    pset = thread->processor_set;
    if (pset == PROCESSOR_SET_NULL) {
        act_unlock_thread(thr_act);

        return (KERN_INVALID_ARGUMENT);
    }

    if (invalid_policy(policy) ||
        ((POLICY_TIMESHARE | POLICY_RR | POLICY_FIFO) & policy) == 0) {
        act_unlock_thread(thr_act);

        return (KERN_INVALID_POLICY);
    }

    if (set_limit) {
        /*
         * Set scheduling limits to base priority.
         */
        switch (policy) {

        case POLICY_RR:
        {
            policy_rr_base_t rr_base;

            if (count != POLICY_RR_BASE_COUNT) {
                result = KERN_INVALID_ARGUMENT;
                break;
            }

            limcount = POLICY_RR_LIMIT_COUNT;
            rr_base = (policy_rr_base_t) base;
            rr_limit.max_priority = rr_base->base_priority;
            limit = (policy_limit_t) &rr_limit;

            break;
        }

        case POLICY_FIFO:
        {
            policy_fifo_base_t fifo_base;

            if (count != POLICY_FIFO_BASE_COUNT) {
                result = KERN_INVALID_ARGUMENT;
                break;
            }

            limcount = POLICY_FIFO_LIMIT_COUNT;
            fifo_base = (policy_fifo_base_t) base;
            fifo_limit.max_priority = fifo_base->base_priority;
            limit = (policy_limit_t) &fifo_limit;

            break;
        }

        case POLICY_TIMESHARE:
        {
            policy_timeshare_base_t ts_base;

            if (count != POLICY_TIMESHARE_BASE_COUNT) {
                result = KERN_INVALID_ARGUMENT;
                break;
            }

            limcount = POLICY_TIMESHARE_LIMIT_COUNT;
            ts_base = (policy_timeshare_base_t) base;
            ts_limit.max_priority = ts_base->base_priority;
            limit = (policy_limit_t) &ts_limit;

            break;
        }

        default:
            result = KERN_INVALID_POLICY;
            break;
        }

    }
    else {
        /*
         * Use current scheduling limits.  Ensure that the
         * new base priority will not exceed current limits.
         */
        switch (policy) {

        case POLICY_RR:
        {
            policy_rr_base_t rr_base;

            if (count != POLICY_RR_BASE_COUNT) {
                result = KERN_INVALID_ARGUMENT;
                break;
            }

            limcount = POLICY_RR_LIMIT_COUNT;
            rr_base = (policy_rr_base_t) base;
            if (rr_base->base_priority > thread->max_priority) {
                result = KERN_POLICY_LIMIT;
                break;
            }

            rr_limit.max_priority = thread->max_priority;
            limit = (policy_limit_t) &rr_limit;

            break;
        }

        case POLICY_FIFO:
        {
            policy_fifo_base_t fifo_base;

            if (count != POLICY_FIFO_BASE_COUNT) {
                result = KERN_INVALID_ARGUMENT;
                break;
            }

            limcount = POLICY_FIFO_LIMIT_COUNT;
            fifo_base = (policy_fifo_base_t) base;
            if (fifo_base->base_priority > thread->max_priority) {
                result = KERN_POLICY_LIMIT;
                break;
            }

            fifo_limit.max_priority = thread->max_priority;
            limit = (policy_limit_t) &fifo_limit;

            break;
        }

        case POLICY_TIMESHARE:
        {
            policy_timeshare_base_t ts_base;

            if (count != POLICY_TIMESHARE_BASE_COUNT) {
                result = KERN_INVALID_ARGUMENT;
                break;
            }

            limcount = POLICY_TIMESHARE_LIMIT_COUNT;
            ts_base = (policy_timeshare_base_t) base;
            if (ts_base->base_priority > thread->max_priority) {
                result = KERN_POLICY_LIMIT;
                break;
            }

            ts_limit.max_priority = thread->max_priority;
            limit = (policy_limit_t) &ts_limit;

            break;
        }

        default:
            result = KERN_INVALID_POLICY;
            break;
        }

    }

    act_unlock_thread(thr_act);

    if (result == KERN_SUCCESS)
        result = thread_set_policy(thr_act, pset,
                        policy, base, count, limit, limcount);

    return (result);
}
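
/*
 * For reference, a sketch of a typical call (hypothetical values):
 * place a thread in the timesharing class at the default base
 * priority, keeping its current limit.
 *
 *     policy_timeshare_base_data_t ts_base;
 *
 *     ts_base.base_priority = BASEPRI_DEFAULT;
 *     result = thread_policy(thr_act, POLICY_TIMESHARE,
 *                     (policy_base_t)&ts_base,
 *                     POLICY_TIMESHARE_BASE_COUNT, FALSE);
 *
 * With set_limit == FALSE the thread's existing max_priority is used
 * as the limit, and the call fails with KERN_POLICY_LIMIT if the
 * requested base exceeds it.
 */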

/*
 * Define shifts for simulating (5/8)**n.
 */

shift_data_t wait_shift[32] = {
    {1,1},{1,3},{1,-3},{2,-7},{3,5},{3,-5},{4,-8},{5,7},
    {5,-7},{6,-10},{7,10},{7,-9},{8,-11},{9,12},{9,-11},{10,-13},
    {11,14},{11,-13},{12,-15},{13,17},{13,-15},{14,-17},{15,19},{16,18},
    {16,-19},{17,22},{18,20},{18,-20},{19,26},{20,22},{20,-22},{21,-27}};
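
/*
 * Example: wait_shift[1] = {1,3} approximates one decay step,
 * usage * (5/8), as (usage >> 1) + (usage >> 3) = 0.625 * usage.
 * A negative shift2 means the second term is subtracted: for
 * wait_shift[3] = {2,-7}, (usage >> 2) - (usage >> 7) yields
 * 0.2422 * usage, close to (5/8)**3 = 0.2441.  update_priority()
 * below indexes this table by the number of missed aging ticks.
 */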

/*
 * do_priority_computation:
 *
 * Calculate new priority for thread based on its base priority plus
 * accumulated usage.  PRI_SHIFT and PRI_SHIFT_2 convert from
 * usage to priorities.  SCHED_SHIFT converts for the scaling
 * of the sched_usage field by SCHED_SCALE.  This scaling comes
 * from the multiplication by sched_load (thread_timer_delta)
 * in sched.h.  sched_load is calculated as a scaled overload
 * factor in compute_mach_factor (mach_factor.c).
 */
#ifdef PRI_SHIFT_2
#if PRI_SHIFT_2 > 0
#define do_priority_computation(thread, pri)                        \
    MACRO_BEGIN                                                     \
    (pri) = (thread)->priority    /* start with base priority */   \
        - ((thread)->sched_usage >> (PRI_SHIFT + SCHED_SHIFT))     \
        - ((thread)->sched_usage >> (PRI_SHIFT_2 + SCHED_SHIFT));  \
    if ((pri) < MINPRI_STANDARD)                                    \
        (pri) = MINPRI_STANDARD;                                    \
    else                                                            \
    if ((pri) > MAXPRI_STANDARD)                                    \
        (pri) = MAXPRI_STANDARD;                                    \
    MACRO_END
#else /* PRI_SHIFT_2 */
#define do_priority_computation(thread, pri)                        \
    MACRO_BEGIN                                                     \
    (pri) = (thread)->priority    /* start with base priority */   \
        - ((thread)->sched_usage >> (PRI_SHIFT + SCHED_SHIFT))     \
        + ((thread)->sched_usage >> (SCHED_SHIFT - PRI_SHIFT_2));  \
    if ((pri) < MINPRI_STANDARD)                                    \
        (pri) = MINPRI_STANDARD;                                    \
    else                                                            \
    if ((pri) > MAXPRI_STANDARD)                                    \
        (pri) = MAXPRI_STANDARD;                                    \
    MACRO_END
#endif /* PRI_SHIFT_2 */
#else /* defined(PRI_SHIFT_2) */
#define do_priority_computation(thread, pri)                        \
    MACRO_BEGIN                                                     \
    (pri) = (thread)->priority    /* start with base priority */   \
        - ((thread)->sched_usage >> (PRI_SHIFT + SCHED_SHIFT));    \
    if ((pri) < MINPRI_STANDARD)                                    \
        (pri) = MINPRI_STANDARD;                                    \
    else                                                            \
    if ((pri) > MAXPRI_STANDARD)                                    \
        (pri) = MAXPRI_STANDARD;                                    \
    MACRO_END
#endif /* defined(PRI_SHIFT_2) */
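
/*
 * Worked example with hypothetical shift values: take PRI_SHIFT = 4,
 * SCHED_SHIFT = 16, and PRI_SHIFT_2 undefined.  A thread with base
 * priority 31 and sched_usage = 10 << 20 computes
 *
 *     pri = 31 - ((10 << 20) >> 20) = 21,
 *
 * which is then clamped into [MINPRI_STANDARD, MAXPRI_STANDARD].
 * Accumulated usage thus lowers the scheduled priority linearly.
 */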

void
set_priority(
    register thread_t   thread,
    register int        priority)
{
    thread->priority = priority;
    compute_priority(thread, FALSE);
}

/*
 * compute_priority:
 *
 * Reset the current scheduled priority of the
 * thread according to its base priority if the
 * thread has not been promoted or depressed.
 *
 * If the thread is timesharing, adjust according
 * to recent cpu usage.
 *
 * The thread *must* be locked by the caller.
 */
void
compute_priority(
    register thread_t   thread,
    boolean_t           override_depress)
{
    register int        priority;

    if (!(thread->sched_mode & TH_MODE_PROMOTED) &&
        (!(thread->sched_mode & TH_MODE_ISDEPRESSED) ||
          override_depress)) {
        if (thread->sched_mode & TH_MODE_TIMESHARE)
            do_priority_computation(thread, priority);
        else
            priority = thread->priority;

        set_sched_pri(thread, priority);
    }
}

/*
 * compute_my_priority:
 *
 * Version of compute_priority for the current thread.
 * Caller must have the thread locked, and the thread must
 * be timesharing and not depressed.
 *
 * Only used for priority updates.
 */
void
compute_my_priority(
    register thread_t   thread)
{
    register int        priority;

    do_priority_computation(thread, priority);
    assert(thread->runq == RUN_QUEUE_NULL);
    thread->sched_pri = priority;
}

/*
 * update_priority
 *
 * Cause the priority computation of a thread that has been
 * sleeping or suspended to "catch up" with the system.  Thread
 * *MUST* be locked by caller.  If thread is running, then this
 * can only be called by the thread on itself.
 */
void
update_priority(
    register thread_t       thread)
{
    register unsigned int   ticks;
    register shift_t        shiftp;

    ticks = sched_tick - thread->sched_stamp;
    assert(ticks != 0);

    /*
     * If asleep for more than 30 seconds, forget all cpu_usage;
     * otherwise, catch up on missed aging.  (5/8)**n is approximated
     * by the two shifts in the wait_shift array.
     */
    thread->sched_stamp += ticks;
    thread_timer_delta(thread);
    if (ticks > 30) {
        thread->cpu_usage = 0;
        thread->sched_usage = 0;
    }
    else {
        thread->cpu_usage += thread->cpu_delta;
        thread->sched_usage += thread->sched_delta;

        shiftp = &wait_shift[ticks];
        if (shiftp->shift2 > 0) {
            thread->cpu_usage =
                (thread->cpu_usage >> shiftp->shift1) +
                (thread->cpu_usage >> shiftp->shift2);
            thread->sched_usage =
                (thread->sched_usage >> shiftp->shift1) +
                (thread->sched_usage >> shiftp->shift2);
        }
        else {
            thread->cpu_usage =
                (thread->cpu_usage >> shiftp->shift1) -
                (thread->cpu_usage >> -(shiftp->shift2));
            thread->sched_usage =
                (thread->sched_usage >> shiftp->shift1) -
                (thread->sched_usage >> -(shiftp->shift2));
        }
    }

    thread->cpu_delta = 0;
    thread->sched_delta = 0;

    /*
     * Check for fail-safe release.
     */
    if ((thread->sched_mode & TH_MODE_FAILSAFE) &&
        thread->sched_stamp >= thread->safe_release) {
        if (!(thread->safe_mode & TH_MODE_TIMESHARE)) {
            if (thread->safe_mode & TH_MODE_REALTIME) {
                thread->priority = BASEPRI_REALTIME;

                thread->sched_mode |= TH_MODE_REALTIME;
            }

            thread->sched_mode &= ~TH_MODE_TIMESHARE;

            if (!(thread->sched_mode & TH_MODE_ISDEPRESSED))
                set_sched_pri(thread, thread->priority);
        }

        thread->safe_mode = 0;
        thread->sched_mode &= ~TH_MODE_FAILSAFE;
    }

    /*
     * Recompute scheduled priority if appropriate.
     */
    if ((thread->sched_mode & TH_MODE_TIMESHARE) &&
        !(thread->sched_mode & TH_MODE_PROMOTED) &&
        !(thread->sched_mode & TH_MODE_ISDEPRESSED)) {
        register int new_pri;

        do_priority_computation(thread, new_pri);
        if (new_pri != thread->sched_pri) {
            run_queue_t runq;

            runq = rem_runq(thread);
            thread->sched_pri = new_pri;
            if (runq != RUN_QUEUE_NULL)
                thread_setrun(thread, TAIL_Q);
        }
    }
}

/*
 * thread_switch_continue:
 *
 * Continuation routine for a thread switch.
 *
 * Just need to arrange that the return value gets sent out correctly,
 * and that we cancel the timer or the depression called for by the
 * options to the thread_switch call.
 */
void
_mk_sp_thread_switch_continue(void)
{
    register thread_t   self = current_thread();
    int                 wait_result = self->wait_result;
    int                 option = self->saved.swtch.option;

    if (option == SWITCH_OPTION_WAIT && wait_result != THREAD_TIMED_OUT)
        thread_cancel_timer();
    else
    if (option == SWITCH_OPTION_DEPRESS)
        _mk_sp_thread_depress_abort(self, FALSE);

    thread_syscall_return(KERN_SUCCESS);
    /*NOTREACHED*/
}

/*
 * thread_switch:
 *
 * Context switch.  User may supply thread hint.
 *
 * Fixed priority threads that call this get what they asked for
 * even if that violates priority order.
 */
kern_return_t
_mk_sp_thread_switch(
    thread_act_t            hint_act,
    int                     option,
    mach_msg_timeout_t      option_time)
{
    register thread_t       self = current_thread();
    register processor_t    myprocessor;
    spl_t                   s;

    /*
     * Check and use thr_act hint if appropriate.  It is not
     * appropriate to give a hint that shares the current shuttle.
     */
    if (hint_act != THR_ACT_NULL) {
        register thread_t thread = act_lock_thread(hint_act);

        if (thread != THREAD_NULL &&
            thread != self &&
            thread->top_act == hint_act) {
            s = splsched();
            thread_lock(thread);

            /*
             * Check if the thread is in the right pset.  Then
             * pull it off its run queue.  If it
             * doesn't come, then it's not eligible.
             */
            if (thread->processor_set == self->processor_set &&
                rem_runq(thread) != RUN_QUEUE_NULL) {
                /*
                 * Hah, got it!!
                 */
                thread_unlock(thread);

                act_unlock_thread(hint_act);
                act_deallocate(hint_act);

                if (option == SWITCH_OPTION_WAIT)
                    assert_wait_timeout(option_time, THREAD_ABORTSAFE);
                else
                if (option == SWITCH_OPTION_DEPRESS)
                    _mk_sp_thread_depress_ms(option_time);

                self->saved.swtch.option = option;

                thread_run(self, _mk_sp_thread_switch_continue, thread);
                /* NOTREACHED */
            }

            thread_unlock(thread);
            splx(s);
        }

        act_unlock_thread(hint_act);
        act_deallocate(hint_act);
    }

    /*
     * No handoff hint supplied, or hint was wrong.  Call thread_block() in
     * hopes of running something else.  If nothing else is runnable,
     * thread_block will detect this.  WARNING: thread_switch with no
     * option will not do anything useful if the thread calling it is the
     * highest priority thread (can easily happen with a collection
     * of timesharing threads).
     */
    mp_disable_preemption();
    myprocessor = current_processor();
    if (option != SWITCH_OPTION_NONE ||
        myprocessor->processor_set->runq.count > 0 ||
        myprocessor->runq.count > 0) {
        mp_enable_preemption();

        if (option == SWITCH_OPTION_WAIT)
            assert_wait_timeout(option_time, THREAD_ABORTSAFE);
        else
        if (option == SWITCH_OPTION_DEPRESS)
            _mk_sp_thread_depress_ms(option_time);

        self->saved.swtch.option = option;

        thread_block_reason(_mk_sp_thread_switch_continue,
            (option == SWITCH_OPTION_DEPRESS)? AST_YIELD: AST_NONE);
    }
    else
        mp_enable_preemption();

    if (option == SWITCH_OPTION_WAIT)
        thread_cancel_timer();
    else
    if (option == SWITCH_OPTION_DEPRESS)
        _mk_sp_thread_depress_abort(self, FALSE);

    return (KERN_SUCCESS);
}
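
/*
 * For reference, the user-visible interface bound to this path is the
 * thread_switch trap declared in <mach/thread_switch.h>.  A sketch of
 * a directed yield with a 10 ms depression (hypothetical values):
 *
 *     kern_return_t kr;
 *
 *     kr = thread_switch(hint_port, SWITCH_OPTION_DEPRESS, 10);
 *
 * With a hint of MACH_PORT_NULL and SWITCH_OPTION_NONE this reduces
 * to a plain yield attempt via thread_block().
 */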

/*
 * Depress thread's priority to lowest possible for the specified interval,
 * with a value of zero resulting in no timeout being scheduled.
 */
void
_mk_sp_thread_depress_abstime(
    uint64_t            interval)
{
    register thread_t   self = current_thread();
    uint64_t            deadline;
    spl_t               s;

    s = splsched();
    wake_lock(self);
    thread_lock(self);
    if (!(self->sched_mode & TH_MODE_ISDEPRESSED)) {
        processor_t myprocessor = self->last_processor;

        self->sched_pri = DEPRESSPRI;
        myprocessor->current_pri = self->sched_pri;
        self->sched_mode &= ~TH_MODE_PREEMPT;
        self->sched_mode |= TH_MODE_DEPRESS;
        thread_unlock(self);

        if (interval != 0) {
            clock_absolutetime_interval_to_deadline(interval, &deadline);
            if (!timer_call_enter(&self->depress_timer, deadline))
                self->depress_timer_active++;
        }
    }
    else
        thread_unlock(self);
    wake_unlock(self);
    splx(s);
}

void
_mk_sp_thread_depress_ms(
    mach_msg_timeout_t  interval)
{
    uint64_t            abstime;

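    /* interval is in milliseconds; 1000*NSEC_PER_USEC is 1 ms in ns */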
    clock_interval_to_absolutetime_interval(
        interval, 1000*NSEC_PER_USEC, &abstime);
    _mk_sp_thread_depress_abstime(abstime);
}

/*
 * Priority depression expiration.
 */
void
thread_depress_expire(
    timer_call_param_t  p0,
    timer_call_param_t  p1)
{
    thread_t            thread = p0;
    spl_t               s;

    s = splsched();
    wake_lock(thread);
    if (--thread->depress_timer_active == 1) {
        thread_lock(thread);
        thread->sched_mode &= ~TH_MODE_ISDEPRESSED;
        compute_priority(thread, FALSE);
        thread_unlock(thread);
    }
    else
    if (thread->depress_timer_active == 0)
        thread_wakeup_one(&thread->depress_timer_active);
    wake_unlock(thread);
    splx(s);
}

/*
 * Prematurely abort priority depression if there is one.
 */
kern_return_t
_mk_sp_thread_depress_abort(
    register thread_t   thread,
    boolean_t           abortall)
{
    kern_return_t       result = KERN_NOT_DEPRESSED;
    spl_t               s;

    s = splsched();
    wake_lock(thread);
    thread_lock(thread);
    if (abortall || !(thread->sched_mode & TH_MODE_POLLDEPRESS)) {
        if (thread->sched_mode & TH_MODE_ISDEPRESSED) {
            thread->sched_mode &= ~TH_MODE_ISDEPRESSED;
            compute_priority(thread, FALSE);
            result = KERN_SUCCESS;
        }

        thread_unlock(thread);

        if (timer_call_cancel(&thread->depress_timer))
            thread->depress_timer_active--;
    }
    else
        thread_unlock(thread);
    wake_unlock(thread);
    splx(s);

    return (result);
}

void
_mk_sp_thread_perhaps_yield(
    thread_t        self)
{
    spl_t           s;

    assert(self == current_thread());

    s = splsched();
    if (!(self->sched_mode & (TH_MODE_REALTIME|TH_MODE_TIMESHARE))) {
        extern uint64_t     max_poll_computation;
        extern int          sched_poll_yield_shift;
        uint64_t            abstime, total_computation;

        clock_get_uptime(&abstime);
        total_computation = abstime - self->computation_epoch;
        total_computation += self->computation_metered;
        if (total_computation >= max_poll_computation) {
            processor_t     myprocessor = current_processor();
            ast_t           preempt;

            wake_lock(self);
            thread_lock(self);
            if (!(self->sched_mode & TH_MODE_ISDEPRESSED)) {
                self->sched_pri = DEPRESSPRI;
                myprocessor->current_pri = self->sched_pri;
                self->sched_mode &= ~TH_MODE_PREEMPT;
            }
            self->computation_epoch = abstime;
            self->computation_metered = 0;
            self->sched_mode |= TH_MODE_POLLDEPRESS;
            thread_unlock(self);

            abstime += (total_computation >> sched_poll_yield_shift);
            if (!timer_call_enter(&self->depress_timer, abstime))
                self->depress_timer_active++;
            wake_unlock(self);

            if ((preempt = csw_check(self, myprocessor)) != AST_NONE)
                ast_on(preempt);
        }
    }
    splx(s);
}
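
/*
 * Illustrative numbers (hypothetical): with sched_poll_yield_shift = 5,
 * a polling thread whose metered computation has reached 32 ms
 * (>= max_poll_computation) is depressed for 32 ms >> 5 = 1 ms before
 * its depress timer fires and thread_depress_expire() restores it.
 */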