osfmk/kern/priority.c

   1 /*
   2  * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28 /*
  29  * @OSF_COPYRIGHT@
  30  */
  31 /*
  32  * Mach Operating System
  33  * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
  34  * All Rights Reserved.
  35  *
  36  * Permission to use, copy, modify and distribute this software and its
  37  * documentation is hereby granted, provided that both the copyright
  38  * notice and this permission notice appear in all copies of the
  39  * software, derivative works or modified versions, and any portions
  40  * thereof, and that both notices appear in supporting documentation.
  41  *
  42  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
  43  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
  44  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
  45  *
  46  * Carnegie Mellon requests users of this software to return to
  47  *
  48  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
  49  *  School of Computer Science
  50  *  Carnegie Mellon University
  51  *  Pittsburgh PA 15213-3890
  52  *
  53  * any improvements or extensions that they make and grant Carnegie Mellon
  54  * the rights to redistribute these changes.
  55  */
  56 /*
  57  */
  58 /*
  59  *      File:   clock_prim.c
  60  *      Author: Avadis Tevanian, Jr.
  61  *      Date:   1986
  62  *
  63  *      Clock primitives.
  64  */
  65
  66 #include <mach/boolean.h>
  67 #include <mach/kern_return.h>
  68 #include <mach/machine.h>
  69 #include <kern/host.h>
  70 #include <kern/mach_param.h>
  71 #include <kern/sched.h>
  72 #include <sys/kdebug.h>
  73 #include <kern/spl.h>
  74 #include <kern/thread.h>
  75 #include <kern/processor.h>
  76 #include <kern/ledger.h>
  77 #include <machine/machparam.h>
  78
  79 /*
  80  *      thread_quantum_expire:
  81  *
  82  *      Recalculate the quantum and priority for a thread.
  83  *
  84  *      Called at splsched.
  85  */
  86
  87 void
  88 thread_quantum_expire(
  89         timer_call_param_t      p0,
  90         timer_call_param_t      p1)
  91 {
  92         processor_t                     processor = p0;
  93         thread_t                        thread = p1;
  94         ast_t                           preempt;
  95         uint64_t                        ctime;
  96
  97         SCHED_STATS_QUANTUM_TIMER_EXPIRATION(processor);
  98
  99         /*
 100          * We bill CPU time to both the individual thread and its task.
 101          *
 102          * Because this balance adjustment could potentially attempt to wake this very
 103          * thread, we must credit the ledger before taking the thread lock. The ledger
 104          * pointers are only manipulated by the thread itself at the ast boundary.
 105          */
 106         ledger_credit(thread->t_ledger, task_ledgers.cpu_time, thread->current_quantum);
 107         ledger_credit(thread->t_threadledger, thread_ledgers.cpu_time, thread->current_quantum);
 108
 109         thread_lock(thread);
 110
 111         /*
 112          * We've run up until our quantum expiration, and will (potentially)
 113          * continue without re-entering the scheduler, so update this now.
 114          */
 115         thread->last_run_time = processor->quantum_end;
 116
 117         /*
 118          *      Check for fail-safe trip.
 119          */
 120         if ((thread->sched_mode == TH_MODE_REALTIME || thread->sched_mode == TH_MODE_FIXED) &&
 121             !(thread->sched_flags & TH_SFLAG_PROMOTED_MASK) &&
 122             !(thread->options & TH_OPT_SYSTEM_CRITICAL)) {
 123                 uint64_t new_computation;
 124
 125                 new_computation = processor->quantum_end - thread->computation_epoch;
 126                 new_computation += thread->computation_metered;
 127                 if (new_computation > max_unsafe_computation) {
 128                         KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_FAILSAFE)|DBG_FUNC_NONE,
 129                                         (uintptr_t)thread->sched_pri, (uintptr_t)thread->sched_mode, 0, 0, 0);
 130
 131                         if (thread->sched_mode == TH_MODE_REALTIME) {
 132                                 thread->priority = DEPRESSPRI;
 133                         }
 134
 135                         thread->saved_mode = thread->sched_mode;
 136
 137                         if (SCHED(supports_timeshare_mode)) {
 138                                 sched_share_incr();
 139                                 thread->sched_mode = TH_MODE_TIMESHARE;
 140                         } else {
 141                                 /* XXX handle fixed->fixed case */
 142                                 thread->sched_mode = TH_MODE_FIXED;
 143                         }
 144
 145                         thread->safe_release = processor->quantum_end + sched_safe_duration;
 146                         thread->sched_flags |= TH_SFLAG_FAILSAFE;
 147                 }
 148         }
 149
 150         /*
 151          *      Recompute scheduled priority if appropriate.
 152          */
 153         if (SCHED(can_update_priority)(thread))
 154                 SCHED(update_priority)(thread);
 155         else
 156                 SCHED(lightweight_update_priority)(thread);
 157
 158         SCHED(quantum_expire)(thread);
 159
 160         processor->current_pri = thread->sched_pri;
 161         processor->current_thmode = thread->sched_mode;
 162
 163         /*
 164          *      This quantum is up, give this thread another.
 165          */
 166         if (first_timeslice(processor))
 167                 processor->timeslice--;
 168
 169         thread_quantum_init(thread);
 170         thread->last_quantum_refill_time = processor->quantum_end;
 171
 172         /* Reload precise timing global policy to thread-local policy */
 173         thread->precise_user_kernel_time = use_precise_user_kernel_time(thread);
 174
 175         /*
 176          * Since non-precise user/kernel time doesn't update the state/thread timer
 177          * during privilege transitions, synthesize an event now.
 178          */
 179         if (!thread->precise_user_kernel_time) {
 180                 timer_switch(PROCESSOR_DATA(processor, current_state),
 181                                          processor->quantum_end,
 182                                          PROCESSOR_DATA(processor, current_state));
 183                 timer_switch(PROCESSOR_DATA(processor, thread_timer),
 184                                          processor->quantum_end,
 185                                          PROCESSOR_DATA(processor, thread_timer));
 186         }
 187
 188         ctime = mach_absolute_time();
 189         processor->quantum_end = ctime + thread->current_quantum;
 190         timer_call_enter1(&processor->quantum_timer, thread,
 191             processor->quantum_end, TIMER_CALL_SYS_CRITICAL);
 192
 193         /*
 194          *      Context switch check.
 195          */
 196         if ((preempt = csw_check(processor)) != AST_NONE)
 197                 ast_on(preempt);
 198         else {
 199                 processor_set_t         pset = processor->processor_set;
 200
 201                 pset_lock(pset);
 202
 203                 pset_pri_hint(pset, processor, processor->current_pri);
 204                 pset_count_hint(pset, processor, SCHED(processor_runq_count)(processor));
 205
 206                 pset_unlock(pset);
 207         }
 208
 209         thread_unlock(thread);
 210
 211 #if defined(CONFIG_SCHED_TRADITIONAL)
 212         sched_traditional_consider_maintenance(ctime);
 213 #endif /* CONFIG_SCHED_TRADITIONAL */
 214 }
 215
 216 #if defined(CONFIG_SCHED_TRADITIONAL)
 217
 218 void
 219 sched_traditional_quantum_expire(thread_t       thread __unused)
 220 {
 221         /*
 222          * No special behavior when a timeshare, fixed, or realtime thread
 223          * uses up its entire quantum
 224          */
 225 }
 226
 227 void
 228 lightweight_update_priority(thread_t thread)
 229 {
 230         if (thread->sched_mode == TH_MODE_TIMESHARE) {
 231                 register uint32_t       delta;
 232
 233                 thread_timer_delta(thread, delta);
 234
 235                 /*
 236                  *      Accumulate timesharing usage only
 237                  *      during contention for processor
 238                  *      resources.
 239                  */
 240                 if (thread->pri_shift < INT8_MAX)
 241                         thread->sched_usage += delta;
 242
 243                 thread->cpu_delta += delta;
 244
 245                 /*
 246                  * Adjust the scheduled priority if
 247                  * the thread has not been promoted
 248                  * and is not depressed.
 249                  */
 250                 if (    !(thread->sched_flags & TH_SFLAG_PROMOTED_MASK) &&
 251                         !(thread->sched_flags & TH_SFLAG_DEPRESSED_MASK)                )
 252                         compute_my_priority(thread);
 253         }
 254 }
 255
 256 /*
 257  *      Define shifts for simulating (5/8) ** n
 258  *
 259  *      Shift structures for holding update shifts.  Actual computation
 260  *      is  usage = (usage >> shift1) +/- (usage >> abs(shift2))  where the
 261  *      +/- is determined by the sign of shift 2.
 262  */
 263 struct shift_data {
 264         int     shift1;
 265         int     shift2;
 266 };
 267
 268 #define SCHED_DECAY_TICKS       32
 269 static struct shift_data        sched_decay_shifts[SCHED_DECAY_TICKS] = {
 270         {1,1},{1,3},{1,-3},{2,-7},{3,5},{3,-5},{4,-8},{5,7},
 271         {5,-7},{6,-10},{7,10},{7,-9},{8,-11},{9,12},{9,-11},{10,-13},
 272         {11,14},{11,-13},{12,-15},{13,17},{13,-15},{14,-17},{15,19},{16,18},
 273         {16,-19},{17,22},{18,20},{18,-20},{19,26},{20,22},{20,-22},{21,-27}
 274 };
 275
 276 /*
 277  *      do_priority_computation:
 278  *
 279  *      Calculate the timesharing priority based upon usage and load.
 280  */
 281
 282 #define do_priority_computation(thread, pri)                                                    \
 283         MACRO_BEGIN                                                                                                                     \
 284         (pri) = (thread)->priority              /* start with base priority */          \
 285             - ((thread)->sched_usage >> (thread)->pri_shift);                           \
 286         if ((pri) < MINPRI_USER)                                                                                        \
 287                 (pri) = MINPRI_USER;                                                                                    \
 288         else                                                                                                                            \
 289         if ((pri) > MAXPRI_KERNEL)                                                                                      \
 290                 (pri) = MAXPRI_KERNEL;                                                                                  \
 291         MACRO_END
 292
 293
 294 #endif
 295
 296 /*
 297  *      set_priority:
 298  *
 299  *      Set the base priority of the thread
 300  *      and reset its scheduled priority.
 301  *
 302  *      Called with the thread locked.
 303  */
 304 void
 305 set_priority(
 306         register thread_t       thread,
 307         register int            priority)
 308 {
 309         thread->priority = priority;
 310         SCHED(compute_priority)(thread, FALSE);
 311 }
 312
 313 #if defined(CONFIG_SCHED_TRADITIONAL)
 314
 315 /*
 316  *      compute_priority:
 317  *
 318  *      Reset the scheduled priority of the thread
 319  *      according to its base priority if the
 320  *      thread has not been promoted or depressed.
 321  *
 322  *      Called with the thread locked.
 323  */
 324 void
 325 compute_priority(
 326         register thread_t       thread,
 327         boolean_t                       override_depress)
 328 {
 329         register int            priority;
 330
 331         if (    !(thread->sched_flags & TH_SFLAG_PROMOTED_MASK)                 &&
 332                         (!(thread->sched_flags & TH_SFLAG_DEPRESSED_MASK)       ||
 333                                  override_depress                                                       )               ) {
 334                 if (thread->sched_mode == TH_MODE_TIMESHARE)
 335                         do_priority_computation(thread, priority);
 336                 else
 337                         priority = thread->priority;
 338
 339                 set_sched_pri(thread, priority);
 340         }
 341 }
 342
 343 /*
 344  *      compute_my_priority:
 345  *
 346  *      Reset the scheduled priority for
 347  *      a timesharing thread.
 348  *
 349  *      Only for use on the current thread
 350  *      if timesharing and not depressed.
 351  *
 352  *      Called with the thread locked.
 353  */
 354 void
 355 compute_my_priority(
 356         register thread_t       thread)
 357 {
 358         register int            priority;
 359
 360         do_priority_computation(thread, priority);
 361         assert(thread->runq == PROCESSOR_NULL);
 362         thread->sched_pri = priority;
 363 }
 364
 365 /*
 366  *      can_update_priority
 367  *
 368  *      Make sure we don't do re-dispatches more frequently than a scheduler tick.
 369  *
 370  *      Called with the thread locked.
 371  */
 372 boolean_t
 373 can_update_priority(
 374                                         thread_t        thread)
 375 {
 376         if (sched_tick == thread->sched_stamp)
 377                 return (FALSE);
 378         else
 379                 return (TRUE);
 380 }
 381
 382 /*
 383  *      update_priority
 384  *
 385  *      Perform housekeeping operations driven by scheduler tick.
 386  *
 387  *      Called with the thread locked.
 388  */
 389 void
 390 update_priority(
 391         register thread_t       thread)
 392 {
 393         register unsigned       ticks;
 394         register uint32_t       delta;
 395
 396         ticks = sched_tick - thread->sched_stamp;
 397         assert(ticks != 0);
 398         thread->sched_stamp += ticks;
 399         if (sched_use_combined_fgbg_decay)
 400                 thread->pri_shift = sched_combined_fgbg_pri_shift;
 401         else if (thread->max_priority <= MAXPRI_THROTTLE)
 402                 thread->pri_shift = sched_background_pri_shift;
 403         else
 404                 thread->pri_shift = sched_pri_shift;
 405
 406         /* If requested, accelerate aging of sched_usage */
 407         if (sched_decay_usage_age_factor > 1)
 408                 ticks *= sched_decay_usage_age_factor;
 409
 410         /*
 411          *      Gather cpu usage data.
 412          */
 413         thread_timer_delta(thread, delta);
 414         if (ticks < SCHED_DECAY_TICKS) {
 415                 register struct shift_data      *shiftp;
 416
 417                 /*
 418                  *      Accumulate timesharing usage only
 419                  *      during contention for processor
 420                  *      resources.
 421                  */
 422                 if (thread->pri_shift < INT8_MAX)
 423                         thread->sched_usage += delta;
 424
 425                 thread->cpu_usage += delta + thread->cpu_delta;
 426                 thread->cpu_delta = 0;
 427
 428                 shiftp = &sched_decay_shifts[ticks];
 429                 if (shiftp->shift2 > 0) {
 430                     thread->cpu_usage =
 431                                                 (thread->cpu_usage >> shiftp->shift1) +
 432                                                 (thread->cpu_usage >> shiftp->shift2);
 433                     thread->sched_usage =
 434                                                 (thread->sched_usage >> shiftp->shift1) +
 435                                                 (thread->sched_usage >> shiftp->shift2);
 436                 }
 437                 else {
 438                     thread->cpu_usage =
 439                                                 (thread->cpu_usage >> shiftp->shift1) -
 440                                                 (thread->cpu_usage >> -(shiftp->shift2));
 441                     thread->sched_usage =
 442                                                 (thread->sched_usage >> shiftp->shift1) -
 443                                                 (thread->sched_usage >> -(shiftp->shift2));
 444                 }
 445         }
 446         else {
 447                 thread->cpu_usage = thread->cpu_delta = 0;
 448                 thread->sched_usage = 0;
 449         }
 450
 451         /*
 452          *      Check for fail-safe release.
 453          */
 454         if (    (thread->sched_flags & TH_SFLAG_FAILSAFE)               &&
 455                         mach_absolute_time() >= thread->safe_release            ) {
 456                 if (thread->saved_mode != TH_MODE_TIMESHARE) {
 457                         if (thread->saved_mode == TH_MODE_REALTIME) {
 458                                 thread->priority = BASEPRI_RTQUEUES;
 459                         }
 460
 461                         thread->sched_mode = thread->saved_mode;
 462                         thread->saved_mode = TH_MODE_NONE;
 463
 464                         if ((thread->state & (TH_RUN|TH_IDLE)) == TH_RUN)
 465                                 sched_share_decr();
 466
 467                         if (!(thread->sched_flags & TH_SFLAG_DEPRESSED_MASK))
 468                                 set_sched_pri(thread, thread->priority);
 469                 }
 470
 471                 thread->sched_flags &= ~TH_SFLAG_FAILSAFE;
 472         }
 473
 474
 475         /*
 476          *      Recompute scheduled priority if appropriate.
 477          */
 478         if (    (thread->sched_mode == TH_MODE_TIMESHARE)       &&
 479                         !(thread->sched_flags & TH_SFLAG_PROMOTED_MASK) &&
 480                         !(thread->sched_flags & TH_SFLAG_DEPRESSED_MASK)                ) {
 481                 register int            new_pri;
 482
 483                 do_priority_computation(thread, new_pri);
 484                 if (new_pri != thread->sched_pri) {
 485                         boolean_t               removed = thread_run_queue_remove(thread);
 486
 487 #if 0
 488                         if (sched_use_combined_fgbg_decay && ((thread)->task->max_priority > MAXPRI_THROTTLE) && (new_pri == MAXPRI_THROTTLE)) {
 489                                 /* with the alternate (new) algorithm, would we have decayed this far? */
 490                                 int alt_pri = thread->priority - (thread->sched_usage >> sched_pri_shift);
 491                                 if ((alt_pri > new_pri) && (sched_background_count > 0)) {
 492                                         printf("thread %p would have decayed to only %d instead of %d\n", thread, alt_pri, new_pri);
 493                                 }
 494                         }
 495 #endif
 496
 497                         KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_DECAY_PRIORITY)|DBG_FUNC_NONE,
 498                                                           (uintptr_t)thread_tid(thread),
 499                                                           thread->priority,
 500                                                           thread->sched_pri,
 501                                                           new_pri,
 502                                                           0);
 503                         thread->sched_pri = new_pri;
 504
 505                         if (removed)
 506                                 thread_setrun(thread, SCHED_TAILQ);
 507                 }
 508         }
 509
 510         return;
 511 }
 512
 513 #endif /* CONFIG_SCHED_TRADITIONAL */