osfmk/kern/syscall_subr.c

   1 /*
   2  * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28 /*
  29  * @OSF_COPYRIGHT@
  30  */
  31 /*
  32  * Mach Operating System
  33  * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
  34  * All Rights Reserved.
  35  *
  36  * Permission to use, copy, modify and distribute this software and its
  37  * documentation is hereby granted, provided that both the copyright
  38  * notice and this permission notice appear in all copies of the
  39  * software, derivative works or modified versions, and any portions
  40  * thereof, and that both notices appear in supporting documentation.
  41  *
  42  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
  43  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
  44  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
  45  *
  46  * Carnegie Mellon requests users of this software to return to
  47  *
  48  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
  49  *  School of Computer Science
  50  *  Carnegie Mellon University
  51  *  Pittsburgh PA 15213-3890
  52  *
  53  * any improvements or extensions that they make and grant Carnegie Mellon
  54  * the rights to redistribute these changes.
  55  */
  56 /*
  57  */
  58
  59 #include <mach/boolean.h>
  60 #include <mach/thread_switch.h>
  61 #include <ipc/ipc_port.h>
  62 #include <ipc/ipc_space.h>
  63 #include <kern/counters.h>
  64 #include <kern/ipc_kobject.h>
  65 #include <kern/processor.h>
  66 #include <kern/sched.h>
  67 #include <kern/sched_prim.h>
  68 #include <kern/spl.h>
  69 #include <kern/task.h>
  70 #include <kern/thread.h>
  71 #include <mach/policy.h>
  72
  73 #include <kern/syscall_subr.h>
  74 #include <mach/mach_host_server.h>
  75 #include <mach/mach_syscalls.h>
  76 #include <sys/kdebug.h>
  77
  78 #ifdef MACH_BSD
  79 extern void workqueue_thread_yielded(void);
  80 extern sched_call_t workqueue_get_sched_callback(void);
  81 #endif /* MACH_BSD */
  82
  83
  84 /* Called from commpage to take a delayed preemption when exiting
  85  * the "Preemption Free Zone" (PFZ).
  86  */
  87 kern_return_t
  88 pfz_exit(
  89 __unused        struct pfz_exit_args *args)
  90 {
  91         /* For now, nothing special to do.  We'll pick up the ASTs on kernel exit. */
  92
  93         return (KERN_SUCCESS);
  94 }
  95
  96
  97 /*
  98  *      swtch and swtch_pri both attempt to context switch (logic in
  99  *      thread_block no-ops the context switch if nothing would happen).
 100  *      A boolean is returned that indicates whether there is anything
 101  *      else runnable.
 102  *
 103  *      This boolean can be used by a thread waiting on a
 104  *      lock or condition:  If FALSE is returned, the thread is justified
 105  *      in becoming a resource hog by continuing to spin because there's
 106  *      nothing else useful that the processor could do.  If TRUE is
 107  *      returned, the thread should make one more check on the
 108  *      lock and then be a good citizen and really suspend.
 109  */
 110
 111 static void
 112 swtch_continue(void)
 113 {
 114         register processor_t    myprocessor;
 115     boolean_t                           result;
 116
 117     disable_preemption();
 118         myprocessor = current_processor();
 119         result = !SCHED(processor_queue_empty)(myprocessor) || rt_runq.count > 0;
 120         enable_preemption();
 121
 122         thread_syscall_return(result);
 123         /*NOTREACHED*/
 124 }
 125
 126 boolean_t
 127 swtch(
 128         __unused struct swtch_args *args)
 129 {
 130         register processor_t    myprocessor;
 131         boolean_t                               result;
 132
 133         disable_preemption();
 134         myprocessor = current_processor();
 135         if (SCHED(processor_queue_empty)(myprocessor) &&        rt_runq.count == 0) {
 136                 mp_enable_preemption();
 137
 138                 return (FALSE);
 139         }
 140         enable_preemption();
 141
 142         counter(c_swtch_block++);
 143
 144         thread_block_reason((thread_continue_t)swtch_continue, NULL, AST_YIELD);
 145
 146         disable_preemption();
 147         myprocessor = current_processor();
 148         result = !SCHED(processor_queue_empty)(myprocessor) || rt_runq.count > 0;
 149         enable_preemption();
 150
 151         return (result);
 152 }
 153
 154 static void
 155 swtch_pri_continue(void)
 156 {
 157         register processor_t    myprocessor;
 158     boolean_t                           result;
 159
 160         thread_depress_abort_internal(current_thread());
 161
 162     disable_preemption();
 163         myprocessor = current_processor();
 164         result = !SCHED(processor_queue_empty)(myprocessor) || rt_runq.count > 0;
 165         mp_enable_preemption();
 166
 167         thread_syscall_return(result);
 168         /*NOTREACHED*/
 169 }
 170
 171 boolean_t
 172 swtch_pri(
 173 __unused        struct swtch_pri_args *args)
 174 {
 175         register processor_t    myprocessor;
 176         boolean_t                               result;
 177
 178         disable_preemption();
 179         myprocessor = current_processor();
 180         if (SCHED(processor_queue_empty)(myprocessor) && rt_runq.count == 0) {
 181                 mp_enable_preemption();
 182
 183                 return (FALSE);
 184         }
 185         enable_preemption();
 186
 187         counter(c_swtch_pri_block++);
 188
 189         thread_depress_abstime(thread_depress_time);
 190
 191         thread_block_reason((thread_continue_t)swtch_pri_continue, NULL, AST_YIELD);
 192
 193         thread_depress_abort_internal(current_thread());
 194
 195         disable_preemption();
 196         myprocessor = current_processor();
 197         result = !SCHED(processor_queue_empty)(myprocessor) || rt_runq.count > 0;
 198         enable_preemption();
 199
 200         return (result);
 201 }
 202
 203 static int
 204 thread_switch_disable_workqueue_sched_callback(void)
 205 {
 206         sched_call_t callback = workqueue_get_sched_callback();
 207         thread_t self = current_thread();
 208         if (!callback || self->sched_call != callback) {
 209                 return FALSE;
 210         }
 211         spl_t s = splsched();
 212         thread_lock(self);
 213         thread_sched_call(self, NULL);
 214         thread_unlock(self);
 215         splx(s);
 216         return TRUE;
 217 }
 218
 219 static void
 220 thread_switch_enable_workqueue_sched_callback(void)
 221 {
 222         sched_call_t callback = workqueue_get_sched_callback();
 223         thread_t self = current_thread();
 224         spl_t s = splsched();
 225         thread_lock(self);
 226         thread_sched_call(self, callback);
 227         thread_unlock(self);
 228         splx(s);
 229 }
 230
 231 static void
 232 thread_switch_continue(void)
 233 {
 234         register thread_t       self = current_thread();
 235         int                                     option = self->saved.swtch.option;
 236         boolean_t                       reenable_workq_callback = self->saved.swtch.reenable_workq_callback;
 237
 238
 239         if (option == SWITCH_OPTION_DEPRESS || option == SWITCH_OPTION_OSLOCK_DEPRESS)
 240                 thread_depress_abort_internal(self);
 241
 242         if (reenable_workq_callback)
 243                 thread_switch_enable_workqueue_sched_callback();
 244
 245         thread_syscall_return(KERN_SUCCESS);
 246         /*NOTREACHED*/
 247 }
 248
 249 /*
 250  *      thread_switch:
 251  *
 252  *      Context switch.  User may supply thread hint.
 253  */
 254 kern_return_t
 255 thread_switch(
 256         struct thread_switch_args *args)
 257 {
 258         register thread_t               thread, self = current_thread();
 259         mach_port_name_t                thread_name = args->thread_name;
 260         int                                             option = args->option;
 261         mach_msg_timeout_t              option_time = args->option_time;
 262         uint32_t                                scale_factor = NSEC_PER_MSEC;
 263         boolean_t                               reenable_workq_callback = FALSE;
 264         boolean_t                               depress_option = FALSE;
 265         boolean_t                               wait_option = FALSE;
 266
 267     /*
 268      *  Validate and process option.
 269      */
 270     switch (option) {
 271
 272         case SWITCH_OPTION_NONE:
 273                 workqueue_thread_yielded();
 274                 break;
 275         case SWITCH_OPTION_WAIT:
 276                 wait_option = TRUE;
 277                 workqueue_thread_yielded();
 278                 break;
 279         case SWITCH_OPTION_DEPRESS:
 280                 depress_option = TRUE;
 281                 workqueue_thread_yielded();
 282                 break;
 283         case SWITCH_OPTION_DISPATCH_CONTENTION:
 284                 scale_factor = NSEC_PER_USEC;
 285                 wait_option = TRUE;
 286                 if (thread_switch_disable_workqueue_sched_callback())
 287                         reenable_workq_callback = TRUE;
 288                 break;
 289         case SWITCH_OPTION_OSLOCK_DEPRESS:
 290                 depress_option = TRUE;
 291                 if (thread_switch_disable_workqueue_sched_callback())
 292                         reenable_workq_callback = TRUE;
 293                 break;
 294         case SWITCH_OPTION_OSLOCK_WAIT:
 295                 wait_option = TRUE;
 296                 if (thread_switch_disable_workqueue_sched_callback())
 297                         reenable_workq_callback = TRUE;
 298                 break;
 299         default:
 300             return (KERN_INVALID_ARGUMENT);
 301     }
 302
 303         /*
 304          * Translate the port name if supplied.
 305          */
 306     if (thread_name != MACH_PORT_NULL) {
 307                 ipc_port_t                      port;
 308
 309                 if (ipc_port_translate_send(self->task->itk_space,
 310                                                                         thread_name, &port) == KERN_SUCCESS) {
 311                         ip_reference(port);
 312                         ip_unlock(port);
 313
 314                         thread = convert_port_to_thread(port);
 315                         ip_release(port);
 316
 317                         if (thread == self) {
 318                                 (void)thread_deallocate_internal(thread);
 319                                 thread = THREAD_NULL;
 320                         }
 321                 }
 322                 else
 323                         thread = THREAD_NULL;
 324         }
 325         else
 326                 thread = THREAD_NULL;
 327
 328
 329         if (option == SWITCH_OPTION_OSLOCK_DEPRESS || option == SWITCH_OPTION_OSLOCK_WAIT) {
 330                 if (thread != THREAD_NULL) {
 331
 332                         if (thread->task != self->task) {
 333                                 /*
 334                                  * OSLock boosting only applies to other threads
 335                                  * in your same task (even if you have a port for
 336                                  * a thread in another task)
 337                                  */
 338
 339                                 (void)thread_deallocate_internal(thread);
 340                                 thread = THREAD_NULL;
 341                         } else {
 342                                 /*
 343                                  * Attempt to kick the lock owner up to our same IO throttling tier.
 344                                  * If the thread is currently blocked in throttle_lowpri_io(),
 345                                  * it will immediately break out.
 346                                  */
 347                                 int new_policy = proc_get_effective_thread_policy(self, TASK_POLICY_IO);
 348
 349                                 set_thread_iotier_override(thread, new_policy);
 350                         }
 351                 }
 352         }
 353
 354         /*
 355          * Try to handoff if supplied.
 356          */
 357         if (thread != THREAD_NULL) {
 358                 processor_t             processor;
 359                 spl_t                   s;
 360
 361                 s = splsched();
 362                 thread_lock(thread);
 363
 364                 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED,MACH_SCHED_THREAD_SWITCH)|DBG_FUNC_NONE,
 365                                                           thread_tid(thread), thread->state, 0, 0, 0);
 366
 367                 /*
 368                  *      Check that the thread is not bound
 369                  *      to a different processor, and that realtime
 370                  *      is not involved.
 371                  *
 372                  *      Next, pull it off its run queue.  If it
 373                  *      doesn't come, it's not eligible.
 374                  */
 375                 processor = current_processor();
 376                 if (processor->current_pri < BASEPRI_RTQUEUES                   &&
 377                         thread->sched_pri < BASEPRI_RTQUEUES                            &&
 378                         (thread->bound_processor == PROCESSOR_NULL      ||
 379                          thread->bound_processor == processor)                          &&
 380                                 thread_run_queue_remove(thread)                                                 ) {
 381                         /*
 382                          *      Hah, got it!!
 383                          */
 384                         thread_unlock(thread);
 385
 386                         (void)thread_deallocate_internal(thread);
 387
 388                         if (wait_option)
 389                                 assert_wait_timeout((event_t)assert_wait_timeout, THREAD_ABORTSAFE,
 390                                                                                                                 option_time, scale_factor);
 391                         else
 392                         if (depress_option)
 393                                 thread_depress_ms(option_time);
 394
 395                         self->saved.swtch.option = option;
 396                         self->saved.swtch.reenable_workq_callback = reenable_workq_callback;
 397
 398                         thread_run(self, (thread_continue_t)thread_switch_continue, NULL, thread);
 399                         /* NOTREACHED */
 400                 }
 401
 402                 thread_unlock(thread);
 403                 splx(s);
 404
 405                 thread_deallocate(thread);
 406         }
 407
 408         if (wait_option)
 409                 assert_wait_timeout((event_t)assert_wait_timeout, THREAD_ABORTSAFE, option_time, scale_factor);
 410         else
 411         if (depress_option)
 412                 thread_depress_ms(option_time);
 413
 414         self->saved.swtch.option = option;
 415         self->saved.swtch.reenable_workq_callback = reenable_workq_callback;
 416
 417         thread_block_reason((thread_continue_t)thread_switch_continue, NULL, AST_YIELD);
 418
 419         if (depress_option)
 420                 thread_depress_abort_internal(self);
 421
 422         if (reenable_workq_callback)
 423                 thread_switch_enable_workqueue_sched_callback();
 424
 425     return (KERN_SUCCESS);
 426 }
 427
 428 /*
 429  * Depress thread's priority to lowest possible for the specified interval,
 430  * with a value of zero resulting in no timeout being scheduled.
 431  */
 432 void
 433 thread_depress_abstime(
 434         uint64_t                                interval)
 435 {
 436         register thread_t               self = current_thread();
 437         uint64_t                                deadline;
 438     spl_t                                       s;
 439
 440     s = splsched();
 441     thread_lock(self);
 442         if (!(self->sched_flags & TH_SFLAG_DEPRESSED_MASK)) {
 443                 processor_t             myprocessor = self->last_processor;
 444
 445                 self->sched_pri = DEPRESSPRI;
 446                 myprocessor->current_pri = self->sched_pri;
 447                 self->sched_flags |= TH_SFLAG_DEPRESS;
 448
 449                 if (interval != 0) {
 450                         clock_absolutetime_interval_to_deadline(interval, &deadline);
 451                         if (!timer_call_enter(&self->depress_timer, deadline, TIMER_CALL_USER_CRITICAL))
 452                                 self->depress_timer_active++;
 453                 }
 454         }
 455         thread_unlock(self);
 456     splx(s);
 457 }
 458
 459 void
 460 thread_depress_ms(
 461         mach_msg_timeout_t              interval)
 462 {
 463         uint64_t                abstime;
 464
 465         clock_interval_to_absolutetime_interval(
 466                                                         interval, NSEC_PER_MSEC, &abstime);
 467         thread_depress_abstime(abstime);
 468 }
 469
 470 /*
 471  *      Priority depression expiration.
 472  */
 473 void
 474 thread_depress_expire(
 475         void                    *p0,
 476         __unused void   *p1)
 477 {
 478         thread_t                thread = p0;
 479     spl_t                       s;
 480
 481     s = splsched();
 482     thread_lock(thread);
 483         if (--thread->depress_timer_active == 0) {
 484                 thread->sched_flags &= ~TH_SFLAG_DEPRESSED_MASK;
 485                 SCHED(compute_priority)(thread, FALSE);
 486         }
 487     thread_unlock(thread);
 488     splx(s);
 489 }
 490
 491 /*
 492  *      Prematurely abort priority depression if there is one.
 493  */
 494 kern_return_t
 495 thread_depress_abort_internal(
 496         thread_t                                thread)
 497 {
 498     kern_return_t                       result = KERN_NOT_DEPRESSED;
 499     spl_t                                       s;
 500
 501     s = splsched();
 502     thread_lock(thread);
 503         if (!(thread->sched_flags & TH_SFLAG_POLLDEPRESS)) {
 504                 if (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) {
 505                         thread->sched_flags &= ~TH_SFLAG_DEPRESSED_MASK;
 506                         SCHED(compute_priority)(thread, FALSE);
 507                         result = KERN_SUCCESS;
 508                 }
 509
 510                 if (timer_call_cancel(&thread->depress_timer))
 511                         thread->depress_timer_active--;
 512         }
 513         thread_unlock(thread);
 514     splx(s);
 515
 516     return (result);
 517 }
 518
 519 void
 520 thread_poll_yield(
 521         thread_t                self)
 522 {
 523         spl_t                   s;
 524
 525         assert(self == current_thread());
 526
 527         s = splsched();
 528         if (self->sched_mode == TH_MODE_FIXED) {
 529                 uint64_t                        total_computation, abstime;
 530
 531                 abstime = mach_absolute_time();
 532                 total_computation = abstime - self->computation_epoch;
 533                 total_computation += self->computation_metered;
 534                 if (total_computation >= max_poll_computation) {
 535                         processor_t             myprocessor = current_processor();
 536                         ast_t                   preempt;
 537
 538                         thread_lock(self);
 539                         if (!(self->sched_flags & TH_SFLAG_DEPRESSED_MASK)) {
 540                                 self->sched_pri = DEPRESSPRI;
 541                                 myprocessor->current_pri = self->sched_pri;
 542                         }
 543                         self->computation_epoch = abstime;
 544                         self->computation_metered = 0;
 545                         self->sched_flags |= TH_SFLAG_POLLDEPRESS;
 546
 547                         abstime += (total_computation >> sched_poll_yield_shift);
 548                         if (!timer_call_enter(&self->depress_timer, abstime, TIMER_CALL_USER_CRITICAL))
 549                                 self->depress_timer_active++;
 550
 551                         if ((preempt = csw_check(myprocessor, AST_NONE)) != AST_NONE)
 552                                 ast_on(preempt);
 553
 554                         thread_unlock(self);
 555                 }
 556         }
 557         splx(s);
 558 }
 559
 560
 561 void
 562 thread_yield_internal(
 563         mach_msg_timeout_t      ms)
 564 {
 565         processor_t     myprocessor;
 566
 567         disable_preemption();
 568         myprocessor = current_processor();
 569         if (SCHED(processor_queue_empty)(myprocessor) && rt_runq.count == 0) {
 570                 mp_enable_preemption();
 571
 572                 return;
 573         }
 574         enable_preemption();
 575
 576         thread_depress_ms(ms);
 577
 578         thread_block_reason(THREAD_CONTINUE_NULL, NULL, AST_YIELD);
 579
 580         thread_depress_abort_internal(current_thread());
 581 }
 582