/*
 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 */

#include <mach/boolean.h>
#include <mach/thread_switch.h>
#include <ipc/ipc_port.h>
#include <ipc/ipc_space.h>
#include <kern/counters.h>
#include <kern/ipc_kobject.h>
#include <kern/processor.h>
#include <kern/sched.h>
#include <kern/sched_prim.h>
#include <kern/spl.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/policy_internal.h>

#include <mach/policy.h>

#include <kern/syscall_subr.h>
#include <mach/mach_host_server.h>
#include <mach/mach_syscalls.h>
#include <sys/kdebug.h>

#ifdef MACH_BSD
extern void workqueue_thread_yielded(void);
extern sched_call_t workqueue_get_sched_callback(void);
#endif /* MACH_BSD */

/* Called from commpage to take a delayed preemption when exiting
 * the "Preemption Free Zone" (PFZ).
 */
kern_return_t
pfz_exit(
    __unused struct pfz_exit_args *args)
{
    /* For now, nothing special to do.  We'll pick up the ASTs on kernel exit. */

    return (KERN_SUCCESS);
}

/*
 * swtch and swtch_pri both attempt to context switch (logic in
 * thread_block no-ops the context switch if nothing would happen).
 * A boolean is returned that indicates whether there is anything
 * else runnable.  That's no excuse to spin, though.
 */
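
/*
 * Illustrative caller-side sketch (hypothetical, not part of this file):
 * the corresponding user-level traps are typically used from a spin/backoff
 * loop, e.g.
 *
 *     while (!try_acquire(&lock)) {          // hypothetical lock primitive
 *         if (!swtch_pri(0))                 // pri argument is unused here
 *             back_off_longer();             // hypothetical helper
 *     }
 */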

static void
swtch_continue(void)
{
    processor_t myprocessor;
    boolean_t   result;

    disable_preemption();
    myprocessor = current_processor();
    result = !SCHED(processor_queue_empty)(myprocessor) || rt_runq.count > 0;
    enable_preemption();

    thread_syscall_return(result);
    /*NOTREACHED*/
}

boolean_t
swtch(
    __unused struct swtch_args *args)
{
    processor_t myprocessor;
    boolean_t   result;

    disable_preemption();
    myprocessor = current_processor();
    if (SCHED(processor_queue_empty)(myprocessor) && rt_runq.count == 0) {
        mp_enable_preemption();

        return (FALSE);
    }
    enable_preemption();

    counter(c_swtch_block++);

    thread_block_reason((thread_continue_t)swtch_continue, NULL, AST_YIELD);

    disable_preemption();
    myprocessor = current_processor();
    result = !SCHED(processor_queue_empty)(myprocessor) || rt_runq.count > 0;
    enable_preemption();

    return (result);
}

static void
swtch_pri_continue(void)
{
    processor_t myprocessor;
    boolean_t   result;

    thread_depress_abort_internal(current_thread());

    disable_preemption();
    myprocessor = current_processor();
    result = !SCHED(processor_queue_empty)(myprocessor) || rt_runq.count > 0;
    mp_enable_preemption();

    thread_syscall_return(result);
    /*NOTREACHED*/
}

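/*
 * swtch_pri: like swtch, but first depresses the caller's scheduling priority
 * for the standard depression interval (thread_depress_time) before yielding;
 * the depression is aborted when the thread resumes.
 */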
boolean_t
swtch_pri(
    __unused struct swtch_pri_args *args)
{
    processor_t myprocessor;
    boolean_t   result;

    disable_preemption();
    myprocessor = current_processor();
    if (SCHED(processor_queue_empty)(myprocessor) && rt_runq.count == 0) {
        mp_enable_preemption();

        return (FALSE);
    }
    enable_preemption();

    counter(c_swtch_pri_block++);

    thread_depress_abstime(thread_depress_time);

    thread_block_reason((thread_continue_t)swtch_pri_continue, NULL, AST_YIELD);

    thread_depress_abort_internal(current_thread());

    disable_preemption();
    myprocessor = current_processor();
    result = !SCHED(processor_queue_empty)(myprocessor) || rt_runq.count > 0;
    enable_preemption();

    return (result);
}

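/*
 * The workqueue scheduler callback notifies the BSD workqueue subsystem when
 * one of its threads blocks or unblocks.  It is disabled across a voluntary
 * thread_switch() so that the deliberate yield is not treated as an ordinary
 * blocking event (which could otherwise prompt the workqueue to bring up an
 * additional thread), and re-enabled once the switch completes.
 */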
static boolean_t
thread_switch_disable_workqueue_sched_callback(void)
{
    sched_call_t callback = workqueue_get_sched_callback();
    return thread_disable_sched_call(current_thread(), callback) != NULL;
}

static void
thread_switch_enable_workqueue_sched_callback(void)
{
    sched_call_t callback = workqueue_get_sched_callback();
    thread_reenable_sched_call(current_thread(), callback);
}

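/*
 * Continuation resumed after a thread blocks in thread_switch(): it undoes
 * any priority depression requested by the switch option and re-enables the
 * workqueue scheduler callback before returning to user mode.
 */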
static void
thread_switch_continue(void)
{
    thread_t self = current_thread();
    int option = self->saved.swtch.option;
    boolean_t reenable_workq_callback = self->saved.swtch.reenable_workq_callback;

    if (option == SWITCH_OPTION_DEPRESS || option == SWITCH_OPTION_OSLOCK_DEPRESS)
        thread_depress_abort_internal(self);

    if (reenable_workq_callback)
        thread_switch_enable_workqueue_sched_callback();

    thread_syscall_return(KERN_SUCCESS);
    /*NOTREACHED*/
}

/*
 * thread_switch:
 *
 * Context switch.  User may supply thread hint.
 */
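
/*
 * Illustrative caller-side sketch (hypothetical): a user thread that knows
 * which thread holds a contended resource can direct the switch at it via
 * the corresponding user-level trap, e.g.
 *
 *     kern_return_t kr = thread_switch(owner_port,            // hypothetical port name
 *                                      SWITCH_OPTION_DEPRESS,
 *                                      1);                     // 1 ms
 *
 * SWITCH_OPTION_DISPATCH_CONTENTION interprets option_time in microseconds
 * (see scale_factor below) and is intended for the dispatch/pthread workqueue
 * contention path.
 */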
kern_return_t
thread_switch(
    struct thread_switch_args *args)
{
    thread_t thread = THREAD_NULL;
    thread_t self = current_thread();
    mach_port_name_t thread_name = args->thread_name;
    int option = args->option;
    mach_msg_timeout_t option_time = args->option_time;
    uint32_t scale_factor = NSEC_PER_MSEC;
    boolean_t reenable_workq_callback = FALSE;
    boolean_t depress_option = FALSE;
    boolean_t wait_option = FALSE;

    /*
     * Validate and process option.
     */
    switch (option) {
    case SWITCH_OPTION_NONE:
        workqueue_thread_yielded();
        break;
    case SWITCH_OPTION_WAIT:
        wait_option = TRUE;
        workqueue_thread_yielded();
        break;
    case SWITCH_OPTION_DEPRESS:
        depress_option = TRUE;
        workqueue_thread_yielded();
        break;
    case SWITCH_OPTION_DISPATCH_CONTENTION:
        scale_factor = NSEC_PER_USEC;
        wait_option = TRUE;
        if (thread_switch_disable_workqueue_sched_callback())
            reenable_workq_callback = TRUE;
        break;
    case SWITCH_OPTION_OSLOCK_DEPRESS:
        depress_option = TRUE;
        if (thread_switch_disable_workqueue_sched_callback())
            reenable_workq_callback = TRUE;
        break;
    case SWITCH_OPTION_OSLOCK_WAIT:
        wait_option = TRUE;
        if (thread_switch_disable_workqueue_sched_callback())
            reenable_workq_callback = TRUE;
        break;
    default:
        return (KERN_INVALID_ARGUMENT);
    }

    /*
     * Translate the port name if supplied.
     */
    if (thread_name != MACH_PORT_NULL) {
        ipc_port_t port;

        if (ipc_port_translate_send(self->task->itk_space,
                                    thread_name, &port) == KERN_SUCCESS) {
            ip_reference(port);
            ip_unlock(port);

            thread = convert_port_to_thread(port);
            ip_release(port);

            if (thread == self) {
                thread_deallocate(thread);
                thread = THREAD_NULL;
            }
        }
    }

    if (option == SWITCH_OPTION_OSLOCK_DEPRESS || option == SWITCH_OPTION_OSLOCK_WAIT) {
        if (thread != THREAD_NULL) {
            if (thread->task != self->task) {
                /*
                 * OSLock boosting only applies to other threads
                 * in your same task (even if you have a port for
                 * a thread in another task)
                 */
                thread_deallocate(thread);
                thread = THREAD_NULL;
            } else {
                /*
                 * Attempt to kick the lock owner up to our same IO throttling tier.
                 * If the thread is currently blocked in throttle_lowpri_io(),
                 * it will immediately break out.
                 *
                 * TODO: SFI break out?
                 */
                int new_policy = proc_get_effective_thread_policy(self, TASK_POLICY_IO);

                set_thread_iotier_override(thread, new_policy);
            }
        }
    }

    /*
     * Try to handoff if supplied.
     */
    if (thread != THREAD_NULL) {
        spl_t s = splsched();

        /* This may return a different thread if the target is pushing on something */
        thread_t pulled_thread = thread_run_queue_remove_for_handoff(thread);

        KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_THREAD_SWITCH) | DBG_FUNC_NONE,
                              thread_tid(thread), thread->state,
                              pulled_thread ? TRUE : FALSE, 0, 0);

        if (pulled_thread != THREAD_NULL) {
            /* We can't be dropping the last ref here */
            thread_deallocate_safe(thread);

            if (wait_option)
                assert_wait_timeout((event_t)assert_wait_timeout, THREAD_ABORTSAFE,
                                    option_time, scale_factor);
            else if (depress_option)
                thread_depress_ms(option_time);

            self->saved.swtch.option = option;
            self->saved.swtch.reenable_workq_callback = reenable_workq_callback;

            thread_run(self, (thread_continue_t)thread_switch_continue, NULL, pulled_thread);
            /* NOTREACHED */
            panic("returned from thread_run!");
        }

        splx(s);

        thread_deallocate(thread);
    }

    if (wait_option)
        assert_wait_timeout((event_t)assert_wait_timeout, THREAD_ABORTSAFE, option_time, scale_factor);
    else if (depress_option)
        thread_depress_ms(option_time);

    self->saved.swtch.option = option;
    self->saved.swtch.reenable_workq_callback = reenable_workq_callback;

    thread_block_reason((thread_continue_t)thread_switch_continue, NULL, AST_YIELD);

    if (depress_option)
        thread_depress_abort_internal(self);

    if (reenable_workq_callback)
        thread_switch_enable_workqueue_sched_callback();

    return (KERN_SUCCESS);
}

/*
 * Returns a +1 thread reference for the named thread, or THREAD_NULL if the
 * name does not denote a thread in the caller's task (the caller itself is
 * also rejected).
 */
thread_t
port_name_to_thread_for_ulock(mach_port_name_t thread_name)
{
    thread_t thread = THREAD_NULL;
    thread_t self = current_thread();

    /*
     * Translate the port name if supplied.
     */
    if (thread_name != MACH_PORT_NULL) {
        ipc_port_t port;

        if (ipc_port_translate_send(self->task->itk_space,
                                    thread_name, &port) == KERN_SUCCESS) {
            ip_reference(port);
            ip_unlock(port);

            thread = convert_port_to_thread(port);
            ip_release(port);

            if (thread == THREAD_NULL) {
                return thread;
            }

            if ((thread == self) || (thread->task != self->task)) {
                thread_deallocate(thread);
                thread = THREAD_NULL;
            }
        }
    }

    return thread;
}

/* This function is called after an assert_wait(), therefore it must not
 * cause another wait until after the thread_run() or thread_block().
 *
 * Consumes a ref on thread.
 */
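
/*
 * Typical caller pattern (illustrative sketch; event and thread names are
 * hypothetical):
 *
 *     assert_wait(&wait_event, THREAD_ABORTSAFE);
 *     wait_result_t wr = thread_handoff(owner_thread);   // consumes owner ref
 *
 * used, for example, by the ulock wait path to hand off directly to a lock
 * owner.
 */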
wait_result_t
thread_handoff(thread_t thread)
{
    thread_t deallocate_thread = THREAD_NULL;
    thread_t self = current_thread();

    /*
     * Try to handoff if supplied.
     */
    if (thread != THREAD_NULL) {
        spl_t s = splsched();

        thread_t pulled_thread = thread_run_queue_remove_for_handoff(thread);

        KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_THREAD_SWITCH) | DBG_FUNC_NONE,
                              thread_tid(thread), thread->state,
                              pulled_thread ? TRUE : FALSE, 0, 0);

        if (pulled_thread != THREAD_NULL) {
            /* We can't be dropping the last ref here */
            thread_deallocate_safe(thread);

            int result = thread_run(self, THREAD_CONTINUE_NULL, NULL, pulled_thread);

            splx(s);
            return result;
        }

        splx(s);

        deallocate_thread = thread;
        thread = THREAD_NULL;
    }

    int result = thread_block(THREAD_CONTINUE_NULL);
    if (deallocate_thread != THREAD_NULL) {
        thread_deallocate(deallocate_thread);
    }

    return result;
}

/*
 * Depress thread's priority to lowest possible for the specified interval,
 * with a value of zero resulting in no timeout being scheduled.
 */
void
thread_depress_abstime(
    uint64_t interval)
{
    thread_t self = current_thread();
    uint64_t deadline;
    spl_t s;

    s = splsched();
    thread_lock(self);
    if (!(self->sched_flags & TH_SFLAG_DEPRESSED_MASK)) {
        processor_t myprocessor = self->last_processor;

        self->sched_pri = DEPRESSPRI;

        KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_CHANGE_PRIORITY),
                              (uintptr_t)thread_tid(self),
                              self->base_pri,
                              self->sched_pri,
                              0, /* eventually, 'reason' */
                              0);

        myprocessor->current_pri = self->sched_pri;
        self->sched_flags |= TH_SFLAG_DEPRESS;

        if (interval != 0) {
            clock_absolutetime_interval_to_deadline(interval, &deadline);
            if (!timer_call_enter(&self->depress_timer, deadline, TIMER_CALL_USER_CRITICAL))
                self->depress_timer_active++;
        }
    }
    thread_unlock(self);
    splx(s);
}

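/*
 * Depress the current thread's priority for an interval expressed in
 * milliseconds (converted to absolute time before being applied).
 */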
void
thread_depress_ms(
    mach_msg_timeout_t interval)
{
    uint64_t abstime;

    clock_interval_to_absolutetime_interval(
        interval, NSEC_PER_MSEC, &abstime);
    thread_depress_abstime(abstime);
}

/*
 * Priority depression expiration.
 */
void
thread_depress_expire(
    void *p0,
    __unused void *p1)
{
    thread_t thread = p0;
    spl_t s;

    s = splsched();
    thread_lock(thread);
    if (--thread->depress_timer_active == 0) {
        thread->sched_flags &= ~TH_SFLAG_DEPRESSED_MASK;
        thread_recompute_sched_pri(thread, FALSE);
    }
    thread_unlock(thread);
    splx(s);
}

/*
 * Prematurely abort priority depression if there is one.
 */
kern_return_t
thread_depress_abort_internal(
    thread_t thread)
{
    kern_return_t result = KERN_NOT_DEPRESSED;
    spl_t s;

    s = splsched();
    thread_lock(thread);
    if (!(thread->sched_flags & TH_SFLAG_POLLDEPRESS)) {
        if (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) {
            thread->sched_flags &= ~TH_SFLAG_DEPRESSED_MASK;
            thread_recompute_sched_pri(thread, FALSE);
            result = KERN_SUCCESS;
        }

        if (timer_call_cancel(&thread->depress_timer))
            thread->depress_timer_active--;
    }
    thread_unlock(thread);
    splx(s);

    return (result);
}

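/*
 * thread_poll_yield:
 *
 * If a fixed-priority thread has accumulated enough uninterrupted computation
 * (i.e. it appears to be polling), depress its priority (TH_SFLAG_POLLDEPRESS)
 * for a period proportional to that computation so other threads can run.
 */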
void
thread_poll_yield(
    thread_t self)
{
    spl_t s;

    assert(self == current_thread());

    s = splsched();
    if (self->sched_mode == TH_MODE_FIXED) {
        uint64_t total_computation, abstime;

        abstime = mach_absolute_time();
        total_computation = abstime - self->computation_epoch;
        total_computation += self->computation_metered;
        if (total_computation >= max_poll_computation) {
            processor_t myprocessor = current_processor();
            ast_t preempt;

            thread_lock(self);
            if (!(self->sched_flags & TH_SFLAG_DEPRESSED_MASK)) {
                self->sched_pri = DEPRESSPRI;

                KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_CHANGE_PRIORITY),
                                      (uintptr_t)thread_tid(self),
                                      self->base_pri,
                                      self->sched_pri,
                                      0, /* eventually, 'reason' */
                                      0);

                myprocessor->current_pri = self->sched_pri;
            }
            self->computation_epoch = abstime;
            self->computation_metered = 0;
            self->sched_flags |= TH_SFLAG_POLLDEPRESS;

            abstime += (total_computation >> sched_poll_yield_shift);
            if (!timer_call_enter(&self->depress_timer, abstime, TIMER_CALL_USER_CRITICAL))
                self->depress_timer_active++;

            if ((preempt = csw_check(myprocessor, AST_NONE)) != AST_NONE)
                ast_on(preempt);

            thread_unlock(self);
        }
    }
    splx(s);
}

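/*
 * Yield the current thread for up to 'ms' milliseconds if there is other work
 * queued on this processor: depress priority, block, then abort the depression
 * on resumption.  Returns immediately if nothing else is runnable.
 */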
void
thread_yield_internal(
    mach_msg_timeout_t ms)
{
    processor_t myprocessor;

    disable_preemption();
    myprocessor = current_processor();
    if (SCHED(processor_queue_empty)(myprocessor) && rt_runq.count == 0) {
        mp_enable_preemption();

        return;
    }
    enable_preemption();

    thread_depress_ms(ms);

    thread_block_reason(THREAD_CONTINUE_NULL, NULL, AST_YIELD);

    thread_depress_abort_internal(current_thread());
}

/*
 * This yields to a possible non-urgent preemption pending on the current processor.
 *
 * This is useful when doing a long computation in the kernel without returning to userspace.
 *
 * As opposed to other yielding mechanisms, this does not drop the priority of the current thread.
 */
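
/*
 * Illustrative in-kernel usage sketch (hypothetical loop and helper names):
 *
 *     for (size_t i = 0; i < nentries; i++) {
 *         process_entry(&entries[i]);          // hypothetical work item
 *         if ((i % 1024) == 0)
 *             thread_yield_to_preemption();    // let a pending preemption run
 *     }
 */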
void
thread_yield_to_preemption()
{
    /*
     * ast_pending() should ideally be called with interrupts disabled, but
     * the check here is fine because csw_check() will do the right thing.
     */
    ast_t *pending_ast = ast_pending();
    ast_t ast = AST_NONE;
    processor_t p;

    if (*pending_ast & AST_PREEMPT) {
        thread_t self = current_thread();

        spl_t s = splsched();

        p = current_processor();
        thread_lock(self);
        ast = csw_check(p, AST_YIELD);
        ast_on(ast);
        thread_unlock(self);

        if (ast != AST_NONE) {
            (void)thread_block_reason(THREAD_CONTINUE_NULL, NULL, ast);
        }

        splx(s);
    }
}