/*
 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 */

#include <mach/boolean.h>
#include <mach/thread_switch.h>
#include <ipc/ipc_port.h>
#include <ipc/ipc_space.h>
#include <kern/counters.h>
#include <kern/ipc_kobject.h>
#include <kern/processor.h>
#include <kern/sched.h>
#include <kern/sched_prim.h>
#include <kern/spl.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/policy_internal.h>

#include <mach/policy.h>

#include <kern/syscall_subr.h>
#include <mach/mach_host_server.h>
#include <mach/mach_syscalls.h>
#include <sys/kdebug.h>
#include <kern/ast.h>

#ifdef MACH_BSD
extern void workqueue_thread_yielded(void);
extern sched_call_t workqueue_get_sched_callback(void);
#endif /* MACH_BSD */

extern wait_result_t thread_handoff_reason(thread_t thread, ast_t reason);

/* Called from commpage to take a delayed preemption when exiting
 * the "Preemption Free Zone" (PFZ).
 */
kern_return_t
pfz_exit(
	__unused struct pfz_exit_args *args)
{
	/* For now, nothing special to do.  We'll pick up the ASTs on kernel exit. */

	return (KERN_SUCCESS);
}
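
/*
 * Background sketch: the PFZ is the short stretch of commpage code (for
 * example, the commpage atomic-FIFO helpers) during which the kernel holds
 * off preemption.  When that code notices a preemption was deferred, it
 * traps into pfz_exit() on its way out; the deferred AST is then taken on
 * the normal kernel-exit path, which is why the handler body is empty.
 */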

/*
 * swtch and swtch_pri both attempt to context switch (logic in
 * thread_block no-ops the context switch if nothing would happen).
 * A boolean is returned that indicates whether there is anything
 * else runnable.  That's no excuse to spin, though.
 */

static void
swtch_continue(void)
{
	processor_t	myprocessor;
	boolean_t	result;

	disable_preemption();
	myprocessor = current_processor();
	result = !SCHED(processor_queue_empty)(myprocessor) || rt_runq.count > 0;
	enable_preemption();

	thread_syscall_return(result);
	/*NOTREACHED*/
}

boolean_t
swtch(
	__unused struct swtch_args *args)
{
	processor_t	myprocessor;
	boolean_t	result;

	disable_preemption();
	myprocessor = current_processor();
	if (SCHED(processor_queue_empty)(myprocessor) && rt_runq.count == 0) {
		mp_enable_preemption();

		return (FALSE);
	}
	enable_preemption();

	counter(c_swtch_block++);

	thread_block_reason((thread_continue_t)swtch_continue, NULL, AST_YIELD);

	disable_preemption();
	myprocessor = current_processor();
	result = !SCHED(processor_queue_empty)(myprocessor) || rt_runq.count > 0;
	enable_preemption();

	return (result);
}
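
/*
 * Usage sketch (illustrative only): from user space the trap is normally
 * reached through the wrapper declared in <mach/mach_traps.h>; the
 * try_lock()/backoff() names below are placeholders:
 *
 *	extern boolean_t swtch(void);
 *
 *	while (!try_lock(&lock)) {
 *		if (!swtch())
 *			backoff();    nothing else was runnable, so back
 *			              off rather than spinning
 *	}
 */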

static void
swtch_pri_continue(void)
{
	processor_t	myprocessor;
	boolean_t	result;

	thread_depress_abort_internal(current_thread());

	disable_preemption();
	myprocessor = current_processor();
	result = !SCHED(processor_queue_empty)(myprocessor) || rt_runq.count > 0;
	mp_enable_preemption();

	thread_syscall_return(result);
	/*NOTREACHED*/
}

boolean_t
swtch_pri(
	__unused struct swtch_pri_args *args)
{
	processor_t	myprocessor;
	boolean_t	result;

	disable_preemption();
	myprocessor = current_processor();
	if (SCHED(processor_queue_empty)(myprocessor) && rt_runq.count == 0) {
		mp_enable_preemption();

		return (FALSE);
	}
	enable_preemption();

	counter(c_swtch_pri_block++);

	thread_depress_abstime(thread_depress_time);

	thread_block_reason((thread_continue_t)swtch_pri_continue, NULL, AST_YIELD);

	thread_depress_abort_internal(current_thread());

	disable_preemption();
	myprocessor = current_processor();
	result = !SCHED(processor_queue_empty)(myprocessor) || rt_runq.count > 0;
	enable_preemption();

	return (result);
}

static boolean_t
thread_switch_disable_workqueue_sched_callback(void)
{
	sched_call_t callback = workqueue_get_sched_callback();
	return thread_disable_sched_call(current_thread(), callback) != NULL;
}

static void
thread_switch_enable_workqueue_sched_callback(void)
{
	sched_call_t callback = workqueue_get_sched_callback();
	thread_reenable_sched_call(current_thread(), callback);
}

static void
thread_switch_continue(void)
{
	thread_t self = current_thread();
	int option = self->saved.swtch.option;
	boolean_t reenable_workq_callback = self->saved.swtch.reenable_workq_callback;

	if (option == SWITCH_OPTION_DEPRESS || option == SWITCH_OPTION_OSLOCK_DEPRESS)
		thread_depress_abort_internal(self);

	if (reenable_workq_callback)
		thread_switch_enable_workqueue_sched_callback();

	thread_syscall_return(KERN_SUCCESS);
	/*NOTREACHED*/
}

/*
 * thread_switch:
 *
 *	Context switch.  User may supply thread hint.
 */
kern_return_t
thread_switch(
	struct thread_switch_args *args)
{
	thread_t		thread = THREAD_NULL;
	thread_t		self = current_thread();
	mach_port_name_t	thread_name = args->thread_name;
	int			option = args->option;
	mach_msg_timeout_t	option_time = args->option_time;
	uint32_t		scale_factor = NSEC_PER_MSEC;
	boolean_t		reenable_workq_callback = FALSE;
	boolean_t		depress_option = FALSE;
	boolean_t		wait_option = FALSE;

	/*
	 *	Validate and process option.
	 */
	switch (option) {

	case SWITCH_OPTION_NONE:
		workqueue_thread_yielded();
		break;
	case SWITCH_OPTION_WAIT:
		wait_option = TRUE;
		workqueue_thread_yielded();
		break;
	case SWITCH_OPTION_DEPRESS:
		depress_option = TRUE;
		workqueue_thread_yielded();
		break;
	case SWITCH_OPTION_DISPATCH_CONTENTION:
		scale_factor = NSEC_PER_USEC;
		wait_option = TRUE;
		if (thread_switch_disable_workqueue_sched_callback())
			reenable_workq_callback = TRUE;
		break;
	case SWITCH_OPTION_OSLOCK_DEPRESS:
		depress_option = TRUE;
		if (thread_switch_disable_workqueue_sched_callback())
			reenable_workq_callback = TRUE;
		break;
	case SWITCH_OPTION_OSLOCK_WAIT:
		wait_option = TRUE;
		if (thread_switch_disable_workqueue_sched_callback())
			reenable_workq_callback = TRUE;
		break;
	default:
		return (KERN_INVALID_ARGUMENT);
	}

	/*
	 * Translate the port name if supplied.
	 */
	if (thread_name != MACH_PORT_NULL) {
		ipc_port_t port;

		if (ipc_port_translate_send(self->task->itk_space,
		                            thread_name, &port) == KERN_SUCCESS) {
			ip_reference(port);
			ip_unlock(port);

			thread = convert_port_to_thread(port);
			ip_release(port);

			if (thread == self) {
				thread_deallocate(thread);
				thread = THREAD_NULL;
			}
		}
	}

	if (option == SWITCH_OPTION_OSLOCK_DEPRESS || option == SWITCH_OPTION_OSLOCK_WAIT) {
		if (thread != THREAD_NULL) {
			if (thread->task != self->task) {
				/*
				 * OSLock boosting only applies to other threads
				 * in your same task (even if you have a port for
				 * a thread in another task)
				 */
				thread_deallocate(thread);
				thread = THREAD_NULL;
			} else {
				/*
				 * Attempt to kick the lock owner up to our same IO throttling tier.
				 * If the thread is currently blocked in throttle_lowpri_io(),
				 * it will immediately break out.
				 *
				 * TODO: SFI break out?
				 */
				int new_policy = proc_get_effective_thread_policy(self, TASK_POLICY_IO);

				set_thread_iotier_override(thread, new_policy);
			}
		}
	}

	/*
	 * Try to handoff if supplied.
	 */
	if (thread != THREAD_NULL) {
		spl_t s = splsched();

		/* This may return a different thread if the target is pushing on something */
		thread_t pulled_thread = thread_run_queue_remove_for_handoff(thread);

		KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_THREAD_SWITCH) | DBG_FUNC_NONE,
		                      thread_tid(thread), thread->state,
		                      pulled_thread ? TRUE : FALSE, 0, 0);

		if (pulled_thread != THREAD_NULL) {
			/* We can't be dropping the last ref here */
			thread_deallocate_safe(thread);

			if (wait_option)
				assert_wait_timeout((event_t)assert_wait_timeout, THREAD_ABORTSAFE,
				                    option_time, scale_factor);
			else if (depress_option)
				thread_depress_ms(option_time);

			self->saved.swtch.option = option;
			self->saved.swtch.reenable_workq_callback = reenable_workq_callback;

			thread_run(self, (thread_continue_t)thread_switch_continue, NULL, pulled_thread);
			/* NOTREACHED */
			panic("returned from thread_run!");
		}

		splx(s);

		thread_deallocate(thread);
	}

	if (wait_option)
		assert_wait_timeout((event_t)assert_wait_timeout, THREAD_ABORTSAFE, option_time, scale_factor);
	else if (depress_option)
		thread_depress_ms(option_time);

	self->saved.swtch.option = option;
	self->saved.swtch.reenable_workq_callback = reenable_workq_callback;

	thread_block_reason((thread_continue_t)thread_switch_continue, NULL, AST_YIELD);

	if (depress_option)
		thread_depress_abort_internal(self);

	if (reenable_workq_callback)
		thread_switch_enable_workqueue_sched_callback();

	return (KERN_SUCCESS);
}
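
/*
 * Usage sketch (illustrative; owner_port stands for a send right to the
 * thread believed to hold a contended resource):
 *
 *	#include <mach/mach.h>
 *	#include <mach/thread_switch.h>
 *
 *	kern_return_t kr = thread_switch(owner_port, SWITCH_OPTION_DEPRESS, 1);
 *
 * With SWITCH_OPTION_DEPRESS the handoff is attempted if the hinted thread
 * is runnable and, either way, the caller's priority is depressed for the
 * given option_time, interpreted in milliseconds (see the scale_factor
 * handling above; SWITCH_OPTION_DISPATCH_CONTENTION uses microseconds).
 */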

/* Returns a +1 thread reference */
thread_t
port_name_to_thread_for_ulock(mach_port_name_t thread_name)
{
	thread_t thread = THREAD_NULL;
	thread_t self = current_thread();

	/*
	 * Translate the port name if supplied.
	 */
	if (thread_name != MACH_PORT_NULL) {
		ipc_port_t port;

		if (ipc_port_translate_send(self->task->itk_space,
		                            thread_name, &port) == KERN_SUCCESS) {
			ip_reference(port);
			ip_unlock(port);

			thread = convert_port_to_thread(port);
			ip_release(port);

			if (thread == THREAD_NULL) {
				return thread;
			}

			if ((thread == self) || (thread->task != self->task)) {
				thread_deallocate(thread);
				thread = THREAD_NULL;
			}
		}
	}

	return thread;
}

/* This function is called after an assert_wait(), therefore it must not
 * cause another wait until after the thread_run() or thread_block() call.
 *
 * Consumes a ref on thread.
 */
wait_result_t
thread_handoff(thread_t thread)
{
	thread_t deallocate_thread = THREAD_NULL;
	thread_t self = current_thread();

	/*
	 * Try to handoff if supplied.
	 */
	if (thread != THREAD_NULL) {
		spl_t s = splsched();

		thread_t pulled_thread = thread_run_queue_remove_for_handoff(thread);

		KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_THREAD_SWITCH) | DBG_FUNC_NONE,
		                      thread_tid(thread), thread->state,
		                      pulled_thread ? TRUE : FALSE, 0, 0);

		if (pulled_thread != THREAD_NULL) {
			/* We can't be dropping the last ref here */
			thread_deallocate_safe(thread);

			int result = thread_run(self, THREAD_CONTINUE_NULL, NULL, pulled_thread);

			splx(s);
			return result;
		}

		splx(s);

		deallocate_thread = thread;
		thread = THREAD_NULL;
	}

	int result = thread_block(THREAD_CONTINUE_NULL);
	if (deallocate_thread != THREAD_NULL) {
		thread_deallocate(deallocate_thread);
	}

	return result;
}
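
/*
 * Typical call pattern (sketch; wait_event and owner_name are placeholder
 * names for whatever the caller is synchronizing on):
 *
 *	thread_t owner = port_name_to_thread_for_ulock(owner_name);
 *	assert_wait(wait_event, THREAD_ABORTSAFE);
 *	wait_result_t wr = thread_handoff(owner);
 *
 * The +1 reference returned by port_name_to_thread_for_ulock() is consumed
 * by thread_handoff(), and nothing between the assert_wait() and the
 * handoff may block.
 */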

/*
 * Depress thread's priority to lowest possible for the specified interval,
 * with a value of zero resulting in no timeout being scheduled.
 */
void
thread_depress_abstime(
	uint64_t interval)
{
	thread_t self = current_thread();
	uint64_t deadline;
	spl_t s;

	s = splsched();
	thread_lock(self);
	if (!(self->sched_flags & TH_SFLAG_DEPRESSED_MASK)) {
		processor_t myprocessor = self->last_processor;

		self->sched_pri = DEPRESSPRI;

		KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_CHANGE_PRIORITY),
		                      (uintptr_t)thread_tid(self),
		                      self->base_pri,
		                      self->sched_pri,
		                      0, /* eventually, 'reason' */
		                      0);

		myprocessor->current_pri = self->sched_pri;
		self->sched_flags |= TH_SFLAG_DEPRESS;

		if (interval != 0) {
			clock_absolutetime_interval_to_deadline(interval, &deadline);
			if (!timer_call_enter(&self->depress_timer, deadline, TIMER_CALL_USER_CRITICAL))
				self->depress_timer_active++;
		}
	}
	thread_unlock(self);
	splx(s);
}

void
thread_depress_ms(
	mach_msg_timeout_t interval)
{
	uint64_t abstime;

	clock_interval_to_absolutetime_interval(
		interval, NSEC_PER_MSEC, &abstime);
	thread_depress_abstime(abstime);
}

/*
 * Priority depression expiration.
 */
void
thread_depress_expire(
	void *p0,
	__unused void *p1)
{
	thread_t thread = p0;
	spl_t s;

	s = splsched();
	thread_lock(thread);
	if (--thread->depress_timer_active == 0) {
		thread->sched_flags &= ~TH_SFLAG_DEPRESSED_MASK;
		thread_recompute_sched_pri(thread, FALSE);
	}
	thread_unlock(thread);
	splx(s);
}

/*
 * Prematurely abort priority depression if there is one.
 */
kern_return_t
thread_depress_abort_internal(
	thread_t thread)
{
	kern_return_t result = KERN_NOT_DEPRESSED;
	spl_t s;

	s = splsched();
	thread_lock(thread);
	if (!(thread->sched_flags & TH_SFLAG_POLLDEPRESS)) {
		if (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) {
			thread->sched_flags &= ~TH_SFLAG_DEPRESSED_MASK;
			thread_recompute_sched_pri(thread, FALSE);
			result = KERN_SUCCESS;
		}

		if (timer_call_cancel(&thread->depress_timer))
			thread->depress_timer_active--;
	}
	thread_unlock(thread);
	splx(s);

	return (result);
}

void
thread_poll_yield(
	thread_t self)
{
	spl_t s;

	assert(self == current_thread());

	s = splsched();
	if (self->sched_mode == TH_MODE_FIXED) {
		uint64_t total_computation, abstime;

		abstime = mach_absolute_time();
		total_computation = abstime - self->computation_epoch;
		total_computation += self->computation_metered;
		if (total_computation >= max_poll_computation) {
			processor_t myprocessor = current_processor();
			ast_t preempt;

			thread_lock(self);
			if (!(self->sched_flags & TH_SFLAG_DEPRESSED_MASK)) {
				self->sched_pri = DEPRESSPRI;

				KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_CHANGE_PRIORITY),
				                      (uintptr_t)thread_tid(self),
				                      self->base_pri,
				                      self->sched_pri,
				                      0, /* eventually, 'reason' */
				                      0);

				myprocessor->current_pri = self->sched_pri;
			}
			self->computation_epoch = abstime;
			self->computation_metered = 0;
			self->sched_flags |= TH_SFLAG_POLLDEPRESS;

			abstime += (total_computation >> sched_poll_yield_shift);
			if (!timer_call_enter(&self->depress_timer, abstime, TIMER_CALL_USER_CRITICAL))
				self->depress_timer_active++;

			if ((preempt = csw_check(myprocessor, AST_NONE)) != AST_NONE)
				ast_on(preempt);

			thread_unlock(self);
		}
	}
	splx(s);
}

void
thread_yield_internal(
	mach_msg_timeout_t ms)
{
	processor_t myprocessor;

	disable_preemption();
	myprocessor = current_processor();
	if (SCHED(processor_queue_empty)(myprocessor) && rt_runq.count == 0) {
		mp_enable_preemption();

		return;
	}
	enable_preemption();

	thread_depress_ms(ms);

	thread_block_reason(THREAD_CONTINUE_NULL, NULL, AST_YIELD);

	thread_depress_abort_internal(current_thread());
}

/*
 * This yields to a possible non-urgent preemption pending on the current processor.
 *
 * This is useful when doing a long computation in the kernel without returning to userspace.
 *
 * As opposed to other yielding mechanisms, this does not drop the priority of the current thread.
 */
void
thread_yield_to_preemption()
{
	/*
	 * ast_pending() should ideally be called with interrupts disabled, but
	 * the check here is fine because csw_check() will do the right thing.
	 */
	ast_t *pending_ast = ast_pending();
	ast_t ast = AST_NONE;
	processor_t p;

	if (*pending_ast & AST_PREEMPT) {
		thread_t self = current_thread();

		spl_t s = splsched();

		p = current_processor();
		thread_lock(self);
		ast = csw_check(p, AST_YIELD);
		ast_on(ast);
		thread_unlock(self);

		if (ast != AST_NONE) {
			(void)thread_block_reason(THREAD_CONTINUE_NULL, NULL, ast);
		}

		splx(s);
	}
}
679