X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/6601e61aa18bf4f09af135ff61fc7f4771d23b06..cc8bc92ae4a8e9f1a1ab61bf83d34ad8150b3405:/osfmk/kern/syscall_subr.c diff --git a/osfmk/kern/syscall_subr.c b/osfmk/kern/syscall_subr.c index b40d4a63d..e944c79d9 100644 --- a/osfmk/kern/syscall_subr.c +++ b/osfmk/kern/syscall_subr.c @@ -1,23 +1,29 @@ /* - * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2009 Apple Inc. All rights reserved. * - * @APPLE_LICENSE_HEADER_START@ + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. * - * @APPLE_LICENSE_HEADER_END@ + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ /* * @OSF_COPYRIGHT@ @@ -62,36 +68,52 @@ #include #include #include +#include + #include #include #include #include +#include +#include + +#ifdef MACH_BSD +extern void workqueue_thread_yielded(void); +extern sched_call_t workqueue_get_sched_callback(void); +#endif /* MACH_BSD */ + +extern wait_result_t thread_handoff_reason(thread_t thread, ast_t reason); + +/* Called from commpage to take a delayed preemption when exiting + * the "Preemption Free Zone" (PFZ). + */ +kern_return_t +pfz_exit( +__unused struct pfz_exit_args *args) +{ + /* For now, nothing special to do. We'll pick up the ASTs on kernel exit. */ + + return (KERN_SUCCESS); +} + /* * swtch and swtch_pri both attempt to context switch (logic in * thread_block no-ops the context switch if nothing would happen). * A boolean is returned that indicates whether there is anything - * else runnable. - * - * This boolean can be used by a thread waiting on a - * lock or condition: If FALSE is returned, the thread is justified - * in becoming a resource hog by continuing to spin because there's - * nothing else useful that the processor could do. If TRUE is - * returned, the thread should make one more check on the - * lock and then be a good citizen and really suspend. + * else runnable. That's no excuse to spin, though. */ static void swtch_continue(void) { - register processor_t myprocessor; - boolean_t result; + processor_t myprocessor; + boolean_t result; - disable_preemption(); + disable_preemption(); myprocessor = current_processor(); - result = myprocessor->runq.count > 0 || - myprocessor->processor_set->runq.count > 0; + result = SCHED(thread_should_yield)(myprocessor, current_thread()); enable_preemption(); thread_syscall_return(result); @@ -102,13 +124,12 @@ boolean_t swtch( __unused struct swtch_args *args) { - register processor_t myprocessor; + processor_t myprocessor; boolean_t result; disable_preemption(); myprocessor = current_processor(); - if ( myprocessor->runq.count == 0 && - myprocessor->processor_set->runq.count == 0 ) { + if (!SCHED(thread_should_yield)(myprocessor, current_thread())) { mp_enable_preemption(); return (FALSE); @@ -121,8 +142,7 @@ swtch( disable_preemption(); myprocessor = current_processor(); - result = myprocessor->runq.count > 0 || - myprocessor->processor_set->runq.count > 0; + result = SCHED(thread_should_yield)(myprocessor, current_thread()); enable_preemption(); return (result); @@ -131,15 +151,14 @@ swtch( static void swtch_pri_continue(void) { - register processor_t myprocessor; - boolean_t result; + processor_t myprocessor; + boolean_t result; thread_depress_abort_internal(current_thread()); - disable_preemption(); + disable_preemption(); myprocessor = current_processor(); - result = myprocessor->runq.count > 0 || - myprocessor->processor_set->runq.count > 0; + result = SCHED(thread_should_yield)(myprocessor, current_thread()); mp_enable_preemption(); thread_syscall_return(result); @@ -150,13 +169,12 @@ boolean_t swtch_pri( __unused struct swtch_pri_args *args) { - register processor_t myprocessor; + processor_t myprocessor; boolean_t result; disable_preemption(); myprocessor = current_processor(); - if ( myprocessor->runq.count == 0 && - myprocessor->processor_set->runq.count == 0 ) { + if (!SCHED(thread_should_yield)(myprocessor, current_thread())) { mp_enable_preemption(); return (FALSE); @@ -165,7 +183,7 @@ __unused struct swtch_pri_args *args) counter(c_swtch_pri_block++); - thread_depress_abstime(std_quantum); + thread_depress_abstime(thread_depress_time); thread_block_reason((thread_continue_t)swtch_pri_continue, NULL, AST_YIELD); @@ -173,22 +191,40 @@ __unused struct swtch_pri_args *args) disable_preemption(); myprocessor = current_processor(); - result = myprocessor->runq.count > 0 || - myprocessor->processor_set->runq.count > 0; + result = SCHED(thread_should_yield)(myprocessor, current_thread()); enable_preemption(); return (result); } +static boolean_t +thread_switch_disable_workqueue_sched_callback(void) +{ + sched_call_t callback = workqueue_get_sched_callback(); + return thread_disable_sched_call(current_thread(), callback) != NULL; +} + +static void +thread_switch_enable_workqueue_sched_callback(void) +{ + sched_call_t callback = workqueue_get_sched_callback(); + thread_reenable_sched_call(current_thread(), callback); +} + static void thread_switch_continue(void) { - register thread_t self = current_thread(); + thread_t self = current_thread(); int option = self->saved.swtch.option; + boolean_t reenable_workq_callback = self->saved.swtch.reenable_workq_callback; + - if (option == SWITCH_OPTION_DEPRESS) + if (option == SWITCH_OPTION_DEPRESS || option == SWITCH_OPTION_OSLOCK_DEPRESS) thread_depress_abort_internal(self); + if (reenable_workq_callback) + thread_switch_enable_workqueue_sched_callback(); + thread_syscall_return(KERN_SUCCESS); /*NOTREACHED*/ } @@ -202,21 +238,48 @@ kern_return_t thread_switch( struct thread_switch_args *args) { - register thread_t thread, self = current_thread(); + thread_t thread = THREAD_NULL; + thread_t self = current_thread(); mach_port_name_t thread_name = args->thread_name; int option = args->option; mach_msg_timeout_t option_time = args->option_time; + uint32_t scale_factor = NSEC_PER_MSEC; + boolean_t reenable_workq_callback = FALSE; + boolean_t depress_option = FALSE; + boolean_t wait_option = FALSE; /* - * Process option. + * Validate and process option. */ switch (option) { case SWITCH_OPTION_NONE: - case SWITCH_OPTION_DEPRESS: + workqueue_thread_yielded(); + break; case SWITCH_OPTION_WAIT: - break; - + wait_option = TRUE; + workqueue_thread_yielded(); + break; + case SWITCH_OPTION_DEPRESS: + depress_option = TRUE; + workqueue_thread_yielded(); + break; + case SWITCH_OPTION_DISPATCH_CONTENTION: + scale_factor = NSEC_PER_USEC; + wait_option = TRUE; + if (thread_switch_disable_workqueue_sched_callback()) + reenable_workq_callback = TRUE; + break; + case SWITCH_OPTION_OSLOCK_DEPRESS: + depress_option = TRUE; + if (thread_switch_disable_workqueue_sched_callback()) + reenable_workq_callback = TRUE; + break; + case SWITCH_OPTION_OSLOCK_WAIT: + wait_option = TRUE; + if (thread_switch_disable_workqueue_sched_callback()) + reenable_workq_callback = TRUE; + break; default: return (KERN_INVALID_ARGUMENT); } @@ -224,95 +287,188 @@ thread_switch( /* * Translate the port name if supplied. */ - if (thread_name != MACH_PORT_NULL) { - ipc_port_t port; + if (thread_name != MACH_PORT_NULL) { + ipc_port_t port; if (ipc_port_translate_send(self->task->itk_space, - thread_name, &port) == KERN_SUCCESS) { + thread_name, &port) == KERN_SUCCESS) { ip_reference(port); ip_unlock(port); thread = convert_port_to_thread(port); - ipc_port_release(port); + ip_release(port); if (thread == self) { - thread_deallocate_internal(thread); + thread_deallocate(thread); thread = THREAD_NULL; } } - else - thread = THREAD_NULL; } - else - thread = THREAD_NULL; + + if (option == SWITCH_OPTION_OSLOCK_DEPRESS || option == SWITCH_OPTION_OSLOCK_WAIT) { + if (thread != THREAD_NULL) { + + if (thread->task != self->task) { + /* + * OSLock boosting only applies to other threads + * in your same task (even if you have a port for + * a thread in another task) + */ + + thread_deallocate(thread); + thread = THREAD_NULL; + } else { + /* + * Attempt to kick the lock owner up to our same IO throttling tier. + * If the thread is currently blocked in throttle_lowpri_io(), + * it will immediately break out. + * + * TODO: SFI break out? + */ + int new_policy = proc_get_effective_thread_policy(self, TASK_POLICY_IO); + + set_thread_iotier_override(thread, new_policy); + } + } + } /* * Try to handoff if supplied. */ if (thread != THREAD_NULL) { - processor_t processor; - spl_t s; - - s = splsched(); - thread_lock(thread); - - /* - * Check if the thread is in the right pset, - * is not bound to a different processor, - * and that realtime is not involved. - * - * Next, pull it off its run queue. If it - * doesn't come, it's not eligible. - */ - processor = current_processor(); - if (processor->current_pri < BASEPRI_RTQUEUES && - thread->sched_pri < BASEPRI_RTQUEUES && - thread->processor_set == processor->processor_set && - (thread->bound_processor == PROCESSOR_NULL || - thread->bound_processor == processor) && - run_queue_remove(thread) != RUN_QUEUE_NULL ) { - /* - * Hah, got it!! - */ - thread_unlock(thread); - - thread_deallocate_internal(thread); - - if (option == SWITCH_OPTION_WAIT) + spl_t s = splsched(); + + /* This may return a different thread if the target is pushing on something */ + thread_t pulled_thread = thread_run_queue_remove_for_handoff(thread); + + KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED,MACH_SCHED_THREAD_SWITCH)|DBG_FUNC_NONE, + thread_tid(thread), thread->state, + pulled_thread ? TRUE : FALSE, 0, 0); + + if (pulled_thread != THREAD_NULL) { + /* We can't be dropping the last ref here */ + thread_deallocate_safe(thread); + + if (wait_option) assert_wait_timeout((event_t)assert_wait_timeout, THREAD_ABORTSAFE, - option_time, 1000*NSEC_PER_USEC); - else - if (option == SWITCH_OPTION_DEPRESS) + option_time, scale_factor); + else if (depress_option) thread_depress_ms(option_time); self->saved.swtch.option = option; + self->saved.swtch.reenable_workq_callback = reenable_workq_callback; - thread_run(self, (thread_continue_t)thread_switch_continue, NULL, thread); + thread_run(self, (thread_continue_t)thread_switch_continue, NULL, pulled_thread); /* NOTREACHED */ + panic("returned from thread_run!"); } - thread_unlock(thread); splx(s); thread_deallocate(thread); } - - if (option == SWITCH_OPTION_WAIT) - assert_wait_timeout((event_t)assert_wait_timeout, THREAD_ABORTSAFE, option_time, 1000*NSEC_PER_USEC); - else - if (option == SWITCH_OPTION_DEPRESS) + + if (wait_option) + assert_wait_timeout((event_t)assert_wait_timeout, THREAD_ABORTSAFE, option_time, scale_factor); + else if (depress_option) thread_depress_ms(option_time); - + self->saved.swtch.option = option; + self->saved.swtch.reenable_workq_callback = reenable_workq_callback; thread_block_reason((thread_continue_t)thread_switch_continue, NULL, AST_YIELD); - if (option == SWITCH_OPTION_DEPRESS) + if (depress_option) thread_depress_abort_internal(self); + if (reenable_workq_callback) + thread_switch_enable_workqueue_sched_callback(); + return (KERN_SUCCESS); } +/* Returns a +1 thread reference */ +thread_t +port_name_to_thread_for_ulock(mach_port_name_t thread_name) +{ + thread_t thread = THREAD_NULL; + thread_t self = current_thread(); + + /* + * Translate the port name if supplied. + */ + if (thread_name != MACH_PORT_NULL) { + ipc_port_t port; + + if (ipc_port_translate_send(self->task->itk_space, + thread_name, &port) == KERN_SUCCESS) { + ip_reference(port); + ip_unlock(port); + + thread = convert_port_to_thread(port); + ip_release(port); + + if (thread == THREAD_NULL) { + return thread; + } + + if ((thread == self) || (thread->task != self->task)) { + thread_deallocate(thread); + thread = THREAD_NULL; + } + } + } + + return thread; +} + +/* This function is called after an assert_wait(), therefore it must not + * cause another wait until after the thread_run() or thread_block() + * + * Consumes a ref on thread + */ +wait_result_t +thread_handoff(thread_t thread) +{ + thread_t deallocate_thread = THREAD_NULL; + thread_t self = current_thread(); + + /* + * Try to handoff if supplied. + */ + if (thread != THREAD_NULL) { + spl_t s = splsched(); + + thread_t pulled_thread = thread_run_queue_remove_for_handoff(thread); + + KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED,MACH_SCHED_THREAD_SWITCH)|DBG_FUNC_NONE, + thread_tid(thread), thread->state, + pulled_thread ? TRUE : FALSE, 0, 0); + + if (pulled_thread != THREAD_NULL) { + /* We can't be dropping the last ref here */ + thread_deallocate_safe(thread); + + int result = thread_run(self, THREAD_CONTINUE_NULL, NULL, pulled_thread); + + splx(s); + return result; + } + + splx(s); + + deallocate_thread = thread; + thread = THREAD_NULL; + } + + int result = thread_block(THREAD_CONTINUE_NULL); + if (deallocate_thread != THREAD_NULL) { + thread_deallocate(deallocate_thread); + } + + return result; +} + /* * Depress thread's priority to lowest possible for the specified interval, * with a value of zero resulting in no timeout being scheduled. @@ -321,23 +477,31 @@ void thread_depress_abstime( uint64_t interval) { - register thread_t self = current_thread(); + thread_t self = current_thread(); uint64_t deadline; spl_t s; s = splsched(); thread_lock(self); - if (!(self->sched_mode & TH_MODE_ISDEPRESSED)) { + if (!(self->sched_flags & TH_SFLAG_DEPRESSED_MASK)) { processor_t myprocessor = self->last_processor; self->sched_pri = DEPRESSPRI; + + KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_CHANGE_PRIORITY), + (uintptr_t)thread_tid(self), + self->base_pri, + self->sched_pri, + 0, /* eventually, 'reason' */ + 0); + myprocessor->current_pri = self->sched_pri; - self->sched_mode &= ~TH_MODE_PREEMPT; - self->sched_mode |= TH_MODE_DEPRESS; + myprocessor->current_perfctl_class = thread_get_perfcontrol_class(self); + self->sched_flags |= TH_SFLAG_DEPRESS; if (interval != 0) { clock_absolutetime_interval_to_deadline(interval, &deadline); - if (!timer_call_enter(&self->depress_timer, deadline)) + if (!timer_call_enter(&self->depress_timer, deadline, TIMER_CALL_USER_CRITICAL)) self->depress_timer_active++; } } @@ -352,7 +516,7 @@ thread_depress_ms( uint64_t abstime; clock_interval_to_absolutetime_interval( - interval, 1000*NSEC_PER_USEC, &abstime); + interval, NSEC_PER_MSEC, &abstime); thread_depress_abstime(abstime); } @@ -370,8 +534,8 @@ thread_depress_expire( s = splsched(); thread_lock(thread); if (--thread->depress_timer_active == 0) { - thread->sched_mode &= ~TH_MODE_ISDEPRESSED; - compute_priority(thread, FALSE); + thread->sched_flags &= ~TH_SFLAG_DEPRESSED_MASK; + thread_recompute_sched_pri(thread, FALSE); } thread_unlock(thread); splx(s); @@ -389,10 +553,10 @@ thread_depress_abort_internal( s = splsched(); thread_lock(thread); - if (!(thread->sched_mode & TH_MODE_POLLDEPRESS)) { - if (thread->sched_mode & TH_MODE_ISDEPRESSED) { - thread->sched_mode &= ~TH_MODE_ISDEPRESSED; - compute_priority(thread, FALSE); + if (!(thread->sched_flags & TH_SFLAG_POLLDEPRESS)) { + if (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) { + thread->sched_flags &= ~TH_SFLAG_DEPRESSED_MASK; + thread_recompute_sched_pri(thread, FALSE); result = KERN_SUCCESS; } @@ -414,7 +578,7 @@ thread_poll_yield( assert(self == current_thread()); s = splsched(); - if (!(self->sched_mode & (TH_MODE_REALTIME|TH_MODE_TIMESHARE))) { + if (self->sched_mode == TH_MODE_FIXED) { uint64_t total_computation, abstime; abstime = mach_absolute_time(); @@ -425,23 +589,93 @@ thread_poll_yield( ast_t preempt; thread_lock(self); - if (!(self->sched_mode & TH_MODE_ISDEPRESSED)) { + if (!(self->sched_flags & TH_SFLAG_DEPRESSED_MASK)) { self->sched_pri = DEPRESSPRI; + + KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_CHANGE_PRIORITY), + (uintptr_t)thread_tid(self), + self->base_pri, + self->sched_pri, + 0, /* eventually, 'reason' */ + 0); + myprocessor->current_pri = self->sched_pri; - self->sched_mode &= ~TH_MODE_PREEMPT; + myprocessor->current_perfctl_class = thread_get_perfcontrol_class(self); } self->computation_epoch = abstime; self->computation_metered = 0; - self->sched_mode |= TH_MODE_POLLDEPRESS; + self->sched_flags |= TH_SFLAG_POLLDEPRESS; abstime += (total_computation >> sched_poll_yield_shift); - if (!timer_call_enter(&self->depress_timer, abstime)) + if (!timer_call_enter(&self->depress_timer, abstime, TIMER_CALL_USER_CRITICAL)) self->depress_timer_active++; - thread_unlock(self); - if ((preempt = csw_check(self, myprocessor)) != AST_NONE) + if ((preempt = csw_check(myprocessor, AST_NONE)) != AST_NONE) ast_on(preempt); + + thread_unlock(self); } } splx(s); } + + +void +thread_yield_internal( + mach_msg_timeout_t ms) +{ + processor_t myprocessor; + + disable_preemption(); + myprocessor = current_processor(); + if (!SCHED(thread_should_yield)(myprocessor, current_thread())) { + mp_enable_preemption(); + + return; + } + enable_preemption(); + + thread_depress_ms(ms); + + thread_block_reason(THREAD_CONTINUE_NULL, NULL, AST_YIELD); + + thread_depress_abort_internal(current_thread()); +} + +/* + * This yields to a possible non-urgent preemption pending on the current processor. + * + * This is useful when doing a long computation in the kernel without returning to userspace. + * + * As opposed to other yielding mechanisms, this does not drop the priority of the current thread. + */ +void +thread_yield_to_preemption() +{ + /* + * ast_pending() should ideally be called with interrupts disabled, but + * the check here is fine because csw_check() will do the right thing. + */ + ast_t *pending_ast = ast_pending(); + ast_t ast = AST_NONE; + processor_t p; + + if (*pending_ast & AST_PREEMPT) { + thread_t self = current_thread(); + + spl_t s = splsched(); + + p = current_processor(); + thread_lock(self); + ast = csw_check(p, AST_YIELD); + ast_on(ast); + thread_unlock(self); + + if (ast != AST_NONE) { + (void)thread_block_reason(THREAD_CONTINUE_NULL, NULL, ast); + } + + splx(s); + } +} +