X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/fe8ab488e9161c46dd9885d58fc52996dc0249ff..a39ff7e25e19b3a8c3020042a3872ca9ec9659f1:/osfmk/kperf/kperfbsd.c diff --git a/osfmk/kperf/kperfbsd.c b/osfmk/kperf/kperfbsd.c index d712fd0d0..b89125126 100644 --- a/osfmk/kperf/kperfbsd.c +++ b/osfmk/kperf/kperfbsd.c @@ -2,7 +2,7 @@ * Copyright (c) 2011 Apple Computer, Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * + * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in @@ -11,10 +11,10 @@ * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. - * + * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. - * + * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, @@ -22,505 +22,477 @@ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. - * + * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ /* sysctl interface for paramters from user-land */ +#include +#include +#include #include #include #include #include #include -#include -#include -#include -#include #include -#include -#include -#include +#include +#include #include +#include +#include +#include - -/* a pid which is allowed to control kperf without requiring root access */ -static pid_t blessed_pid = -1; -static boolean_t blessed_preempt = FALSE; +#include /* IDs for dispatch from SYSCTL macros */ -#define REQ_SAMPLING (1) -#define REQ_ACTION_COUNT (2) -#define REQ_ACTION_SAMPLERS (3) -#define REQ_TIMER_COUNT (4) -#define REQ_TIMER_PERIOD (5) -#define REQ_TIMER_PET (6) -#define REQ_TIMER_ACTION (7) -#define REQ_BLESS (8) -#define REQ_ACTION_USERDATA (9) -#define REQ_ACTION_FILTER_BY_TASK (10) -#define REQ_ACTION_FILTER_BY_PID (11) -#define REQ_KDBG_CALLSTACKS (12) -#define REQ_PET_IDLE_RATE (13) -#define REQ_BLESS_PREEMPT (14) - -/* simple state variables */ -int kperf_debug_level = 0; +#define REQ_SAMPLING (1) +#define REQ_ACTION_COUNT (2) +#define REQ_ACTION_SAMPLERS (3) +#define REQ_TIMER_COUNT (4) +#define REQ_TIMER_PERIOD (5) +#define REQ_TIMER_PET (6) +#define REQ_TIMER_ACTION (7) +#define REQ_BLESS (8) +#define REQ_ACTION_USERDATA (9) +#define REQ_ACTION_FILTER_BY_TASK (10) +#define REQ_ACTION_FILTER_BY_PID (11) +/* 12 unused */ +#define REQ_PET_IDLE_RATE (13) +#define REQ_BLESS_PREEMPT (14) +#define REQ_KDBG_CSWITCH (15) +#define REQ_RESET (16) +/* 17 unused */ +#define REQ_ACTION_UCALLSTACK_DEPTH (18) +#define REQ_ACTION_KCALLSTACK_DEPTH (19) +#define REQ_LIGHTWEIGHT_PET (20) +#define REQ_KDEBUG_ACTION (21) +#define REQ_KDEBUG_FILTER (22) -static lck_grp_attr_t *kperf_cfg_lckgrp_attr = NULL; -static lck_grp_t *kperf_cfg_lckgrp = NULL; -static lck_mtx_t kperf_cfg_lock; -static boolean_t kperf_cfg_initted = FALSE; +int kperf_debug_level = 0; -void kdbg_swap_global_state_pid(pid_t old_pid, pid_t new_pid); /* bsd/kern/kdebug.c */ +#if DEVELOPMENT || DEBUG +_Atomic long long kperf_pending_ipis = 0; +#endif /* DEVELOPMENT || DEBUG */ -/*************************** +/* + * kperf has a different sysctl model than others. + * + * For simple queries like the number of actions, the normal sysctl style + * of get/set works well. * - * lock init + * However, when requesting information about something specific, like an + * action, user space needs to provide some contextual information. This + * information is stored in a uint64_t array that includes the context, like + * the action ID it is interested in. If user space is getting the value from + * the kernel, then the get side of the sysctl is valid. If it is setting the + * value, then the get pointers are left NULL. * - ***************************/ + * These functions handle marshalling and unmarshalling data from sysctls. + */ -void -kperf_bootstrap(void) +static int +kperf_sysctl_get_set_uint32(struct sysctl_req *req, + uint32_t (*get)(void), int (*set)(uint32_t)) { - kperf_cfg_lckgrp_attr = lck_grp_attr_alloc_init(); - kperf_cfg_lckgrp = lck_grp_alloc_init("kperf cfg", - kperf_cfg_lckgrp_attr); - lck_mtx_init(&kperf_cfg_lock, kperf_cfg_lckgrp, LCK_ATTR_NULL); + assert(req != NULL); + assert(get != NULL); + assert(set != NULL); + + uint32_t value = 0; + if (req->oldptr) { + value = get(); + } - kperf_cfg_initted = TRUE; + int error = sysctl_io_number(req, value, sizeof(value), &value, NULL); + + if (error || !req->newptr) { + return error; + } + + return set(value); } -/*************************** - * - * sysctl handlers - * - ***************************/ +static int +kperf_sysctl_get_set_int(struct sysctl_req *req, + int (*get)(void), int (*set)(int)) +{ + assert(req != NULL); + assert(get != NULL); + assert(set != NULL); + + int value = 0; + if (req->oldptr) { + value = get(); + } + + int error = sysctl_io_number(req, value, sizeof(value), &value, NULL); + + if (error || !req->newptr) { + return error; + } + + return set(value); +} static int -sysctl_timer_period( __unused struct sysctl_oid *oidp, struct sysctl_req *req ) +kperf_sysctl_get_set_unsigned_uint32(struct sysctl_req *req, + int (*get)(unsigned int, uint32_t *), int (*set)(unsigned int, uint32_t)) { - int error = 0; - uint64_t inputs[2], retval; - unsigned timer, set = 0; - - /* get 2x 64-bit words */ - error = SYSCTL_IN( req, inputs, 2*sizeof(inputs[0]) ); - if(error) - return (error); - - /* setup inputs */ - timer = (unsigned) inputs[0]; - if( inputs[1] != ~0ULL ) - set = 1; - - if( set ) - { - error = kperf_timer_set_period( timer, inputs[1] ); - if( error ) - return error; - } - - error = kperf_timer_get_period(timer, &retval); - if(error) - return (error); - - inputs[1] = retval; - - if( error == 0 ) - error = SYSCTL_OUT( req, inputs, 2*sizeof(inputs[0]) ); - - return error; + assert(req != NULL); + assert(get != NULL); + assert(set != NULL); + + int error = 0; + uint64_t inputs[2] = {}; + + if (req->newptr == USER_ADDR_NULL) { + return EFAULT; + } + + if ((error = copyin(req->newptr, inputs, sizeof(inputs)))) { + return error; + } + + unsigned int action_id = (unsigned int)inputs[0]; + uint32_t new_value = (uint32_t)inputs[1]; + + if (req->oldptr != USER_ADDR_NULL) { + uint32_t value_out = 0; + if ((error = get(action_id, &value_out))) { + return error; + } + + inputs[1] = value_out; + + return copyout(inputs, req->oldptr, sizeof(inputs)); + } else { + return set(action_id, new_value); + } } +/* + * These functions are essentially the same as the generic + * kperf_sysctl_get_set_unsigned_uint32, except they have unique input sizes. + */ + static int -sysctl_timer_action( __unused struct sysctl_oid *oidp, struct sysctl_req *req ) +sysctl_timer_period(struct sysctl_req *req) { - int error = 0; - uint64_t inputs[2]; - uint32_t retval; - unsigned timer, set = 0; - - /* get 2x 64-bit words */ - error = SYSCTL_IN( req, inputs, 2*sizeof(inputs[0]) ); - if(error) - return (error); - - /* setup inputs */ - timer = (unsigned) inputs[0]; - if( inputs[1] != ~0ULL ) - set = 1; - - if( set ) - { - error = kperf_timer_set_action( timer, inputs[1] ); - if( error ) - return error; - } - - error = kperf_timer_get_action(timer, &retval); - if(error) - return (error); - - inputs[1] = retval; - - if( error == 0 ) - error = SYSCTL_OUT( req, inputs, 2*sizeof(inputs[0]) ); - - return error; + int error; + uint64_t inputs[2] = {}; + + assert(req != NULL); + + if (req->newptr == USER_ADDR_NULL) { + return EFAULT; + } + + if ((error = copyin(req->newptr, inputs, sizeof(inputs)))) { + return error; + } + + unsigned int timer = (unsigned int)inputs[0]; + uint64_t new_period = inputs[1]; + + if (req->oldptr != USER_ADDR_NULL) { + uint64_t period_out = 0; + if ((error = kperf_timer_get_period(timer, &period_out))) { + return error; + } + + inputs[1] = period_out; + + return copyout(inputs, req->oldptr, sizeof(inputs)); + } else { + return kperf_timer_set_period(timer, new_period); + } } static int -sysctl_action_samplers( __unused struct sysctl_oid *oidp, - struct sysctl_req *req ) +sysctl_action_filter(struct sysctl_req *req, bool is_task_t) { - int error = 0; - uint64_t inputs[3]; - uint32_t retval; - unsigned actionid, set = 0; - - /* get 3x 64-bit words */ - error = SYSCTL_IN( req, inputs, 3*sizeof(inputs[0]) ); - if(error) - return (error); - - /* setup inputs */ - set = (unsigned) inputs[0]; - actionid = (unsigned) inputs[1]; - - if( set ) - { - error = kperf_action_set_samplers( actionid, inputs[2] ); - if( error ) - return error; - } - - error = kperf_action_get_samplers(actionid, &retval); - if(error) - return (error); - - inputs[2] = retval; - - if( error == 0 ) - error = SYSCTL_OUT( req, inputs, 3*sizeof(inputs[0]) ); - - return error; + int error = 0; + uint64_t inputs[2] = {}; + + assert(req != NULL); + + if (req->newptr == USER_ADDR_NULL) { + return EFAULT; + } + + if ((error = copyin(req->newptr, inputs, sizeof(inputs)))) { + return error; + } + + unsigned int actionid = (unsigned int)inputs[0]; + int new_filter = (int)inputs[1]; + + if (req->oldptr != USER_ADDR_NULL) { + int filter_out; + if ((error = kperf_action_get_filter(actionid, &filter_out))) { + return error; + } + + inputs[1] = filter_out; + return copyout(inputs, req->oldptr, sizeof(inputs)); + } else { + int pid = is_task_t ? kperf_port_to_pid((mach_port_name_t)new_filter) + : new_filter; + + return kperf_action_set_filter(actionid, pid); + } } static int -sysctl_action_userdata( __unused struct sysctl_oid *oidp, - struct sysctl_req *req ) +sysctl_bless(struct sysctl_req *req) { - int error = 0; - uint64_t inputs[3]; - uint32_t retval; - unsigned actionid, set = 0; - - /* get 3x 64-bit words */ - error = SYSCTL_IN( req, inputs, 3*sizeof(inputs[0]) ); - if(error) - return (error); - - /* setup inputs */ - set = (unsigned) inputs[0]; - actionid = (unsigned) inputs[1]; - - if( set ) - { - error = kperf_action_set_userdata( actionid, inputs[2] ); - if( error ) - return error; - } - - error = kperf_action_get_userdata(actionid, &retval); - if(error) - return (error); - - inputs[2] = retval; - - if( error == 0 ) - error = SYSCTL_OUT( req, inputs, 3*sizeof(inputs[0]) ); - - return error; + int value = ktrace_get_owning_pid(); + int error = sysctl_io_number(req, value, sizeof(value), &value, NULL); + + if (error || !req->newptr) { + return error; + } + + return ktrace_set_owning_pid(value); } +/* sysctl handlers that use the generic functions */ + static int -sysctl_action_filter( __unused struct sysctl_oid *oidp, - struct sysctl_req *req, int is_task_t ) +sysctl_action_samplers(struct sysctl_req *req) { - int error = 0; - uint64_t inputs[3]; - int retval; - unsigned actionid, set = 0; - mach_port_name_t portname; - int pid; - - /* get 3x 64-bit words */ - error = SYSCTL_IN( req, inputs, 3*sizeof(inputs[0]) ); - if(error) - return (error); - - /* setup inputs */ - set = (unsigned) inputs[0]; - actionid = (unsigned) inputs[1]; - - if( set ) - { - if( is_task_t ) - { - portname = (mach_port_name_t) inputs[2]; - pid = kperf_port_to_pid(portname); - } - else - pid = (int) inputs[2]; - - error = kperf_action_set_filter( actionid, pid ); - if( error ) - return error; - } - - error = kperf_action_get_filter(actionid, &retval); - if(error) - return (error); - - inputs[2] = retval; - - if( error == 0 ) - error = SYSCTL_OUT( req, inputs, 3*sizeof(inputs[0]) ); - - return error; + return kperf_sysctl_get_set_unsigned_uint32(req, + kperf_action_get_samplers, kperf_action_set_samplers); } static int -sysctl_sampling( struct sysctl_oid *oidp, struct sysctl_req *req ) +sysctl_action_userdata(struct sysctl_req *req) { - int error = 0; - uint32_t value = 0; - - /* get the old value and process it */ - value = kperf_sampling_status(); - - /* copy out the old value, get the new value */ - error = sysctl_handle_int(oidp, &value, 0, req); - if (error || !req->newptr) - return (error); - - /* if that worked, and we're writing... */ - if( value ) - error = kperf_sampling_enable(); - else - error = kperf_sampling_disable(); - - return error; + return kperf_sysctl_get_set_unsigned_uint32(req, + kperf_action_get_userdata, kperf_action_set_userdata); } static int -sysctl_action_count( struct sysctl_oid *oidp, struct sysctl_req *req ) +sysctl_action_ucallstack_depth(struct sysctl_req *req) { - int error = 0; - uint32_t value = 0; - - /* get the old value and process it */ - value = kperf_action_get_count(); - - /* copy out the old value, get the new value */ - error = sysctl_handle_int(oidp, &value, 0, req); - if (error || !req->newptr) - return (error); - - /* if that worked, and we're writing... */ - return kperf_action_set_count(value); + return kperf_sysctl_get_set_unsigned_uint32(req, + kperf_action_get_ucallstack_depth, kperf_action_set_ucallstack_depth); } static int -sysctl_timer_count( struct sysctl_oid *oidp, struct sysctl_req *req ) +sysctl_action_kcallstack_depth(struct sysctl_req *req) { - int error = 0; - uint32_t value = 0; - - /* get the old value and process it */ - value = kperf_timer_get_count(); - - /* copy out the old value, get the new value */ - error = sysctl_handle_int(oidp, &value, 0, req); - if (error || !req->newptr) - return (error); - - /* if that worked, and we're writing... */ - return kperf_timer_set_count(value); + return kperf_sysctl_get_set_unsigned_uint32(req, + kperf_action_get_kcallstack_depth, kperf_action_set_kcallstack_depth); } static int -sysctl_timer_pet( struct sysctl_oid *oidp, struct sysctl_req *req ) +sysctl_kdebug_action(struct sysctl_req *req) { - int error = 0; - uint32_t value = 0; - - /* get the old value and process it */ - value = kperf_timer_get_petid(); - - /* copy out the old value, get the new value */ - error = sysctl_handle_int(oidp, &value, 0, req); - if (error || !req->newptr) - return (error); - - /* if that worked, and we're writing... */ - return kperf_timer_set_petid(value); + return kperf_sysctl_get_set_int(req, kperf_kdebug_get_action, + kperf_kdebug_set_action); } static int -sysctl_bless( struct sysctl_oid *oidp, struct sysctl_req *req ) +sysctl_kdebug_filter(struct sysctl_req *req) { - int error = 0; - int value = 0; + assert(req != NULL); - /* get the old value and process it */ - value = blessed_pid; + if (req->oldptr != USER_ADDR_NULL) { + struct kperf_kdebug_filter *filter = NULL; + uint32_t n_debugids = kperf_kdebug_get_filter(&filter); + size_t filter_size = KPERF_KDEBUG_FILTER_SIZE(n_debugids); - /* copy out the old value, get the new value */ - error = sysctl_handle_int(oidp, &value, 0, req); - if (error || !req->newptr) - return (error); + if (n_debugids == 0) { + return EINVAL; + } - /* if that worked, and we're writing... */ - error = kperf_bless_pid(value); + return SYSCTL_OUT(req, filter, filter_size); + } - return error; + return kperf_kdebug_set_filter(req->newptr, (uint32_t)req->newlen); } static int -sysctl_bless_preempt( struct sysctl_oid *oidp, struct sysctl_req *req ) +kperf_sampling_set(uint32_t sample_start) { - int error = 0; - int value = 0; + if (sample_start) { + return kperf_sampling_enable(); + } else { + return kperf_sampling_disable(); + } +} - /* get the old value and process it */ - value = blessed_preempt; +static int +sysctl_sampling(struct sysctl_req *req) +{ + return kperf_sysctl_get_set_uint32(req, kperf_sampling_status, + kperf_sampling_set); +} - /* copy out the old value, get the new value */ - error = sysctl_handle_int(oidp, &value, 0, req); - if (error || !req->newptr) - return (error); +static int +sysctl_action_count(struct sysctl_req *req) +{ + return kperf_sysctl_get_set_uint32(req, kperf_action_get_count, + kperf_action_set_count); +} - /* if that worked, and we're writing... */ - blessed_preempt = value ? TRUE : FALSE; +static int +sysctl_timer_count(struct sysctl_req *req) +{ + return kperf_sysctl_get_set_uint32(req, kperf_timer_get_count, + kperf_timer_set_count); +} - return 0; +static int +sysctl_timer_action(struct sysctl_req *req) +{ + return kperf_sysctl_get_set_unsigned_uint32(req, kperf_timer_get_action, + kperf_timer_set_action); } +static int +sysctl_timer_pet(struct sysctl_req *req) +{ + return kperf_sysctl_get_set_uint32(req, kperf_timer_get_petid, + kperf_timer_set_petid); +} static int -sysctl_kdbg_callstacks( struct sysctl_oid *oidp, struct sysctl_req *req ) +sysctl_bless_preempt(struct sysctl_req *req) { - int error = 0; - int value = 0; - - /* get the old value and process it */ - value = kperf_kdbg_get_stacks(); + return sysctl_io_number(req, ktrace_root_set_owner_allowed, + sizeof(ktrace_root_set_owner_allowed), + &ktrace_root_set_owner_allowed, NULL); +} - /* copy out the old value, get the new value */ - error = sysctl_handle_int(oidp, &value, 0, req); - if (error || !req->newptr) - return (error); +static int +sysctl_kperf_reset(struct sysctl_req *req) +{ + int should_reset = 0; - /* if that worked, and we're writing... */ - error = kperf_kdbg_set_stacks(value); + int error = sysctl_io_number(req, should_reset, sizeof(should_reset), + &should_reset, NULL); + if (error) { + return error; + } - return error; + if (should_reset) { + ktrace_reset(KTRACE_KPERF); + } + return 0; } static int -sysctl_pet_idle_rate( struct sysctl_oid *oidp, struct sysctl_req *req ) +sysctl_pet_idle_rate(struct sysctl_req *req) { - int error = 0; - int value = 0; - - /* get the old value and process it */ - value = kperf_get_pet_idle_rate(); - - /* copy out the old value, get the new value */ - error = sysctl_handle_int(oidp, &value, 0, req); - if (error || !req->newptr) - return (error); + return kperf_sysctl_get_set_int(req, kperf_get_pet_idle_rate, + kperf_set_pet_idle_rate); +} - /* if that worked, and we're writing... */ - kperf_set_pet_idle_rate(value); +static int +sysctl_lightweight_pet(struct sysctl_req *req) +{ + return kperf_sysctl_get_set_int(req, kperf_get_lightweight_pet, + kperf_set_lightweight_pet); +} - return error; +static int +sysctl_kdbg_cswitch(struct sysctl_req *req) +{ + return kperf_sysctl_get_set_int(req, kperf_kdbg_cswitch_get, + kperf_kdbg_cswitch_set); } -/* - * #define SYSCTL_HANDLER_ARGS (struct sysctl_oid *oidp, \ - * void *arg1, int arg2, \ - * struct sysctl_req *req ) - */ static int kperf_sysctl SYSCTL_HANDLER_ARGS { +#pragma unused(oidp, arg2) int ret; + uintptr_t type = (uintptr_t)arg1; - // __unused struct sysctl_oid *unused_oidp = oidp; - (void)arg2; - - if ( !kperf_cfg_initted ) - panic("kperf_bootstrap not called"); + ktrace_lock(); - ret = kperf_access_check(); - if (ret) { - return ret; + if (req->oldptr == USER_ADDR_NULL && req->newptr != USER_ADDR_NULL) { + if ((ret = ktrace_configure(KTRACE_KPERF))) { + ktrace_unlock(); + return ret; + } + } else { + if ((ret = ktrace_read_check())) { + ktrace_unlock(); + return ret; + } } - lck_mtx_lock(&kperf_cfg_lock); - /* which request */ - switch( (uintptr_t) arg1 ) - { + switch (type) { case REQ_ACTION_COUNT: - ret = sysctl_action_count( oidp, req ); + ret = sysctl_action_count(req); break; case REQ_ACTION_SAMPLERS: - ret = sysctl_action_samplers( oidp, req ); + ret = sysctl_action_samplers(req); break; case REQ_ACTION_USERDATA: - ret = sysctl_action_userdata( oidp, req ); + ret = sysctl_action_userdata(req); break; case REQ_TIMER_COUNT: - ret = sysctl_timer_count( oidp, req ); + ret = sysctl_timer_count(req); break; case REQ_TIMER_PERIOD: - ret = sysctl_timer_period( oidp, req ); + ret = sysctl_timer_period(req); break; case REQ_TIMER_PET: - ret = sysctl_timer_pet( oidp, req ); + ret = sysctl_timer_pet(req); break; case REQ_TIMER_ACTION: - ret = sysctl_timer_action( oidp, req ); + ret = sysctl_timer_action(req); break; case REQ_SAMPLING: - ret = sysctl_sampling( oidp, req ); + ret = sysctl_sampling(req); break; - case REQ_KDBG_CALLSTACKS: - ret = sysctl_kdbg_callstacks( oidp, req ); + case REQ_KDBG_CSWITCH: + ret = sysctl_kdbg_cswitch(req); break; case REQ_ACTION_FILTER_BY_TASK: - ret = sysctl_action_filter( oidp, req, 1 ); + ret = sysctl_action_filter(req, true); break; case REQ_ACTION_FILTER_BY_PID: - ret = sysctl_action_filter( oidp, req, 0 ); + ret = sysctl_action_filter(req, false); + break; + case REQ_KDEBUG_ACTION: + ret = sysctl_kdebug_action(req); + break; + case REQ_KDEBUG_FILTER: + ret = sysctl_kdebug_filter(req); break; case REQ_PET_IDLE_RATE: - ret = sysctl_pet_idle_rate( oidp, req ); + ret = sysctl_pet_idle_rate(req); break; case REQ_BLESS_PREEMPT: - ret = sysctl_bless_preempt( oidp, req ); + ret = sysctl_bless_preempt(req); break; + case REQ_RESET: + ret = sysctl_kperf_reset(req); + break; + case REQ_ACTION_UCALLSTACK_DEPTH: + ret = sysctl_action_ucallstack_depth(req); + break; + case REQ_ACTION_KCALLSTACK_DEPTH: + ret = sysctl_action_kcallstack_depth(req); + break; + case REQ_LIGHTWEIGHT_PET: + ret = sysctl_lightweight_pet(req); + break; default: ret = ENOENT; break; } - lck_mtx_unlock(&kperf_cfg_lock); + ktrace_unlock(); return ret; } @@ -528,209 +500,244 @@ kperf_sysctl SYSCTL_HANDLER_ARGS static int kperf_sysctl_bless_handler SYSCTL_HANDLER_ARGS { +#pragma unused(oidp, arg2) int ret; - // __unused struct sysctl_oid *unused_oidp = oidp; - (void)arg2; - - if ( !kperf_cfg_initted ) - panic("kperf_bootstrap not called"); - - lck_mtx_lock(&kperf_cfg_lock); - - /* which request */ - if ( (uintptr_t) arg1 == REQ_BLESS ) - ret = sysctl_bless( oidp, req ); - else - ret = ENOENT; - - lck_mtx_unlock(&kperf_cfg_lock); - - return ret; -} - -/*************************** - * - * Access control - * - ***************************/ - -/* Validate whether the current process has priviledges to access - * kperf (and by extension, trace). Returns 0 if access is granted. - */ -int -kperf_access_check(void) -{ - proc_t p = current_proc(); - proc_t blessed_p; - int ret = 0; - boolean_t pid_gone = FALSE; - - /* check if the pid that held the lock is gone */ - blessed_p = proc_find(blessed_pid); - - if ( blessed_p != NULL ) - proc_rele(blessed_p); - else - pid_gone = TRUE; - - if ( blessed_pid == -1 || pid_gone ) { - /* check for root */ - ret = suser(kauth_cred_get(), &p->p_acflag); - if( !ret ) + ktrace_lock(); + + /* if setting a new "blessed pid" (ktrace owning pid) */ + if (req->newptr != USER_ADDR_NULL) { + /* + * root can bypass the ktrace check when a flag is set (for + * backwards compatibility) or when ownership is maintained over + * subsystems resets (to allow the user space process that set + * ownership to unset it). + */ + if (!((ktrace_root_set_owner_allowed || + ktrace_keep_ownership_on_reset) && + kauth_cred_issuser(kauth_cred_get()))) + { + if ((ret = ktrace_configure(KTRACE_KPERF))) { + ktrace_unlock(); + return ret; + } + } + } else { + if ((ret = ktrace_read_check())) { + ktrace_unlock(); return ret; - } - - /* check against blessed pid */ - if( p->p_pid != blessed_pid ) - return EACCES; - - /* access granted. */ - return 0; -} - -/* specify a pid as being able to access kperf/trace, depiste not - * being root - */ -int -kperf_bless_pid(pid_t newpid) -{ - proc_t p = NULL; - pid_t current_pid; - - p = current_proc(); - current_pid = p->p_pid; - - /* are we allowed to preempt? */ - if ( (newpid != -1) && (blessed_pid != -1) && - (blessed_pid != current_pid) && !blessed_preempt ) { - /* check if the pid that held the lock is gone */ - p = proc_find(blessed_pid); - - if ( p != NULL ) { - proc_rele(p); - return EACCES; } } - /* validate new pid */ - if ( newpid != -1 ) { - p = proc_find(newpid); - - if ( p == NULL ) - return EINVAL; - - proc_rele(p); + /* which request */ + if ((uintptr_t)arg1 == REQ_BLESS) { + ret = sysctl_bless(req); + } else { + ret = ENOENT; } - /* take trace facility as well */ - kdbg_swap_global_state_pid(blessed_pid, newpid); - - blessed_pid = newpid; - blessed_preempt = FALSE; + ktrace_unlock(); - return 0; + return ret; } -/*************************** - * - * sysctl hooks - * - ***************************/ - /* root kperf node */ -SYSCTL_NODE(, OID_AUTO, kperf, CTLFLAG_RW|CTLFLAG_LOCKED, 0, + +SYSCTL_NODE(, OID_AUTO, kperf, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "kperf"); -/* action sub-section */ -SYSCTL_NODE(_kperf, OID_AUTO, action, CTLFLAG_RW|CTLFLAG_LOCKED, 0, +/* actions */ + +SYSCTL_NODE(_kperf, OID_AUTO, action, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "action"); SYSCTL_PROC(_kperf_action, OID_AUTO, count, - CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY, - (void*)REQ_ACTION_COUNT, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED, + (void *)REQ_ACTION_COUNT, sizeof(int), kperf_sysctl, "I", "Number of actions"); SYSCTL_PROC(_kperf_action, OID_AUTO, samplers, - CTLFLAG_RW|CTLFLAG_ANYBODY, - (void*)REQ_ACTION_SAMPLERS, - 3*sizeof(uint64_t), kperf_sysctl, "UQ", - "What to sample what a trigger fires an action"); + CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_MASKED | CTLFLAG_LOCKED, + (void *)REQ_ACTION_SAMPLERS, + 3 * sizeof(uint64_t), kperf_sysctl, "UQ", + "What to sample when a trigger fires an action"); SYSCTL_PROC(_kperf_action, OID_AUTO, userdata, - CTLFLAG_RW|CTLFLAG_ANYBODY, - (void*)REQ_ACTION_USERDATA, - 3*sizeof(uint64_t), kperf_sysctl, "UQ", + CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_MASKED | CTLFLAG_LOCKED, + (void *)REQ_ACTION_USERDATA, + 3 * sizeof(uint64_t), kperf_sysctl, "UQ", "User data to attribute to action"); SYSCTL_PROC(_kperf_action, OID_AUTO, filter_by_task, - CTLFLAG_RW|CTLFLAG_ANYBODY, - (void*)REQ_ACTION_FILTER_BY_TASK, - 3*sizeof(uint64_t), kperf_sysctl, "UQ", + CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_MASKED | CTLFLAG_LOCKED, + (void *)REQ_ACTION_FILTER_BY_TASK, + 3 * sizeof(uint64_t), kperf_sysctl, "UQ", "Apply a task filter to the action"); SYSCTL_PROC(_kperf_action, OID_AUTO, filter_by_pid, - CTLFLAG_RW|CTLFLAG_ANYBODY, - (void*)REQ_ACTION_FILTER_BY_PID, - 3*sizeof(uint64_t), kperf_sysctl, "UQ", + CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_MASKED | CTLFLAG_LOCKED, + (void *)REQ_ACTION_FILTER_BY_PID, + 3 * sizeof(uint64_t), kperf_sysctl, "UQ", "Apply a pid filter to the action"); -/* timer sub-section */ -SYSCTL_NODE(_kperf, OID_AUTO, timer, CTLFLAG_RW|CTLFLAG_LOCKED, 0, +SYSCTL_PROC(_kperf_action, OID_AUTO, ucallstack_depth, + CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_MASKED | CTLFLAG_LOCKED, + (void *)REQ_ACTION_UCALLSTACK_DEPTH, + sizeof(int), kperf_sysctl, "I", + "Maximum number of frames to include in user callstacks"); + +SYSCTL_PROC(_kperf_action, OID_AUTO, kcallstack_depth, + CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_MASKED | CTLFLAG_LOCKED, + (void *)REQ_ACTION_KCALLSTACK_DEPTH, + sizeof(int), kperf_sysctl, "I", + "Maximum number of frames to include in kernel callstacks"); + +/* timers */ + +SYSCTL_NODE(_kperf, OID_AUTO, timer, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "timer"); SYSCTL_PROC(_kperf_timer, OID_AUTO, count, - CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY, - (void*)REQ_TIMER_COUNT, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED, + (void *)REQ_TIMER_COUNT, sizeof(int), kperf_sysctl, "I", "Number of time triggers"); SYSCTL_PROC(_kperf_timer, OID_AUTO, period, - CTLFLAG_RW|CTLFLAG_ANYBODY, - (void*)REQ_TIMER_PERIOD, - 2*sizeof(uint64_t), kperf_sysctl, "UQ", "Timer number and period"); + CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_MASKED | CTLFLAG_LOCKED, + (void *)REQ_TIMER_PERIOD, + 2 * sizeof(uint64_t), kperf_sysctl, "UQ", + "Timer number and period"); SYSCTL_PROC(_kperf_timer, OID_AUTO, action, - CTLFLAG_RW|CTLFLAG_ANYBODY, - (void*)REQ_TIMER_ACTION, - 2*sizeof(uint64_t), kperf_sysctl, "UQ", "Timer number and actionid"); + CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_MASKED | CTLFLAG_LOCKED, + (void *)REQ_TIMER_ACTION, + 2 * sizeof(uint64_t), kperf_sysctl, "UQ", + "Timer number and actionid"); SYSCTL_PROC(_kperf_timer, OID_AUTO, pet_timer, - CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY, - (void*)REQ_TIMER_PET, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED, + (void *)REQ_TIMER_PET, sizeof(int), kperf_sysctl, "I", "Which timer ID does PET"); +/* kdebug trigger */ + +SYSCTL_NODE(_kperf, OID_AUTO, kdebug, CTLFLAG_RW | CTLFLAG_LOCKED, 0, + "kdebug"); + +SYSCTL_PROC(_kperf_kdebug, OID_AUTO, action, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED, + (void*)REQ_KDEBUG_ACTION, + sizeof(int), kperf_sysctl, "I", "ID of action to trigger on kdebug events"); + +SYSCTL_PROC(_kperf_kdebug, OID_AUTO, filter, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_MASKED | CTLFLAG_LOCKED, + (void*)REQ_KDEBUG_FILTER, + sizeof(int), kperf_sysctl, "P", "The filter that determines which kdebug events trigger a sample"); + /* misc */ + SYSCTL_PROC(_kperf, OID_AUTO, sampling, - CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY, - (void*)REQ_SAMPLING, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED, + (void *)REQ_SAMPLING, sizeof(int), kperf_sysctl, "I", "Sampling running"); +SYSCTL_PROC(_kperf, OID_AUTO, reset, + CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_MASKED | CTLFLAG_LOCKED, + (void *)REQ_RESET, + 0, kperf_sysctl, "-", "Reset kperf"); + SYSCTL_PROC(_kperf, OID_AUTO, blessed_pid, - CTLTYPE_INT|CTLFLAG_RW, /* must be root */ - (void*)REQ_BLESS, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, /* must be root */ + (void *)REQ_BLESS, sizeof(int), kperf_sysctl_bless_handler, "I", "Blessed pid"); SYSCTL_PROC(_kperf, OID_AUTO, blessed_preempt, - CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY, - (void*)REQ_BLESS_PREEMPT, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED, + (void *)REQ_BLESS_PREEMPT, sizeof(int), kperf_sysctl, "I", "Blessed preemption"); +SYSCTL_PROC(_kperf, OID_AUTO, kdbg_cswitch, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED, + (void *)REQ_KDBG_CSWITCH, + sizeof(int), kperf_sysctl, "I", "Generate context switch info"); -SYSCTL_PROC(_kperf, OID_AUTO, kdbg_callstacks, - CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY, - (void*)REQ_KDBG_CALLSTACKS, - sizeof(int), kperf_sysctl, "I", "Generate kdbg callstacks"); +SYSCTL_PROC(_kperf, OID_AUTO, pet_idle_rate, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED, + (void *)REQ_PET_IDLE_RATE, + sizeof(int), kperf_sysctl, "I", + "Rate at which unscheduled threads are forced to be sampled in " + "PET mode"); -SYSCTL_INT(_kperf, OID_AUTO, kdbg_cswitch, - CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY, - &kperf_cswitch_hook, 0, "Generate context switch info"); +SYSCTL_PROC(_kperf, OID_AUTO, lightweight_pet, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED, + (void *)REQ_LIGHTWEIGHT_PET, + sizeof(int), kperf_sysctl, "I", + "Status of lightweight PET mode"); -SYSCTL_PROC(_kperf, OID_AUTO, pet_idle_rate, - CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY, - (void*)REQ_PET_IDLE_RATE, - sizeof(int), kperf_sysctl, "I", "Rate at which unscheduled threads are forced to be sampled in PET mode"); +/* limits */ + +SYSCTL_NODE(_kperf, OID_AUTO, limits, CTLFLAG_RW | CTLFLAG_LOCKED, 0, + "limits"); + +#define REQ_LIM_PERIOD_NS (1) +#define REQ_LIM_BG_PERIOD_NS (2) +#define REQ_LIM_PET_PERIOD_NS (3) +#define REQ_LIM_BG_PET_PERIOD_NS (4) + +static int +kperf_sysctl_limits SYSCTL_HANDLER_ARGS +{ +#pragma unused(oidp, arg2) + int type = (int)arg1; + uint64_t limit = 0; + + switch (type) { + case REQ_LIM_PERIOD_NS: + limit = KP_MIN_PERIOD_NS; + break; + + case REQ_LIM_BG_PERIOD_NS: + limit = KP_MIN_PERIOD_BG_NS; + break; + + case REQ_LIM_PET_PERIOD_NS: + limit = KP_MIN_PERIOD_PET_NS; + break; + + case REQ_LIM_BG_PET_PERIOD_NS: + limit = KP_MIN_PERIOD_PET_BG_NS; + break; + + default: + return ENOENT; + } + + return sysctl_io_number(req, limit, sizeof(limit), &limit, NULL); +} + +SYSCTL_PROC(_kperf_limits, OID_AUTO, timer_min_period_ns, + CTLTYPE_QUAD | CTLFLAG_RD | CTLFLAG_ANYBODY | CTLFLAG_LOCKED, + (void *)REQ_LIM_PERIOD_NS, sizeof(uint64_t), kperf_sysctl_limits, + "Q", "Minimum timer period in nanoseconds"); +SYSCTL_PROC(_kperf_limits, OID_AUTO, timer_min_bg_period_ns, + CTLTYPE_QUAD | CTLFLAG_RD | CTLFLAG_ANYBODY | CTLFLAG_LOCKED, + (void *)REQ_LIM_BG_PERIOD_NS, sizeof(uint64_t), kperf_sysctl_limits, + "Q", "Minimum background timer period in nanoseconds"); +SYSCTL_PROC(_kperf_limits, OID_AUTO, timer_min_pet_period_ns, + CTLTYPE_QUAD | CTLFLAG_RD | CTLFLAG_ANYBODY | CTLFLAG_LOCKED, + (void *)REQ_LIM_PET_PERIOD_NS, sizeof(uint64_t), kperf_sysctl_limits, + "Q", "Minimum PET timer period in nanoseconds"); +SYSCTL_PROC(_kperf_limits, OID_AUTO, timer_min_bg_pet_period_ns, + CTLTYPE_QUAD | CTLFLAG_RD | CTLFLAG_ANYBODY | CTLFLAG_LOCKED, + (void *)REQ_LIM_BG_PET_PERIOD_NS, sizeof(uint64_t), kperf_sysctl_limits, + "Q", "Minimum background PET timer period in nanoseconds"); /* debug */ -SYSCTL_INT(_kperf, OID_AUTO, debug_level, CTLFLAG_RW, +SYSCTL_INT(_kperf, OID_AUTO, debug_level, CTLFLAG_RW | CTLFLAG_LOCKED, &kperf_debug_level, 0, "debug level"); +#if DEVELOPMENT || DEBUG +SYSCTL_QUAD(_kperf, OID_AUTO, already_pending_ipis, + CTLFLAG_RD | CTLFLAG_LOCKED, + &kperf_pending_ipis, ""); +#endif /* DEVELOPMENT || DEBUG */