/*
 * Copyright (c) 2017 Apple Inc. All rights reserved.
 */

#include <IOKit/perfcontrol/IOPerfControl.h>

#include <stdatomic.h>

#include <kern/thread_group.h>

#undef super
#define super OSObject
OSDefineMetaClassAndStructors(IOPerfControlClient, OSObject);

static IOPerfControlClient::IOPerfControlClientShared *_Atomic gIOPerfControlClientShared;
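/*
 * All IOPerfControlClient instances share a single IOPerfControlClientShared,
 * which holds the performance controller's interface and the pre-registration
 * device list. The pointer is published lock-free: the first init() to win the
 * compare-and-swap below installs the shared state; losers free their copy and
 * adopt the winner's.
 */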

bool
IOPerfControlClient::init(IOService *driver, uint64_t maxWorkCapacity)
{
	// TODO: Remove this limit and implement dynamic table growth if workloads are found that exceed this
	if (maxWorkCapacity > kMaxWorkTableNumEntries) {
		maxWorkCapacity = kMaxWorkTableNumEntries;
	}

	if (!super::init()) {
		return false;
	}

	shared = atomic_load_explicit(&gIOPerfControlClientShared, memory_order_acquire);
	if (shared == nullptr) {
		IOPerfControlClient::IOPerfControlClientShared *expected = shared;
		shared = reinterpret_cast<IOPerfControlClient::IOPerfControlClientShared*>(kalloc(sizeof(IOPerfControlClientShared)));
		if (!shared) {
			return false;
		}

		atomic_init(&shared->maxDriverIndex, 0);

		shared->interface = PerfControllerInterface{
			.version = 0,
			.registerDevice =
			    [](IOService *device) {
				    return kIOReturnSuccess;
			    },
			.unregisterDevice =
			    [](IOService *device) {
				    return kIOReturnSuccess;
			    },
			.workCanSubmit =
			    [](IOService *device, PerfControllerInterface::WorkState *state, WorkSubmitArgs *args) {
				    return false;
			    },
			.workSubmit =
			    [](IOService *device, uint64_t token, PerfControllerInterface::WorkState *state, WorkSubmitArgs *args) {
			    },
			.workBegin =
			    [](IOService *device, uint64_t token, PerfControllerInterface::WorkState *state, WorkBeginArgs *args) {
			    },
			.workEnd =
			    [](IOService *device, uint64_t token, PerfControllerInterface::WorkState *state, WorkEndArgs *args, bool done) {
			    },
		};

		shared->interfaceLock = IOLockAlloc();
		if (!shared->interfaceLock) {
			goto shared_init_error;
		}

		shared->deviceRegistrationList = OSSet::withCapacity(4);
		if (!shared->deviceRegistrationList) {
			goto shared_init_error;
		}

		if (!atomic_compare_exchange_strong_explicit(&gIOPerfControlClientShared, &expected, shared, memory_order_acq_rel,
		    memory_order_acquire)) {
			IOLockFree(shared->interfaceLock);
			shared->deviceRegistrationList->release();
			kfree(shared, sizeof(*shared));
			shared = expected;
		}
	}

	// Note: driverIndex is not guaranteed to be unique if maxDriverIndex wraps around. It is intended for debugging only.
	driverIndex = atomic_fetch_add_explicit(&shared->maxDriverIndex, 1, memory_order_relaxed) + 1;

	// + 1 since index 0 is unused for kIOPerfControlClientWorkUntracked
	workTableLength = maxWorkCapacity + 1;
	assertf(workTableLength <= kWorkTableMaxSize, "%zu exceeds max allowed capacity of %zu", workTableLength, kWorkTableMaxSize);
	if (maxWorkCapacity > 0) {
		workTable = reinterpret_cast<WorkTableEntry*>(kalloc(workTableLength * sizeof(WorkTableEntry)));
		if (!workTable) {
			goto error;
		}
		bzero(workTable, workTableLength * sizeof(WorkTableEntry));
		workTableNextIndex = 1;

		workTableLock = IOSimpleLockAlloc();
		if (!workTableLock) {
			goto error;
		}
	}

	return true;

error:
	if (workTable) {
		// Free with the same size used at allocation (workTableLength entries, not maxWorkCapacity).
		kfree(workTable, workTableLength * sizeof(WorkTableEntry));
	}
	if (workTableLock) {
		IOSimpleLockFree(workTableLock);
	}
	return false;
shared_init_error:
	if (shared) {
		if (shared->interfaceLock) {
			IOLockFree(shared->interfaceLock);
		}
		if (shared->deviceRegistrationList) {
			shared->deviceRegistrationList->release();
		}
		kfree(shared, sizeof(*shared));
		shared = nullptr;
	}
	return false;
}

IOPerfControlClient *
IOPerfControlClient::copyClient(IOService *driver, uint64_t maxWorkCapacity)
{
	IOPerfControlClient *client = new IOPerfControlClient;
	if (!client || !client->init(driver, maxWorkCapacity)) {
		panic("could not create IOPerfControlClient");
	}
	return client;
}
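/*
 * A minimal usage sketch, assuming a hypothetical driver class MyDriver with
 * an IOPerfControlClient *perfClient member (driver-side names are
 * illustrative, not part of this file):
 *
 *	bool
 *	MyDriver::start(IOService *provider)
 *	{
 *		...
 *		// One client per driver; up to 16 concurrently tracked work items.
 *		perfClient = IOPerfControlClient::copyClient(this, 16);
 *		perfClient->registerDevice(this, this);
 *		...
 *	}
 */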

/* Convert the per driver token into a globally unique token for the performance
 * controller's consumption. This is achieved by setting the driver's unique
 * index onto the high order bits. The performance controller is shared between
 * all drivers and must track all instances separately, while each driver has
 * its own token table, so this step is needed to avoid token collisions between
 * drivers.
 */
inline uint64_t
IOPerfControlClient::tokenToGlobalUniqueToken(uint64_t token)
{
	return token | (static_cast<uint64_t>(driverIndex) << kWorkTableIndexBits);
}
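/*
 * For example, if kWorkTableIndexBits were 24, a driver with driverIndex == 3
 * holding local token 5 would hand the controller the global token
 * (3 << 24) | 5 == 0x3000005. (The actual width is defined in the header; 24
 * here is illustrative only.)
 */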

/* With this implementation, tokens returned to the driver differ from tokens
 * passed to the performance controller. This implementation has the nice
 * property that tokens returned to the driver will always be between 1 and
 * the value of maxWorkCapacity passed by the driver to copyClient. The tokens
 * the performance controller sees will match on the lower order bits and have
 * the driver index set on the high order bits.
 */
uint64_t
IOPerfControlClient::allocateToken(thread_group *thread_group)
{
	uint64_t token = kIOPerfControlClientWorkUntracked;

#if CONFIG_THREAD_GROUPS
	auto s = IOSimpleLockLockDisableInterrupt(workTableLock);

	uint64_t num_tries = 0;
	size_t index = workTableNextIndex;
	// - 1 since entry 0 is for kIOPerfControlClientWorkUntracked
	while (num_tries < workTableLength - 1) {
		if (workTable[index].thread_group == nullptr) {
			thread_group_retain(thread_group);
			workTable[index].thread_group = thread_group;
			token = index;
			// next integer between 1 and workTableLength - 1
			workTableNextIndex = (index % (workTableLength - 1)) + 1;
			break;
		}
		// next integer between 1 and workTableLength - 1
		index = (index % (workTableLength - 1)) + 1;
		num_tries += 1;
	}
#if (DEVELOPMENT || DEBUG)
	if (token == kIOPerfControlClientWorkUntracked) {
		/* When investigating a panic here, first check that the driver is not leaking tokens.
		 * If the driver is not leaking tokens and maximum is less than kMaxWorkTableNumEntries,
		 * the driver should be modified to pass a larger value to copyClient.
		 * If the driver is not leaking tokens and maximum is equal to kMaxWorkTableNumEntries,
		 * this code will have to be modified to support dynamic table growth to support larger
		 * numbers of tokens.
		 */
		panic("Tokens allocated for this device exceeded maximum of %zu.\n",
		    workTableLength - 1); // - 1 since entry 0 is for kIOPerfControlClientWorkUntracked
	}
#endif

	IOSimpleLockUnlockEnableInterrupt(workTableLock, s);
#endif

	return token;
}
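/*
 * The scan above walks the table circularly over indices 1..workTableLength-1,
 * skipping slot 0 (reserved for kIOPerfControlClientWorkUntracked). E.g. with
 * workTableLength == 5, (index % 4) + 1 steps 1 -> 2 -> 3 -> 4 -> 1, so a full
 * pass of workTableLength - 1 tries visits every usable slot exactly once.
 */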

void
IOPerfControlClient::deallocateToken(uint64_t token)
{
#if CONFIG_THREAD_GROUPS
	assertf(token != kIOPerfControlClientWorkUntracked, "Attempt to deallocate token kIOPerfControlClientWorkUntracked\n");
	assertf(token < workTableLength, "Attempt to deallocate token %llu which is out of bounds for the table size of %zu\n", token, workTableLength);
	auto s = IOSimpleLockLockDisableInterrupt(workTableLock);

	auto &entry = workTable[token];
	auto *thread_group = entry.thread_group;
	bzero(&entry, sizeof(entry));
	workTableNextIndex = token;

	IOSimpleLockUnlockEnableInterrupt(workTableLock, s);

	// This can call into the performance controller if the last reference is dropped here. Are we sure
	// the driver isn't holding any locks? If not, we may want to async this to another context.
	thread_group_release(thread_group);
#endif
}

IOPerfControlClient::WorkTableEntry *
IOPerfControlClient::getEntryForToken(uint64_t token)
{
	if (token == kIOPerfControlClientWorkUntracked) {
		return nullptr;
	}

	if (token >= workTableLength) {
		panic("Invalid work token (%llu): index out of bounds.", token);
	}

	WorkTableEntry *entry = &workTable[token];
	assertf(entry->thread_group, "Invalid work token: %llu", token);
	return entry;
}

void
IOPerfControlClient::markEntryStarted(uint64_t token, bool started)
{
	if (token == kIOPerfControlClientWorkUntracked) {
		return;
	}

	if (token >= workTableLength) {
		panic("Invalid work token (%llu): index out of bounds.", token);
	}

	workTable[token].started = started;
}

IOReturn
IOPerfControlClient::registerDevice(__unused IOService *driver, IOService *device)
{
	IOReturn ret = kIOReturnSuccess;

	IOLockLock(shared->interfaceLock);

	if (shared->interface.version > 0) {
		ret = shared->interface.registerDevice(device);
	} else {
		shared->deviceRegistrationList->setObject(device);
	}

	IOLockUnlock(shared->interfaceLock);

	return ret;
}

void
IOPerfControlClient::unregisterDevice(__unused IOService *driver, IOService *device)
{
	IOLockLock(shared->interfaceLock);

	if (shared->interface.version > 0) {
		shared->interface.unregisterDevice(device);
	} else {
		shared->deviceRegistrationList->removeObject(device);
	}

	IOLockUnlock(shared->interfaceLock);
}

uint64_t
IOPerfControlClient::workSubmit(IOService *device, WorkSubmitArgs *args)
{
#if CONFIG_THREAD_GROUPS
	auto *thread_group = thread_group_get(current_thread());
	if (!thread_group) {
		return kIOPerfControlClientWorkUntracked;
	}

	PerfControllerInterface::WorkState state{
		.thread_group_id = thread_group_get_id(thread_group),
		.thread_group_data = thread_group_get_machine_data(thread_group),
		.work_data = nullptr,
		.work_data_size = 0,
		.started = false,
	};
	if (!shared->interface.workCanSubmit(device, &state, args)) {
		return kIOPerfControlClientWorkUntracked;
	}

	uint64_t token = allocateToken(thread_group);
	if (token != kIOPerfControlClientWorkUntracked) {
		state.work_data = &workTable[token].perfcontrol_data;
		state.work_data_size = sizeof(workTable[token].perfcontrol_data);
		shared->interface.workSubmit(device, tokenToGlobalUniqueToken(token), &state, args);
	}
	return token;
#else
	return kIOPerfControlClientWorkUntracked;
#endif
}
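/*
 * A sketch of the token-based lifecycle from a driver's perspective (MyDriver,
 * perfClient, and the argument contents are hypothetical):
 *
 *	// On the submitting thread, so the client can capture its thread group:
 *	WorkSubmitArgs submitArgs = { ... };
 *	uint64_t token = perfClient->workSubmit(this, &submitArgs);
 *
 *	// Later, when the hardware actually starts the work:
 *	WorkBeginArgs beginArgs = { ... };
 *	perfClient->workBegin(this, token, &beginArgs);
 *
 *	// On completion; done == true releases the token for reuse:
 *	WorkEndArgs endArgs = { ... };
 *	perfClient->workEnd(this, token, &endArgs, true);
 *
 * A token of kIOPerfControlClientWorkUntracked means the work is not tracked;
 * it may still be passed to workBegin/workEnd, which ignore it.
 */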

uint64_t
IOPerfControlClient::workSubmitAndBegin(IOService *device, WorkSubmitArgs *submitArgs, WorkBeginArgs *beginArgs)
{
#if CONFIG_THREAD_GROUPS
	auto *thread_group = thread_group_get(current_thread());
	if (!thread_group) {
		return kIOPerfControlClientWorkUntracked;
	}

	PerfControllerInterface::WorkState state{
		.thread_group_id = thread_group_get_id(thread_group),
		.thread_group_data = thread_group_get_machine_data(thread_group),
		.work_data = nullptr,
		.work_data_size = 0,
		.started = false,
	};
	if (!shared->interface.workCanSubmit(device, &state, submitArgs)) {
		return kIOPerfControlClientWorkUntracked;
	}

	uint64_t token = allocateToken(thread_group);
	if (token != kIOPerfControlClientWorkUntracked) {
		auto &entry = workTable[token];
		state.work_data = &entry.perfcontrol_data;
		state.work_data_size = sizeof(workTable[token].perfcontrol_data);
		shared->interface.workSubmit(device, tokenToGlobalUniqueToken(token), &state, submitArgs);
		state.started = true;
		shared->interface.workBegin(device, tokenToGlobalUniqueToken(token), &state, beginArgs);
		markEntryStarted(token, true);
	}
	return token;
#else
	return kIOPerfControlClientWorkUntracked;
#endif
}

void
IOPerfControlClient::workBegin(IOService *device, uint64_t token, WorkBeginArgs *args)
{
#if CONFIG_THREAD_GROUPS
	WorkTableEntry *entry = getEntryForToken(token);
	if (entry == nullptr) {
		return;
	}

	assertf(!entry->started, "Work for token %llu was already started", token);

	PerfControllerInterface::WorkState state{
		.thread_group_id = thread_group_get_id(entry->thread_group),
		.thread_group_data = thread_group_get_machine_data(entry->thread_group),
		.work_data = &entry->perfcontrol_data,
		.work_data_size = sizeof(entry->perfcontrol_data),
		.started = true,
	};
	shared->interface.workBegin(device, tokenToGlobalUniqueToken(token), &state, args);
	markEntryStarted(token, true);
#endif
}

void
IOPerfControlClient::workEnd(IOService *device, uint64_t token, WorkEndArgs *args, bool done)
{
#if CONFIG_THREAD_GROUPS
	WorkTableEntry *entry = getEntryForToken(token);
	if (entry == nullptr) {
		return;
	}

	PerfControllerInterface::WorkState state{
		.thread_group_id = thread_group_get_id(entry->thread_group),
		.thread_group_data = thread_group_get_machine_data(entry->thread_group),
		.work_data = &entry->perfcontrol_data,
		.work_data_size = sizeof(entry->perfcontrol_data),
		.started = entry->started,
	};
	shared->interface.workEnd(device, tokenToGlobalUniqueToken(token), &state, args, done);

	if (done) {
		deallocateToken(token);
	} else {
		markEntryStarted(token, false);
	}
#endif
}

static _Atomic uint64_t unique_work_context_id = 1ull;

class IOPerfControlWorkContext : public OSObject
{
	OSDeclareDefaultStructors(IOPerfControlWorkContext);

public:
	uint64_t id;
	struct thread_group *thread_group;
	bool started;
	uint8_t perfcontrol_data[32];

	bool init() override;
	void reset();
	void free() override;
};

OSDefineMetaClassAndStructors(IOPerfControlWorkContext, OSObject);

bool
IOPerfControlWorkContext::init()
{
	if (!super::init()) {
		return false;
	}
	id = atomic_fetch_add_explicit(&unique_work_context_id, 1, memory_order_relaxed) + 1;
	reset();
	return true;
}

void
IOPerfControlWorkContext::reset()
{
	thread_group = nullptr;
	started = false;
	bzero(perfcontrol_data, sizeof(perfcontrol_data));
}

void
IOPerfControlWorkContext::free()
{
	assertf(thread_group == nullptr, "IOPerfControlWorkContext ID %llu being released without calling workEnd!\n", id);
	super::free();
}

OSObject *
IOPerfControlClient::copyWorkContext()
{
	IOPerfControlWorkContext *context = new IOPerfControlWorkContext;

	if (context == nullptr) {
		return nullptr;
	}

	if (!context->init()) {
		context->free();
		return nullptr;
	}

	return OSDynamicCast(OSObject, context);
}
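/*
 * A sketch of the context-based lifecycle, which avoids the fixed-size token
 * table by letting the driver own a reusable context object (driver-side
 * names are hypothetical):
 *
 *	// Once, e.g. when the driver builds its command structure:
 *	OSObject *ctx = perfClient->copyWorkContext();
 *
 *	// Per work item, on the submitting thread:
 *	if (perfClient->workSubmitWithContext(this, ctx, &submitArgs)) {
 *		perfClient->workBeginWithContext(this, ctx, &beginArgs);
 *		...
 *		perfClient->workEndWithContext(this, ctx, &endArgs, true);
 *	}
 *
 *	// When the driver is done with the context (only after workEnd with
 *	// done == true, which drops the thread group reference and resets it):
 *	ctx->release();
 */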

bool
IOPerfControlClient::workSubmitAndBeginWithContext(IOService *device, OSObject *context, WorkSubmitArgs *submitArgs, WorkBeginArgs *beginArgs)
{
#if CONFIG_THREAD_GROUPS

	if (workSubmitWithContext(device, context, submitArgs) == false) {
		return false;
	}

	IOPerfControlWorkContext *work_context = OSDynamicCast(IOPerfControlWorkContext, context);

	PerfControllerInterface::WorkState state{
		.thread_group_id = thread_group_get_id(work_context->thread_group),
		.thread_group_data = thread_group_get_machine_data(work_context->thread_group),
		.work_data = &work_context->perfcontrol_data,
		.work_data_size = sizeof(work_context->perfcontrol_data),
		.started = true,
	};

	shared->interface.workBegin(device, work_context->id, &state, beginArgs);

	work_context->started = true;

	return true;
#else
	return false;
#endif
}

bool
IOPerfControlClient::workSubmitWithContext(IOService *device, OSObject *context, WorkSubmitArgs *args)
{
#if CONFIG_THREAD_GROUPS
	IOPerfControlWorkContext *work_context = OSDynamicCast(IOPerfControlWorkContext, context);

	if (work_context == nullptr) {
		return false;
	}

	auto *thread_group = thread_group_get(current_thread());
	assert(thread_group != nullptr);

	assertf(!work_context->started, "IOPerfControlWorkContext ID %llu was already started", work_context->id);
	assertf(work_context->thread_group == nullptr, "IOPerfControlWorkContext ID %llu has already taken a refcount on TG 0x%p\n", work_context->id, (void *)(work_context->thread_group));

	PerfControllerInterface::WorkState state{
		.thread_group_id = thread_group_get_id(thread_group),
		.thread_group_data = thread_group_get_machine_data(thread_group),
		.work_data = nullptr,
		.work_data_size = 0,
		.started = false,
	};
	if (!shared->interface.workCanSubmit(device, &state, args)) {
		return false;
	}

	work_context->thread_group = thread_group_retain(thread_group);

	state.work_data = &work_context->perfcontrol_data;
	state.work_data_size = sizeof(work_context->perfcontrol_data);

	shared->interface.workSubmit(device, work_context->id, &state, args);

	return true;
#else
	return false;
#endif
}

void
IOPerfControlClient::workBeginWithContext(IOService *device, OSObject *context, WorkBeginArgs *args)
{
#if CONFIG_THREAD_GROUPS
	IOPerfControlWorkContext *work_context = OSDynamicCast(IOPerfControlWorkContext, context);

	if (work_context == nullptr) {
		return;
	}

	if (work_context->thread_group == nullptr) {
		// This Work Context has not taken a refcount on a TG
		return;
	}

	assertf(!work_context->started, "IOPerfControlWorkContext %llu was already started", work_context->id);

	PerfControllerInterface::WorkState state{
		.thread_group_id = thread_group_get_id(work_context->thread_group),
		.thread_group_data = thread_group_get_machine_data(work_context->thread_group),
		.work_data = &work_context->perfcontrol_data,
		.work_data_size = sizeof(work_context->perfcontrol_data),
		.started = true,
	};
	shared->interface.workBegin(device, work_context->id, &state, args);

	work_context->started = true;
#endif
}

void
IOPerfControlClient::workEndWithContext(IOService *device, OSObject *context, WorkEndArgs *args, bool done)
{
#if CONFIG_THREAD_GROUPS
	IOPerfControlWorkContext *work_context = OSDynamicCast(IOPerfControlWorkContext, context);

	if (work_context == nullptr) {
		return;
	}

	if (work_context->thread_group == nullptr) {
		return;
	}

	PerfControllerInterface::WorkState state{
		.thread_group_id = thread_group_get_id(work_context->thread_group),
		.thread_group_data = thread_group_get_machine_data(work_context->thread_group),
		.work_data = &work_context->perfcontrol_data,
		.work_data_size = sizeof(work_context->perfcontrol_data),
		.started = work_context->started,
	};

	shared->interface.workEnd(device, work_context->id, &state, args, done);

	if (done) {
		thread_group_release(work_context->thread_group);
		work_context->reset();
	} else {
		work_context->started = false;
	}

	return;
#else
	return;
#endif
}

IOReturn
IOPerfControlClient::registerPerformanceController(PerfControllerInterface pci)
{
	IOReturn result = kIOReturnError;

	IOLockLock(shared->interfaceLock);

	if (shared->interface.version == 0 && pci.version > 0) {
		assert(pci.registerDevice && pci.unregisterDevice && pci.workCanSubmit && pci.workSubmit && pci.workBegin && pci.workEnd);
		result = kIOReturnSuccess;

		OSObject *obj;
		while ((obj = shared->deviceRegistrationList->getAnyObject())) {
			IOService *device = OSDynamicCast(IOService, obj);
			if (device) {
				pci.registerDevice(device);
			}
			shared->deviceRegistrationList->removeObject(obj);
		}

		shared->interface = pci;
	}

	IOLockUnlock(shared->interfaceLock);

	return result;
}
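/*
 * A sketch of the controller side, assuming hypothetical handler functions
 * supplied by the performance controller; only the first registration with a
 * nonzero version succeeds, since interface.version must still be 0:
 *
 *	PerfControllerInterface pci = {
 *		.version = 1,
 *		.registerDevice = myRegisterDevice,
 *		.unregisterDevice = myUnregisterDevice,
 *		.workCanSubmit = myWorkCanSubmit,
 *		.workSubmit = myWorkSubmit,
 *		.workBegin = myWorkBegin,
 *		.workEnd = myWorkEnd,
 *	};
 *	client->registerPerformanceController(pci);
 *
 * Devices registered before this call are queued on deviceRegistrationList
 * and replayed through pci.registerDevice here.
 */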