]>
Commit | Line | Data |
---|---|---|
bd6521f0 A |
1 | // |
2 | // EventPrinting.hpp | |
3 | // kdprof | |
4 | // | |
5 | // Created by James McIlree on 4/20/13. | |
6 | // Copyright (c) 2013 Apple. All rights reserved. | |
7 | // | |
8 | ||
9 | #ifndef kdprof_ParallelPrinting_hpp | |
10 | #define kdprof_ParallelPrinting_hpp | |
11 | ||
// Prints the column-header row that precedes the per-event rows produced by
// print_event(). `is_64_bit` presumably selects the wider (16-char) argument
// columns described in print_event's layout comment — confirm in the definition,
// which lives in another translation unit.
void print_event_header(const Globals& globals, bool is_64_bit);
13 | ||
// Formats one trace event as a single text row into [buf, buf_end).
//
// Returns a pointer one past the last byte written on success. If at any point
// the remaining space is exhausted, returns the ORIGINAL `buf` unchanged so the
// caller can grow its buffer and retry (see print_event_range_to_buffer).
//
// Column layout (widths below; 8/16 means 32-bit/64-bit trace widths):
template <typename SIZE>
char* print_event(char* buf, char* buf_end, const Globals& globals, const Machine<SIZE>& machine, const KDEvent<SIZE>& event, uintptr_t event_index)
{
	// Header is...
	//
	// [Index] Time Type Code arg1 arg2 arg3 arg4 thread cpu# command/IOP-name pid
	// 8 16 4 34 8/16 8/16 8/16 8/16 10 4 16 6
	//
	// For now, each column is folding up the "after" spacing in a single printf, IOW
	//
	// buf += snprintf(buf, buf_end - buf, "%8s ", "COL"); /* + 2 spaces */
	//
	// Not:
	//
	// buf += snprintf(buf, buf_end - buf, "%8s", "COL");
	// buf += snprintf(buf, buf_end - buf, " "); /* 2 spaces */

	ASSERT(event.cpu() > -1 && event.cpu() < machine.cpus().size(), "cpu_id out of range");
	const MachineCPU<SIZE>& cpu = machine.cpus()[event.cpu()];

	//
	// Okay, here is how snprintf works.
	//
	// char buf[2];
	//
	// snprintf(buf, 0, "a"); // Returns 1, buf is unchanged.
	// snprintf(buf, 1, "a"); // Returns 1, buf = \0
	// snprintf(buf, 2, "a"); // Returns 1, buf = 'a', \0

	//
	// If we cannot print successfully, we return the orignal pointer.
	//
	char* orig_buf = buf;

	//
	// [Index]
	//
	if (globals.should_print_event_index()) {
		buf += snprintf(buf, buf_end - buf, "%8llu ", (uint64_t)event_index);
	}

	// snprintf returns the would-be length on truncation, so buf can step past
	// buf_end; this check (repeated after every column) detects that overflow.
	if (buf >= buf_end)
		return orig_buf;

	//
	// Time
	//
	if (globals.should_print_mach_absolute_timestamps()) {
		// NOTE(review): when no beginning-of-time is set (value 0) the raw
		// mach-absolute timestamp is printed in hex, otherwise the offset is
		// printed in decimal — presumably intentional; confirm with kdprof docs.
		if (globals.beginning_of_time().value() == 0)
			buf += snprintf(buf, buf_end - buf, "%16llX ", (event.timestamp() - globals.beginning_of_time()).value());
		else
			buf += snprintf(buf, buf_end - buf, "%16llu ", (event.timestamp() - globals.beginning_of_time()).value());
	} else {
		// Convert to microseconds (nanoseconds / 1000) relative to start.
		NanoTime ntime = (event.timestamp() - globals.beginning_of_time()).nano_time(globals.timebase());
		buf += snprintf(buf, buf_end - buf, "%16.2f ", (double)ntime.value() / 1000.0);
	}

	if (buf >= buf_end)
		return orig_buf;

	//
	// Type Code
	//
	// "beg"/"end" for function-entry/exit events, "---" otherwise. The code is
	// printed symbolically when a trace-code name is known (never for IOPs).
	const char* type = event.is_func_start() ? "beg" : (event.is_func_end() ? "end" : "---");
	auto trace_code_it = globals.trace_codes().find(event.dbg_cooked());
	if (cpu.is_iop() || !globals.should_print_symbolic_event_codes() || trace_code_it == globals.trace_codes().end()) {
		buf += snprintf(buf, buf_end - buf, "%4s %-34x ", type, event.dbg_cooked());
	} else {
		buf += snprintf(buf, buf_end - buf, "%4s %-34s ", type, trace_code_it->second.c_str());
	}

	if (buf >= buf_end)
		return orig_buf;

	//
	// arg1
	//
	// IOKit power events carry text in arg1; the code reverses the bytes before
	// printing — presumably the name is stored reversed in the event; confirm.
	if (event.dbg_class() == DBG_IOKIT && event.dbg_subclass() == DBG_IOPOWER) {
		std::string kext_name = event.arg1_as_string();
		std::reverse(kext_name.begin(), kext_name.end());

		if (SIZE::is_64_bit)
			buf += snprintf(buf, buf_end - buf, "%-16s ", kext_name.c_str());
		else
			buf += snprintf(buf, buf_end - buf, "%-8s ", kext_name.c_str());
	} else {
		if (SIZE::is_64_bit)
			buf += snprintf(buf, buf_end - buf, "%-16llX ", (uint64_t)event.arg1());
		else
			buf += snprintf(buf, buf_end - buf, "%-8x ", (uint32_t)event.arg1());
	}

	if (buf >= buf_end)
		return orig_buf;

	//
	// Profiling showed that the repeated snprintf calls were hot, rolling them up is ~2.5% per on a HUGE file.
	//
	// arg2 arg3 arg4 thread cpu
	//
	if (SIZE::is_64_bit)
		buf += snprintf(buf, buf_end - buf, "%-16llX %-16llX %-16llX %10llX %4u ", (uint64_t)event.arg2(), (uint64_t)event.arg3(), (uint64_t)event.arg4(), (uint64_t)event.tid(), event.cpu());
	else
		buf += snprintf(buf, buf_end - buf, "%-8x %-8x %-8x %10llX %4u ", (uint32_t)event.arg2(), (uint32_t)event.arg3(), (uint32_t)event.arg4(), (uint64_t)event.tid(), event.cpu());

	if (buf >= buf_end)
		return orig_buf;

	//
	// command & pid (handled together due to IOP not printing a pid
	//
	if (cpu.is_iop()) {
		// We print the IOP name instead of a command
		buf += snprintf(buf, buf_end - buf, "%-16s\n", cpu.name());
	} else {
		// Look up the thread live at this event's timestamp; unknown threads
		// get placeholder command/pid fields.
		if (const MachineThread<SIZE>* thread = machine.thread(event.tid(), event.timestamp())) {
			buf += snprintf(buf, buf_end - buf, "%-16s %-6d\n", thread->process().name(), thread->process().pid());
		} else {
			buf += snprintf(buf, buf_end - buf, "%-16s %-6s\n", "?????", "???");
		}
	}

	// Still need to check this, its an error if we overflow on the last print!
	if (buf >= buf_end)
		return orig_buf;

	return buf;
}
142 | ||
143 | template <typename SIZE> | |
144 | char* print_event_range_to_buffer(const Globals& globals, const Machine<SIZE>& machine, TRange<uintptr_t> range, MemoryBuffer<char>& buffer ) { | |
145 | char* cursor = buffer.data(); | |
146 | char* cursor_end = cursor + buffer.capacity(); | |
147 | ||
148 | if (const KDEvent<SIZE>* events = machine.events()) { | |
149 | ASSERT(TRange<uintptr_t>(0, machine.event_count()).contains(range), "Sanity"); | |
150 | for (uintptr_t index = range.location(); index < range.max(); ++index) { | |
151 | char* temp = print_event(cursor, cursor_end, globals, machine, events[index], index); | |
152 | if (temp != cursor) | |
153 | cursor = temp; | |
154 | else { | |
155 | // Changing the capacity will invalidate the cursor | |
156 | ptrdiff_t offset = cursor - buffer.data(); | |
157 | buffer.set_capacity(buffer.capacity()*2); | |
158 | cursor = buffer.data() + offset; | |
159 | cursor_end = buffer.data() + buffer.capacity(); | |
160 | } | |
161 | } | |
162 | } | |
163 | ||
164 | return cursor; | |
165 | } | |
166 | ||
167 | class PrintWorkUnit { | |
168 | protected: | |
169 | MemoryBuffer<char> _buffer; | |
170 | TRange<uintptr_t> _event_range; | |
171 | char* _buffer_end; | |
172 | ||
173 | // We do not want work units copied. | |
174 | PrintWorkUnit(const PrintWorkUnit& that) = delete; | |
175 | PrintWorkUnit& operator=(const PrintWorkUnit& other) = delete; | |
176 | ||
177 | public: | |
178 | PrintWorkUnit(MemoryBuffer<char>&& buffer, TRange<uintptr_t> event_range, char* buffer_end) : | |
179 | _buffer(std::move(buffer)), | |
180 | _event_range(event_range), | |
181 | _buffer_end(buffer_end) | |
182 | { | |
183 | ASSERT(_buffer.capacity(), "Sanity"); | |
184 | ASSERT(_buffer.data(), "Sanity"); | |
185 | ASSERT(!_buffer_end || _buffer_end > _buffer.data(), "Sanity"); | |
186 | ASSERT(!_buffer_end || (_buffer_end < _buffer.data() + _buffer.capacity()), "Sanity"); | |
187 | } | |
188 | ||
189 | MemoryBuffer<char>& buffer() { return _buffer; } | |
190 | ||
191 | TRange<uintptr_t> event_range() { return _event_range; } | |
192 | void set_event_range(TRange<uintptr_t> range) { _event_range = range; } | |
193 | ||
194 | char* buffer_end() const { return _buffer_end; } | |
195 | void set_buffer_end(char* buffer_end) { _buffer_end = buffer_end; } | |
196 | }; | |
197 | ||
// Hands out contiguous chunks of the event-index space to worker threads.
//
// produce() is called concurrently from multiple threads; the shared cursor
// _start_index is advanced with a compare-and-swap loop so each chunk is
// claimed exactly once.
//
// NOTE(review): the CAS uses the deprecated OSAtomicCompareAndSwapPtrBarrier
// with pointer casts, and _start_index is read without atomics inside the
// loop; a std::atomic<uintptr_t> would express this directly — confirm before
// changing, since start_index() is also read single-threaded at setup time.
template <typename SIZE>
class PrintProducer {
    protected:
	const Globals& _globals;
	const Machine<SIZE>& _machine;
	uintptr_t _start_index;   // next unclaimed event index (shared, CAS-advanced)
	uintptr_t _end_index;     // one past the last event index to print
	uintptr_t _chunk_size;    // events handed out per produce() call

    public:
	// Defaults to the machine's full event range; if a summary start/stop is
	// set, narrows [start,end) via lower_bound on the (sorted) event array.
	PrintProducer(const Globals& globals, const Machine<SIZE>& machine, uintptr_t chunk_size) :
		_globals(globals),
		_machine(machine),
		_chunk_size(chunk_size)
	{
		_start_index = 0;
		_end_index = machine.event_count();

		if (globals.is_summary_start_set() || globals.is_summary_stop_set()) {
			AbsInterval machine_timespan = machine.timespan();

			KDEvent<SIZE> start_event(globals.summary_start(machine_timespan));
			auto it = std::lower_bound(machine.events(), machine.events() + _end_index, start_event);
			ASSERT(&*it >= machine.events(), "Returned start index lower than start");
			_start_index = std::distance(machine.events(), it);

			KDEvent<SIZE> end_event(globals.summary_stop(machine_timespan));
			it = std::lower_bound(machine.events(), machine.events() + _end_index, end_event);
			ASSERT(&*it <= machine.events() + _end_index, "Returned end index greater than end");
			_end_index = std::distance(machine.events(), it);

			ASSERT(_start_index <= _end_index, "start index is > end index");
		}
	}

	// Claims up to _chunk_size events, renders them into work_unit's buffer,
	// and records the claimed range. Returns false once all work is claimed.
	bool produce(PrintWorkUnit& work_unit) {
		// Claim a chunk of work to do
		uintptr_t orig_start_index, new_start_index;
		do {
			orig_start_index = _start_index;
			// min() clamps the final (possibly short) chunk at _end_index.
			new_start_index = orig_start_index + std::min(_chunk_size, _end_index - orig_start_index);
		} while (orig_start_index < _end_index && !OSAtomicCompareAndSwapPtrBarrier((void*)orig_start_index, (void *)new_start_index, (void * volatile *)&_start_index));

		// Did we claim work?
		if (orig_start_index < _end_index) {
			TRange<uintptr_t> event_range(orig_start_index, new_start_index - orig_start_index);
			char* end = print_event_range_to_buffer(_globals, _machine, event_range, work_unit.buffer());

			work_unit.set_event_range(event_range);
			work_unit.set_buffer_end(end);
			return true;
		}

		return false;
	}

	// First index that will be printed (used to seed the consumer's cursor).
	uintptr_t start_index() const { return _start_index; }
};
256 | ||
257 | template <typename SIZE> | |
258 | class PrintConsumer { | |
259 | protected: | |
260 | const Globals& _globals; | |
261 | uintptr_t _write_index; | |
262 | std::mutex _write_mutex; | |
263 | std::condition_variable _write_condition; | |
264 | ||
265 | public: | |
266 | PrintConsumer(const Globals& globals, const Machine<SIZE>& machine, uintptr_t start_index) : | |
267 | _globals(globals), | |
268 | _write_index(start_index) | |
269 | { | |
270 | } | |
271 | ||
272 | void consume(PrintWorkUnit& work_unit) { | |
273 | std::unique_lock<std::mutex> guard(_write_mutex); | |
274 | _write_condition.wait(guard, [&](){ return work_unit.event_range().location() == this->_write_index; }); | |
275 | ||
276 | ASSERT(work_unit.event_range().location() == _write_index, "Sanity"); | |
277 | ||
278 | char* data = work_unit.buffer().data(); | |
279 | size_t bytes = work_unit.buffer_end() - data; | |
280 | write(_globals.output_fd(), work_unit.buffer().data(), bytes); | |
281 | _write_index = work_unit.event_range().max(); | |
282 | ||
283 | _write_condition.notify_all(); | |
284 | } | |
285 | }; | |
286 | ||
287 | template <typename SIZE> | |
288 | uintptr_t print_machine_events(const Globals& globals, const Machine<SIZE>& machine) { | |
289 | print_event_header(globals, SIZE::is_64_bit); | |
290 | ||
291 | if (const KDEvent<SIZE>* events = machine.events()) { | |
292 | if (uintptr_t event_count = machine.event_count()) { | |
293 | ||
294 | // | |
295 | // We want to chunk this up into reasonably sized pieces of work. | |
296 | // Because each piece of work can potentially accumulate a large | |
297 | // amount of memory, we need to limit the amount of work "in-flight". | |
298 | // | |
299 | uint32_t active_cpus = Kernel::active_cpu_count(); | |
300 | ||
301 | uintptr_t chunk_size = 2000; | |
302 | ||
303 | PrintProducer<SIZE> producer(globals, machine, chunk_size); | |
304 | PrintConsumer<SIZE> consumer(globals, machine, producer.start_index()); | |
305 | ||
306 | std::vector<std::thread> threads; | |
307 | for (uint32_t i=0; i<active_cpus; ++i) { | |
308 | threads.push_back(std::thread([&]() { | |
309 | PrintWorkUnit work_unit(MemoryBuffer<char>(160 * chunk_size), TRange<uintptr_t>(0, 0), (char*)NULL); | |
310 | while (producer.produce(work_unit)) { | |
311 | consumer.consume(work_unit); | |
312 | } | |
313 | })); | |
314 | } | |
315 | ||
316 | for(auto& thread : threads){ | |
317 | thread.join(); | |
318 | } | |
319 | ||
320 | uint32_t totalProcesses = 0; | |
321 | uint32_t totalThreads = 0; | |
322 | ||
323 | for (auto process : machine.processes()) { | |
324 | if (!process->is_created_by_previous_machine_state()) { | |
325 | totalProcesses++; | |
326 | } | |
327 | } | |
328 | ||
329 | for (auto thread : machine.threads()) { | |
330 | if (!thread->is_created_by_previous_machine_state()) { | |
331 | totalThreads++; | |
332 | } | |
333 | } | |
334 | ||
335 | dprintf(globals.output_fd(), "Total Events: %llu\n", (uint64_t)event_count); | |
336 | dprintf(globals.output_fd(), "Total Processes: %u\n", totalProcesses); | |
337 | dprintf(globals.output_fd(), "Total Threads: %u\n", totalThreads); | |
338 | ||
339 | return event_count; | |
340 | } | |
341 | } | |
342 | ||
343 | return 0; | |
344 | } | |
345 | ||
346 | #endif |