]>
Commit | Line | Data |
---|---|---|
1c79356b A |
1 | /* |
2 | * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. | |
3 | * | |
4 | * @APPLE_LICENSE_HEADER_START@ | |
5 | * | |
43866e37 | 6 | * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved. |
1c79356b | 7 | * |
43866e37 A |
8 | * This file contains Original Code and/or Modifications of Original Code |
9 | * as defined in and that are subject to the Apple Public Source License | |
10 | * Version 2.0 (the 'License'). You may not use this file except in | |
11 | * compliance with the License. Please obtain a copy of the License at | |
12 | * http://www.opensource.apple.com/apsl/ and read it before using this | |
13 | * file. | |
14 | * | |
15 | * The Original Code and all software distributed under the License are | |
16 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
1c79356b A |
17 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
18 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
43866e37 A |
19 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
20 | * Please see the License for the specific language governing rights and | |
21 | * limitations under the License. | |
1c79356b A |
22 | * |
23 | * @APPLE_LICENSE_HEADER_END@ | |
24 | */ | |
25 | /* | |
26 | * @OSF_COPYRIGHT@ | |
27 | */ | |
28 | /* | |
29 | * HISTORY | |
30 | * | |
31 | * Revision 1.1.1.1 1998/09/22 21:05:49 wsanchez | |
32 | * Import of Mac OS X kernel (~semeria) | |
33 | * | |
34 | * Revision 1.1.1.1 1998/03/07 02:26:08 wsanchez | |
35 | * Import of OSF Mach kernel (~mburg) | |
36 | * | |
37 | * Revision 1.1.7.1 1997/09/22 17:41:24 barbou | |
38 | * MP+RT: protect cpu_number() usage against preemption. | |
39 | * [97/09/16 barbou] | |
40 | * | |
41 | * Revision 1.1.5.1 1995/01/06 19:53:37 devrcs | |
42 | * mk6 CR668 - 1.3b26 merge | |
43 | * new file for mk6 | |
44 | * [1994/10/12 22:25:20 dwm] | |
45 | * | |
46 | * Revision 1.1.2.2 1994/05/16 19:19:17 meissner | |
47 | * Add support for converting 64-bit integers to a decimal string. | |
48 | * Use the correct address (selfpc) when creating the prof header for gprof. | |
49 | * [1994/04/28 21:44:59 meissner] | |
50 | * | |
51 | * Revision 1.1.2.1 1994/04/08 17:51:42 meissner | |
52 | * Make most stats 64 bits, except for things like memory allocation. | |
53 | * [1994/04/02 14:58:21 meissner] | |
54 | * | |
55 | * Do not provide old mcount support under MK or server. | |
56 | * Fixup stats size so it is the same as in profile-md.h. | |
57 | * [1994/03/29 21:00:03 meissner] | |
58 | * | |
59 | * Use faster sequence for overflow addition. | |
60 | * Keep {dummy,prof,gprof,old}_mcount counts in double precision. | |
61 | * Add kernel NCPUS > 1 support. | |
62 | * [1994/03/17 20:13:23 meissner] | |
63 | * | |
64 | * Add gprof/prof overflow support | |
65 | * [1994/03/17 14:56:44 meissner] | |
66 | * | |
67 | * Add size of histogram counters & unused fields to profile_profil struct | |
68 | * [1994/02/17 21:41:44 meissner] | |
69 | * | |
70 | * Add too_low/too_high to profile_stats. | |
71 | * [1994/02/16 22:38:11 meissner] | |
72 | * | |
73 | * Bump # allocation contexts to 32 from 16. | |
74 | * Store unique ptr address in gprof function header structure for _profile_reset. | |
75 | * Add new fields from profile-{internal,md}.h. | |
76 | * Align loop looking for an unlocked acontext. | |
77 | * Count # times a locked context block was found. | |
78 | * Expand copyright. | |
79 | * [1994/02/07 12:40:56 meissner] | |
80 | * | |
81 | * Keep track of the number of times the kernel overflows the HISTCOUNTER counter. | |
82 | * [1994/02/03 20:13:23 meissner] | |
83 | * | |
84 | * Add stats for {user,kernel,idle} mode in the kernel. | |
85 | * [1994/02/03 15:17:22 meissner] | |
86 | * | |
87 | * No change. | |
88 | * [1994/02/03 00:58:49 meissner] | |
89 | * | |
90 | * Combine _profile_{vars,stats,md}; Allow more than one _profile_vars. | |
91 | * [1994/02/01 12:03:56 meissner] | |
92 | * | |
93 | * Move _mcount_ptr to be closer to other data declarations. | |
94 | * Add text_len to profile_profil structure for mk. | |
95 | * Split records_cnt into prof_cnt/gprof_cnt. | |
96 | * Always update prof_cnt/gprof_cnt even if not DO_STATS. | |
97 | * Add current/max cpu indicator to stats for kernel. | |
98 | * [1994/01/28 23:33:20 meissner] | |
99 | * | |
100 | * Don't do 4+Lgotoff(lab), use separate labels. | |
101 | * Change GPROF_HASH_SHIFT to 9 (from 8). | |
102 | * [1994/01/26 22:00:59 meissner] | |
103 | * | |
104 | * Fixup NO_RECURSIVE_ALLOC to do byte loads, not word loads. | |
105 | * [1994/01/26 20:30:57 meissner] | |
106 | * | |
107 | * Move callback pointers into separate allocation context. | |
108 | * Add size fields for other structures to profile-vars. | |
109 | * Allocate string table as one large allocation. | |
110 | * Rewrite old mcount code once again. | |
111 | * Use multiply to make hash value, not divide. | |
112 | * Hash table is now a power of two. | |
113 | * [1994/01/26 20:23:32 meissner] | |
114 | * | |
115 | * Cut hash table size back to 16189. | |
116 | * Add size fields to all structures. | |
117 | * Add major/minor version number to _profile_md. | |
118 | * Move allocation context block pointers to _profile_vars. | |
119 | * Move _gprof_dummy after _profile_md. | |
120 | * New function header code now falls into hash an element | |
121 | * to avoid having the hash code duplicated or use a macro. | |
122 | * Fix bug in _gprof_mcount with ELF shared libraries. | |
123 | * [1994/01/25 01:45:59 meissner] | |
124 | * | |
125 | * Move init functions to C code; rearrange profil variables. | |
126 | * [1994/01/22 01:11:14 meissner] | |
127 | * | |
128 | * No change. | |
129 | * [1994/01/20 20:56:43 meissner] | |
130 | * | |
131 | * Fixup copyright. | |
132 | * [1994/01/18 23:07:39 meissner] | |
133 | * | |
134 | * Make flags byte-sized. | |
135 | * Add have_bb flag. | |
136 | * Add init_format flag. | |
137 | * Always put word size multipler first in .space. | |
138 | * [1994/01/18 21:57:14 meissner] | |
139 | * | |
140 | * Fix elfpic problems in last change. | |
141 | * [1994/01/16 14:04:26 meissner] | |
142 | * | |
143 | * Rewrite gprof caching to be faster & not need a lock. | |
144 | * Record prof information for gprof too. | |
145 | * Bump reserved stats to 64. | |
146 | * Bump up hash table size 30799. | |
147 | * Conditionally use lock prefix. | |
148 | * Change most #ifdef's to #if. | |
149 | * DEBUG_PROFILE turns on stack frames now. | |
150 | * Conditionally add externs to gprof to determine where time is spent. | |
151 | * Prof_mcount uses xchgl to update function pointer. | |
152 | * [1994/01/15 18:40:33 meissner] | |
153 | * | |
154 | * Fix a comment. | |
155 | * Separate statistics from debugging (though debugging turns it on). | |
156 | * Remove debug code that traces each gprof request. | |
157 | * [1994/01/15 00:59:02 meissner] | |
158 | * | |
159 | * Move max hash bucket calculation into _gprof_write & put info in stats structure. | |
160 | * [1994/01/04 16:15:14 meissner] | |
161 | * | |
162 | * Use _profile_printf to write diagnostics; add diag_stream to hold stream to write to. | |
163 | * [1994/01/04 15:37:44 meissner] | |
164 | * | |
165 | * Add more allocation memory pools (gprof function hdrs in particular). | |
166 | * For prof, gprof arc, and gprof function hdrs, allocate 16 pages at a time. | |
167 | * Add major/minor version numbers to _profile_{vars,stats}. | |
168 | * Add # profil buckets field to _profil_stats. | |
169 | * [19 | |
170 | * | |
171 | * $EndLog$ | |
172 | */ | |
173 | ||
174 | /* | |
175 | * Common 386 profiling module that is shared between the kernel, mach | |
176 | * servers, and the user space library. Each environment includes | |
177 | * this file. | |
178 | */ | |
179 | ||
180 | .file "profile-asm.s" | |
181 | ||
182 | #include <cpus.h> | |
183 | ||
184 | #include <machine/asm.h> | |
185 | ||
186 | /* | |
187 | * By default, debugging turns on statistics and stack frames. | |
188 | */ | |
189 | ||
190 | #if DEBUG_PROFILE | |
191 | #ifndef DO_STATS | |
192 | #define DO_STATS 1 | |
193 | #endif | |
194 | ||
195 | #ifndef STACK_FRAMES | |
196 | #define STACK_FRAMES 1 | |
197 | #endif | |
198 | #endif | |
199 | ||
200 | #ifndef OLD_MCOUNT | |
201 | #define OLD_MCOUNT 0 /* do not compile old code for mcount */ | |
202 | #endif | |
203 | ||
204 | #ifndef DO_STATS | |
205 | #define DO_STATS 1 /* compile in statistics code */ | |
206 | #endif | |
207 | ||
208 | #ifndef DO_LOCK | |
209 | #define DO_LOCK 0 /* use lock; in front of increments */ | |
210 | #endif | |
211 | ||
212 | #ifndef LOCK_STATS | |
213 | #define LOCK_STATS DO_LOCK /* update stats with lock set */ | |
214 | #endif | |
215 | ||
216 | #ifndef STACK_FRAMES | |
217 | #define STACK_FRAMES 0 /* create stack frames for debugger */ | |
218 | #endif | |
219 | ||
220 | #ifndef NO_RECURSIVE_ALLOC | |
221 | #define NO_RECURSIVE_ALLOC 0 /* check for recursive allocs */ | |
222 | /* (not thread safe!) */ | |
223 | #endif | |
224 | ||
225 | #ifndef MARK_GPROF | |
226 | #define MARK_GPROF 0 /* add externs for gprof profiling */ | |
227 | #endif | |
228 | ||
229 | #ifndef OVERFLOW | |
230 | #define OVERFLOW 1 /* add overflow checking support */ | |
231 | #endif | |
232 | ||
233 | /* | |
234 | * Turn on the use of the lock prefix if desired. | |
235 | */ | |
236 | ||
237 | #ifndef LOCK | |
238 | #if DO_LOCK | |
239 | #define LOCK lock; | |
240 | #else | |
241 | #define LOCK | |
242 | #endif | |
243 | #endif | |
244 | ||
245 | #ifndef SLOCK | |
246 | #if LOCK_STATS | |
247 | #define SLOCK LOCK | |
248 | #else | |
249 | #define SLOCK | |
250 | #endif | |
251 | #endif | |
252 | ||
253 | /* | |
254 | * Double or single precision incrementing | |
255 | */ | |
256 | ||
257 | #if OVERFLOW | |
258 | #define DINC(mem) LOCK addl $1,mem; LOCK adcl $0,4+mem | |
259 | #define DINC2(mem,mem2) LOCK addl $1,mem; LOCK adcl $0,mem2 | |
260 | #define SDINC(mem) SLOCK addl $1,mem; SLOCK adcl $0,4+mem | |
261 | #define SDADD(val,mem) SLOCK addl val,mem; SLOCK adcl $0,4+mem | |
262 | #define SDADDNEG(val,mem) SLOCK subl val,mem; SLOCK adcl $0,4+mem | |
263 | #define SDSUB(val,mem) SLOCK subl val,mem; SLOCK sbbl $0,4+mem | |
264 | ||
265 | #else | |
266 | #define DINC(mem) LOCK incl mem | |
267 | #define DINC2(mem,mem2) LOCK incl mem | |
268 | #define SDINC(mem) SLOCK incl mem | |
269 | #define SDADD(val,mem) SLOCK addl val,mem | |
270 | #define SDADDNEG(val,mem) SLOCK subl val,mem | |
271 | #define SDSUB(val,mem) SLOCK subl val,mem | |
272 | #endif | |
273 | ||
274 | /* | |
275 | * Stack frame support so that debugger traceback works. | |
276 | */ | |
277 | ||
278 | #if STACK_FRAMES | |
279 | #define ENTER pushl %ebp; movl %esp,%ebp | |
280 | #define LEAVE0 popl %ebp | |
281 | #define Estack 4 | |
282 | #else | |
283 | #define ENTER | |
284 | #define LEAVE0 | |
285 | #define Estack 0 | |
286 | #endif | |
287 | ||
288 | /* | |
289 | * Gprof profiling. | |
290 | */ | |
291 | ||
292 | #if MARK_GPROF | |
293 | #define MARK(name) .globl EXT(name); ELF_FUNC(EXT(name)); ELF_SIZE(EXT(name),0); LEXT(name) | |
294 | #else | |
295 | #define MARK(name) | |
296 | #endif | |
297 | ||
298 | /* | |
299 | * Profiling allocation context block. Each time memory is needed, the | |
300 | * allocator loops until it finds an unlocked context block, and allocates | |
301 | * from that block. If no context blocks are available, a new memory | |
302 | * pool is allocated, and added to the end of the chain. | |
303 | */ | |
304 | ||
305 | LCL(A_next) = 0 /* next context block link (must be 0) */ | |
306 | LCL(A_plist) = LCL(A_next)+4 /* head of page list for context block */ | |
307 | LCL(A_lock) = LCL(A_plist)+4 /* lock word */ | |
308 | LCL(A_size) = LCL(A_lock)+4 /* size of context block */ | |
309 | ||
310 | #define A_next LCL(A_next) | |
311 | #define A_plist LCL(A_plist) | |
312 | #define A_lock LCL(A_lock) | |
313 | #define A_size LCL(A_size) | |
314 | ||
315 | /* | |
316 | * Allocation contexts used. | |
317 | */ | |
318 | ||
319 | LCL(C_prof) = 0 /* prof records */ | |
320 | LCL(C_gprof) = 1 /* gprof arc records */ | |
321 | LCL(C_gfunc) = 2 /* gprof function headers */ | |
322 | LCL(C_misc) = 3 /* misc. allocations */ | |
323 | LCL(C_profil) = 4 /* memory for profil */ | |
324 | LCL(C_dci) = 5 /* memory for dci */ | |
325 | LCL(C_bb) = 6 /* memory for basic blocks */ | |
326 | LCL(C_callback) = 7 /* memory for callbacks */ | |
327 | LCL(C_max) = 32 /* # allocation contexts */ | |
328 | ||
329 | #define C_prof LCL(C_prof) | |
330 | #define C_gprof LCL(C_gprof) | |
331 | #define C_gfunc LCL(C_gfunc) | |
332 | #define C_max LCL(C_max) | |
333 | ||
334 | /* | |
335 | * Linked list of memory allocations. | |
336 | */ | |
337 | ||
338 | LCL(M_first) = 0 /* pointer to first byte available */ | |
339 | LCL(M_ptr) = LCL(M_first)+4 /* pointer to next available byte */ | |
340 | LCL(M_next) = LCL(M_ptr)+4 /* next page allocated */ | |
341 | LCL(M_nfree) = LCL(M_next)+4 /* # bytes available */ | |
342 | LCL(M_nalloc) = LCL(M_nfree)+4 /* # bytes allocated */ | |
343 | LCL(M_num) = LCL(M_nalloc)+4 /* # allocations done on this page */ | |
344 | LCL(M_size) = LCL(M_num)+4 /* size of page header */ | |
345 | ||
346 | #define M_first LCL(M_first) | |
347 | #define M_ptr LCL(M_ptr) | |
348 | #define M_next LCL(M_next) | |
349 | #define M_nfree LCL(M_nfree) | |
350 | #define M_nalloc LCL(M_nalloc) | |
351 | #define M_num LCL(M_num) | |
352 | #define M_size LCL(M_size) | |
353 | ||
354 | /* | |
355 | * Prof data type. | |
356 | */ | |
357 | ||
358 | LCL(P_addr) = 0 /* function address */ | |
359 | LCL(P_count) = LCL(P_addr)+4 /* # times function called */ | |
360 | LCL(P_overflow) = LCL(P_count)+4 /* # times count overflowed */ | |
361 | LCL(P_size) = LCL(P_overflow)+4 /* size of prof data type */ | |
362 | ||
363 | #define P_addr LCL(P_addr) | |
364 | #define P_count LCL(P_count) | |
365 | #define P_overflow LCL(P_overflow) | |
366 | #define P_size LCL(P_size) | |
367 | ||
368 | /* | |
369 | * Gprof data type. | |
370 | */ | |
371 | ||
372 | LCL(G_next) = 0 /* next hash link (must be 0) */ | |
373 | LCL(G_frompc) = LCL(G_next)+4 /* caller's caller */ | |
374 | LCL(G_selfpc) = LCL(G_frompc)+4 /* caller's address */ | |
375 | LCL(G_count) = LCL(G_selfpc)+4 /* # times arc traversed */ | |
376 | LCL(G_overflow) = LCL(G_count)+4 /* # times count overflowed */ | |
377 | LCL(G_size) = LCL(G_overflow)+4 /* size of gprof data type */ | |
378 | ||
379 | #define G_next LCL(G_next) | |
380 | #define G_frompc LCL(G_frompc) | |
381 | #define G_selfpc LCL(G_selfpc) | |
382 | #define G_count LCL(G_count) | |
383 | #define G_overflow LCL(G_overflow) | |
384 | #define G_size LCL(G_size) | |
385 | ||
386 | /* | |
387 | * Gprof header. | |
388 | * | |
389 | * At least one header is allocated for each unique function that is profiled. | |
390 | * In order to save time calculating the hash value, the last H_maxcache | |
391 | * distinct arcs are cached within this structure. Also, to avoid loading | |
392 | * the GOT when searching the hash table, we copy the hash pointer to this | |
393 | * structure, so that we only load the GOT when we need to allocate an arc. | |
394 | */ | |
395 | ||
396 | LCL(H_maxcache) = 3 /* # of cache table entries */ | |
397 | LCL(H_csize) = 4*LCL(H_maxcache) /* size of each cache array */ | |
398 | ||
399 | LCL(H_hash_ptr) = 0 /* hash table to use */ | |
400 | LCL(H_unique_ptr) = LCL(H_hash_ptr)+4 /* function unique pointer */ | |
401 | LCL(H_prof) = LCL(H_unique_ptr)+4 /* prof statistics */ | |
402 | LCL(H_cache_ptr) = LCL(H_prof)+P_size /* cache table of element pointers */ | |
403 | LCL(H_size) = LCL(H_cache_ptr)+LCL(H_csize) /* size of gprof header type */ | |
404 | ||
405 | #define H_maxcache LCL(H_maxcache) | |
406 | #define H_csize LCL(H_csize) | |
407 | #define H_hash_ptr LCL(H_hash_ptr) | |
408 | #define H_unique_ptr LCL(H_unique_ptr) | |
409 | #define H_prof LCL(H_prof) | |
410 | #define H_cache_ptr LCL(H_cache_ptr) | |
411 | #define H_size LCL(H_size) | |
412 | ||
413 | /* | |
414 | * Number of digits needed to write a 64 bit number including trailing null. | |
415 | * (rounded up to be divisible by 4). | |
416 | */ | |
417 | ||
418 | #define N_digit 24 | |
419 | ||
420 | \f | |
421 | .data | |
422 | ||
423 | /* | |
424 | * Default gprof hash table size, which must be a power of two. | |
425 | * The shift specifies how many low order bits to eliminate when | |
426 | * calculating the hash value. | |
427 | */ | |
428 | ||
429 | #ifndef GPROF_HASH_SIZE | |
430 | #define GPROF_HASH_SIZE 16384 | |
431 | #endif | |
432 | ||
433 | #ifndef GPROF_HASH_SHIFT | |
434 | #define GPROF_HASH_SHIFT 9 | |
435 | #endif | |
436 | ||
437 | #define GPROF_HASH_MASK (GPROF_HASH_SIZE-1) | |
438 | ||
439 | DATA(_profile_hash_size) | |
440 | .long GPROF_HASH_SIZE | |
441 | ENDDATA(_profile_hash_size) | |
442 | ||
443 | \f | |
444 | ||
445 | /* | |
446 | * Pointer that the compiler uses to call to the appropriate mcount function. | |
447 | */ | |
448 | ||
449 | DATA(_mcount_ptr) | |
450 | .long EXT(_dummy_mcount) | |
451 | ENDDATA(_mcount_ptr) | |
452 | ||
453 | /* | |
454 | * Global profile variables. The structure that accesses this in C is declared | |
455 | * in profile-internal.h. All items in .data that follow this will be used as | |
456 | * one giant record, and each unique machine, thread, kgmon output or what have | |
457 | * you will create a separate instance. Typically there is only one instance | |
458 | * which will be the memory laid out below. | |
459 | */ | |
460 | ||
461 | LCL(var_major_version) = 0 /* major version number */ | |
462 | LCL(var_minor_version) = LCL(var_major_version)+4 /* minor version number */ | |
463 | LCL(vars_size) = LCL(var_minor_version)+4 /* size of _profile_vars structure */ | |
464 | LCL(plist_size) = LCL(vars_size)+4 /* size of page_list structure */ | |
465 | LCL(acontext_size) = LCL(plist_size)+4 /* size of allocation contexts */ | |
466 | LCL(callback_size) = LCL(acontext_size)+4 /* size of callback structure */ | |
467 | LCL(type) = LCL(callback_size)+4 /* profile type (gprof, prof) */ | |
468 | LCL(error_msg) = LCL(type)+4 /* error message for perror */ | |
469 | LCL(filename) = LCL(error_msg)+4 /* filename to write to */ | |
470 | LCL(str_ptr) = LCL(filename)+4 /* string table pointer */ | |
471 | LCL(stream) = LCL(str_ptr)+4 /* stdio stream to write to */ | |
472 | LCL(diag_stream) = LCL(stream)+4 /* stdio stream to write diagnostics to */ | |
473 | LCL(fwrite_func) = LCL(diag_stream)+4 /* function like fwrite to output bytes */ | |
474 | LCL(page_size) = LCL(fwrite_func)+4 /* page size in bytes */ | |
475 | LCL(str_bytes) = LCL(page_size)+4 /* # bytes in string table */ | |
476 | LCL(str_total) = LCL(str_bytes)+4 /* # total bytes allocated for string table */ | |
477 | LCL(clock_ticks) = LCL(str_total)+4 /* # clock ticks per second */ | |
478 | ||
479 | /* profil variables */ | |
480 | LCL(profil_start) = LCL(clock_ticks)+4 /* start of profil variables */ | |
481 | LCL(lowpc) = LCL(clock_ticks)+4 /* lowest address */ | |
482 | LCL(highpc) = LCL(lowpc)+4 /* highest address */ | |
483 | LCL(text_len) = LCL(highpc)+4 /* highpc-lowpc */ | |
484 | LCL(profil_len) = LCL(text_len)+4 /* size of profil buffer */ | |
485 | LCL(counter_size) = LCL(profil_len)+4 /* size of individual counter */ | |
486 | LCL(scale) = LCL(counter_size)+4 /* scale factor */ | |
487 | LCL(profil_unused) = LCL(scale)+4 /* unused fields */ | |
488 | LCL(profil_end) = LCL(profil_unused)+4*8 /* end of profil_info structure */ | |
489 | LCL(profil_buf) = LCL(profil_end) /* buffer for profil */ | |
490 | ||
491 | /* Output selection func ptrs */ | |
492 | LCL(output_init) = LCL(profil_buf)+4 /* Initialization */ | |
493 | LCL(output) = LCL(output_init)+4 /* Write out profiling info */ | |
494 | LCL(output_ptr) = LCL(output)+4 /* Output specific data ptr */ | |
495 | ||
496 | /* Memory allocation support */ | |
497 | LCL(acontext) = LCL(output_ptr)+4 /* pointers to allocation context blocks */ | |
498 | ||
499 | LCL(bogus_func) = LCL(acontext)+4*C_max /* function to use if gprof arc is bad */ | |
500 | LCL(vars_unused) = LCL(bogus_func)+4 /* future growth */ | |
501 | ||
502 | /* flags */ | |
503 | LCL(init) = LCL(vars_unused)+4*63 /* whether initializations were done */ | |
504 | LCL(active) = LCL(init)+1 /* whether profiling is active */ | |
505 | LCL(do_profile) = LCL(active)+1 /* whether to do profiling */ | |
506 | LCL(use_dci) = LCL(do_profile)+1 /* whether to use DCI */ | |
507 | LCL(use_profil) = LCL(use_dci)+1 /* whether to use profil */ | |
508 | LCL(recursive_alloc) = LCL(use_profil)+1 /* alloc called recursively */ | |
509 | LCL(output_uarea) = LCL(recursive_alloc)+1 /* output uarea */ | |
510 | LCL(output_stats) = LCL(output_uarea)+1 /* output stats info */ | |
511 | LCL(output_clock) = LCL(output_stats)+1 /* output the clock ticks */ | |
512 | LCL(multiple_sections) = LCL(output_clock)+1 /* multiple sections are ok */ | |
513 | LCL(have_bb) = LCL(multiple_sections)+1 /* whether we have basic block data */ | |
514 | LCL(init_format) = LCL(have_bb)+1 /* The output format has been chosen */ | |
515 | LCL(debug) = LCL(init_format)+1 /* Whether or not we are debugging */ | |
516 | LCL(check_funcs) = LCL(debug)+1 /* Whether to check functions for validity */ | |
517 | LCL(flag_unused) = LCL(check_funcs)+1 /* unused flags */ | |
518 | LCL(end_of_vars) = LCL(flag_unused)+62 /* size of machine independent vars */ | |
519 | ||
520 | /* | |
521 | * Data that contains profile statistics that can be dumped out | |
522 | * into the {,g}mon.out file. This is defined in profile-md.h. | |
523 | */ | |
524 | ||
525 | LCL(stats_start) = LCL(end_of_vars) /* start of stats substructure */ | |
526 | LCL(stats_major_version)= LCL(stats_start) /* major version number */ | |
527 | LCL(stats_minor_version)= LCL(stats_major_version)+4 /* minor version number */ | |
528 | LCL(stats_size) = LCL(stats_minor_version)+4 /* size of _profile_stats structure */ | |
529 | LCL(profil_buckets) = LCL(stats_size)+4 /* # profil buckets */ | |
530 | LCL(my_cpu) = LCL(profil_buckets)+4 /* identify which cpu/thread this is */ | |
531 | LCL(max_cpu) = LCL(my_cpu)+4 /* max cpu/thread number -- NOTE(review): original comment duplicated my_cpu's; verify against profile-md.h */ | |
532 | LCL(prof_records) = LCL(max_cpu)+4 /* # of profiled functions */ | |
533 | LCL(gprof_records) = LCL(prof_records)+4 /* # of gprof arcs created */ | |
534 | LCL(hash_buckets) = LCL(gprof_records)+4 /* max gprof hash buckets on a chain */ | |
535 | LCL(bogus_count) = LCL(hash_buckets)+4 /* # bogus functions found in gprof */ | |
536 | ||
537 | LCL(cnt) = LCL(bogus_count)+4 /* # of _{prof,gprof}_mcount calls */ | |
538 | LCL(dummy) = LCL(cnt)+8 /* # of _dummy_mcount calls */ | |
539 | LCL(old_mcount) = LCL(dummy)+8 /* # of old mcount calls */ | |
540 | LCL(hash_search) = LCL(old_mcount)+8 /* # gprof hash buckets searched */ | |
541 | LCL(hash_num) = LCL(hash_search)+8 /* # times hash table searched */ | |
542 | LCL(user_ticks) = LCL(hash_num)+8 /* # ticks within user space */ | |
543 | LCL(kernel_ticks) = LCL(user_ticks)+8 /* # ticks within kernel space */ | |
544 | LCL(idle_ticks) = LCL(kernel_ticks)+8 /* # ticks cpu was idle */ | |
545 | LCL(overflow_ticks) = LCL(idle_ticks)+8 /* # ticks where histcounter overflowed */ | |
546 | LCL(acontext_locked) = LCL(overflow_ticks)+8 /* # times an acontext was locked */ | |
547 | LCL(too_low) = LCL(acontext_locked)+8 /* # times histogram tick too low */ | |
548 | LCL(too_high) = LCL(too_low)+8 /* # times histogram tick too high */ | |
549 | LCL(prof_overflow) = LCL(too_high)+8 /* # times the prof count field overflowed */ | |
550 | LCL(gprof_overflow) = LCL(prof_overflow)+8 /* # times the gprof count field overflowed */ | |
551 | LCL(num_alloc) = LCL(gprof_overflow)+8 /* # allocations in each context */ | |
552 | LCL(bytes_alloc) = LCL(num_alloc)+4*C_max /* bytes allocated in each context */ | |
553 | LCL(num_context) = LCL(bytes_alloc)+4*C_max /* # allocation context blocks */ | |
554 | LCL(wasted) = LCL(num_context)+4*C_max /* # bytes wasted */ | |
555 | LCL(overhead) = LCL(wasted)+4*C_max /* # bytes of overhead */ | |
556 | LCL(buckets) = LCL(overhead)+4*C_max /* # hash indexes that have n buckets */ | |
557 | LCL(cache_hits1) = LCL(buckets)+4*10 /* # gprof cache hits in bucket #1 */ | |
558 | LCL(cache_hits2) = LCL(cache_hits1)+8 /* # gprof cache hits in bucket #2 */ | |
559 | LCL(cache_hits3) = LCL(cache_hits2)+8 /* # gprof cache hits in bucket #3 */ | |
560 | LCL(stats_unused) = LCL(cache_hits3)+8 /* reserved for future use */ | |
561 | LCL(stats_end) = LCL(stats_unused)+8*64 /* end of stats structure */ | |
562 | ||
563 | /* | |
564 | * Machine dependent variables that no C file should access (except for | |
565 | * profile-md.c). | |
566 | */ | |
567 | ||
568 | LCL(md_start) = LCL(stats_end) /* start of md structure */ | |
569 | LCL(md_major_version) = LCL(md_start) /* major version number */ | |
570 | LCL(md_minor_version) = LCL(md_major_version)+4 /* minor version number */ | |
571 | LCL(md_size) = LCL(md_minor_version)+4 /* size of _profile_stats structure */ | |
572 | LCL(hash_ptr) = LCL(md_size)+4 /* gprof hash pointer */ | |
573 | LCL(hash_size) = LCL(hash_ptr)+4 /* gprof hash size */ | |
574 | LCL(num_cache) = LCL(hash_size)+4 /* # of cache entries */ | |
575 | LCL(save_mcount_ptr) = LCL(num_cache)+4 /* save for mcount_ptr when suspending profiling */ | |
576 | LCL(mcount_ptr_ptr) = LCL(save_mcount_ptr)+4 /* pointer to _mcount_ptr */ | |
577 | LCL(dummy_ptr) = LCL(mcount_ptr_ptr)+4 /* pointer to gprof_dummy */ | |
578 | LCL(alloc_pages) = LCL(dummy_ptr)+4 /* allocate more memory */ | |
579 | LCL(num_buffer) = LCL(alloc_pages)+4 /* buffer to convert 64 bit ints in */ | |
580 | LCL(md_unused) = LCL(num_buffer)+N_digit /* unused fields */ | |
581 | LCL(md_end) = LCL(md_unused)+4*58 /* end of md structure */ | |
582 | LCL(total_size) = LCL(md_end) /* size of entire structure */ | |
583 | ||
584 | /* | |
585 | * Size of the entire _profile_vars structure. | |
586 | */ | |
587 | ||
588 | DATA(_profile_size) | |
589 | .long LCL(total_size) | |
590 | ENDDATA(_profile_size) | |
591 | ||
592 | /* | |
593 | * Size of the statistics substructure. | |
594 | */ | |
595 | ||
596 | DATA(_profile_stats_size) | |
597 | .long LCL(stats_end)-LCL(stats_start) | |
598 | ENDDATA(_profile_stats_size) | |
599 | ||
600 | /* | |
601 | * Size of the profil info substructure. | |
602 | */ | |
603 | ||
604 | DATA(_profile_profil_size) | |
605 | .long LCL(profil_end)-LCL(profil_start) | |
606 | ENDDATA(_profile_profil_size) | |
607 | ||
608 | /* | |
609 | * Size of the machine dependent substructure. | |
610 | */ | |
611 | ||
612 | DATA(_profile_md_size) | |
613 | .long LCL(md_end)-LCL(md_start) /* size of the machine dependent substructure */ | |
614 | ENDDATA(_profile_md_size) /* fixed: was ENDDATA(_profile_profil_size), copy/paste of the previous block's label */ | |
615 | ||
616 | /* | |
617 | * Whether statistics are supported. | |
618 | */ | |
619 | ||
620 | DATA(_profile_do_stats) | |
621 | .long DO_STATS | |
622 | ENDDATA(_profile_do_stats) | |
623 | ||
624 | .text | |
625 | ||
626 | /* | |
627 | * Map LCL(xxx) -> into simpler names | |
628 | */ | |
629 | ||
630 | #define V_acontext LCL(acontext) | |
631 | #define V_acontext_locked LCL(acontext_locked) | |
632 | #define V_alloc_pages LCL(alloc_pages) | |
633 | #define V_bogus_func LCL(bogus_func) | |
634 | #define V_bytes_alloc LCL(bytes_alloc) | |
635 | #define V_cache_hits1 LCL(cache_hits1) | |
636 | #define V_cache_hits2 LCL(cache_hits2) | |
637 | #define V_cache_hits3 LCL(cache_hits3) | |
638 | #define V_cnt LCL(cnt) | |
639 | #define V_cnt_overflow LCL(cnt_overflow) | |
640 | #define V_check_funcs LCL(check_funcs) | |
641 | #define V_dummy LCL(dummy) | |
642 | #define V_dummy_overflow LCL(dummy_overflow) | |
643 | #define V_dummy_ptr LCL(dummy_ptr) | |
644 | #define V_gprof_records LCL(gprof_records) | |
645 | #define V_hash_num LCL(hash_num) | |
646 | #define V_hash_ptr LCL(hash_ptr) | |
647 | #define V_hash_search LCL(hash_search) | |
648 | #define V_mcount_ptr_ptr LCL(mcount_ptr_ptr) | |
649 | #define V_num_alloc LCL(num_alloc) | |
650 | #define V_num_buffer LCL(num_buffer) | |
651 | #define V_num_context LCL(num_context) | |
652 | #define V_old_mcount LCL(old_mcount) | |
653 | #define V_old_mcount_overflow LCL(old_mcount_overflow) | |
654 | #define V_overhead LCL(overhead) | |
655 | #define V_page_size LCL(page_size) | |
656 | #define V_prof_records LCL(prof_records) | |
657 | #define V_recursive_alloc LCL(recursive_alloc) | |
658 | #define V_wasted LCL(wasted) | |
659 | ||
660 | /* | |
661 | * Loadup %ebx with the address of _profile_vars. On a multiprocessor, this | |
661 | * will load the appropriate machine's _profile_vars structure. | |
663 | * For ELF shared libraries, rely on the fact that we won't need a GOT, | |
664 | * except to load this pointer. | |
665 | */ | |
666 | ||
667 | #if defined (MACH_KERNEL) && NCPUS > 1 | |
668 | #define ASSEMBLER | |
55e303ae | 669 | #include <i386/mp.h> |
1c79356b A |
670 | |
671 | #if SQT | |
672 | #include <i386/SQT/asm_macros.h> | |
673 | #endif | |
674 | ||
675 | #ifndef CPU_NUMBER | |
676 | #error "Cannot determine how to get CPU number" | |
677 | #endif | |
678 | ||
679 | #define Vload CPU_NUMBER(%ebx); movl EXT(_profile_vars_cpus)(,%ebx,4),%ebx | |
680 | ||
681 | #else /* not kernel or not multiprocessor */ | |
682 | #define Vload Gload; Egaddr(%ebx,_profile_vars) | |
683 | #endif | |
684 | ||
685 | \f | |
686 | /* | |
687 | * Allocate some memory for profiling. This memory is guaranteed to | |
688 | * be zero. | |
689 | * %eax contains the memory size requested and will contain ptr on exit. | |
690 | * %ebx contains the address of the appropriate profile_vars structure. | |
691 | * %ecx is the number of the memory pool to allocate from (trashed on exit). | |
692 | * %edx is trashed. | |
693 | * %esi is preserved. | |
694 | * %edi is preserved. | |
695 | * %ebp is preserved. | |
696 | */ | |
697 | ||
/*
 * _profile_alloc_asm -- carve a chunk of memory out of a profiling pool.
 *
 * In:	%eax = number of bytes to allocate (caller pre-rounds to a word)
 *	%ebx = address of the profile_vars structure
 *	%ecx = allocation context (pool) number
 * Out:	%eax = pointer to the allocated memory
 *	%ecx, %edx clobbered; %esi/%edi saved and restored here.
 *
 * Walks the pool's list of allocation contexts, claiming one with an
 * atomic xchgl on A_lock, and bumps the current memory block's pointer.
 * Falls into LCL(alloc_new) when the current block is too small, and
 * into LCL(alloc_context) when the pool has no context at all.
 */
Entry(_profile_alloc_asm)
	ENTER
	pushl	%esi
	pushl	%edi

	movl	%ecx,%edi		/* move context number to saved reg */

#if NO_RECURSIVE_ALLOC
	/* Debug guard: trap (int $3) if the allocator re-enters itself. */
	movb	$-1,%cl
	xchgb	%cl,V_recursive_alloc(%ebx)
	cmpb	$0,%cl
	je	LCL(no_recurse)

	int	$3

	.align	ALIGN
LCL(no_recurse):
#endif

	leal	V_acontext(%ebx,%edi,4),%ecx

	/* Loop looking for a free allocation context. */
	/* %eax = size, %ebx = vars addr, %ecx = ptr to allocation context to try */
	/* %edi = context number */

	.align	ALIGN
LCL(alloc_loop):
	movl	%ecx,%esi		/* save ptr in case no more contexts */
	movl	A_next(%ecx),%ecx	/* next context block */
	cmpl	$0,%ecx
	je	LCL(alloc_context)	/* need to allocate a new context block */

	movl	$-1,%edx
	xchgl	%edx,A_lock(%ecx)	/* %edx == 0 if context available */

#if DO_STATS
	SDADDNEG(%edx,V_acontext_locked(%ebx)) /* increment counter if lock was held */
#endif

	cmpl	$0,%edx
	jne	LCL(alloc_loop)		/* go back if this context block is not available */

	/* Allocation context found (%ecx), now allocate. */
	movl	A_plist(%ecx),%edx	/* pointer to current block */
	cmpl	$0,%edx			/* first allocation? */
	je	LCL(alloc_new)

	cmpl	%eax,M_nfree(%edx)	/* see if we have enough space */
	jl	LCL(alloc_new)		/* jump if not enough space */

	/* Allocate from local block (and common exit) */
	/* %eax = bytes to allocate, %ebx = GOT, %ecx = context, %edx = memory block */
	/* %edi = context number */

	.align	ALIGN
LCL(alloc_ret):

#if DO_STATS
	SLOCK incl V_num_alloc(%ebx,%edi,4)	/* update global counters */
	SLOCK addl %eax,V_bytes_alloc(%ebx,%edi,4)
	SLOCK subl %eax,V_wasted(%ebx,%edi,4)
#endif

	movl	M_ptr(%edx),%esi	/* pointer return value */
	subl	%eax,M_nfree(%edx)	/* decrement bytes remaining */
	addl	%eax,M_nalloc(%edx)	/* increment bytes allocated */
	incl	M_num(%edx)		/* increment # allocations */
	addl	%eax,M_ptr(%edx)	/* advance pointer */
	movl	$0,A_lock(%ecx)		/* unlock context block (simple store releases) */
	movl	%esi,%eax		/* return pointer */

#if NO_RECURSIVE_ALLOC
	movb	$0,V_recursive_alloc(%ebx)	/* clear the recursion guard */
#endif

	popl	%edi
	popl	%esi
	LEAVE0
	ret				/* return to the caller */

	/* Allocate space in whole number of pages */
	/* %eax = bytes to allocate, %ebx = vars address, %ecx = context */
	/* %edi = context number */

	.align	ALIGN
LCL(alloc_new):
	pushl	%eax			/* save regs */
	pushl	%ecx
	movl	V_page_size(%ebx),%edx
	addl	$(M_size-1),%eax	/* add in overhead size & subtract 1 */
	decl	%edx			/* page_size - 1 */
	addl	%edx,%eax		/* round up to whole number of pages */
	notl	%edx
	andl	%edx,%eax		/* mask: page_size assumed a power of two */
	leal	-M_size(%eax),%esi	/* save allocation size */
	pushl	%eax			/* argument to _profile_alloc_pages */
	call	*V_alloc_pages(%ebx)	/* allocate some memory */
	addl	$4,%esp			/* pop off argument */

#if DO_STATS
	SLOCK addl %esi,V_wasted(%ebx,%edi,4)	/* update global counters */
	SLOCK addl $(M_size),V_overhead(%ebx,%edi,4)
#endif

	popl	%ecx			/* context block */
	movl	%eax,%edx		/* memory block pointer */
	movl	%esi,M_nfree(%edx)	/* # free bytes */
	addl	$(M_size),%eax		/* bump past overhead */
	movl	A_plist(%ecx),%esi	/* previous memory block or 0 */
	movl	%eax,M_first(%edx)	/* first space available */
	movl	%eax,M_ptr(%edx)	/* current address available */
	movl	%esi,M_next(%edx)	/* next memory block allocated */
	movl	%edx,A_plist(%ecx)	/* update current page list */
	popl	%eax			/* user size request */
	jmp	LCL(alloc_ret)		/* goto common return code */

	/* Allocate a context header in addition to memory block header + data */
	/* %eax = bytes to allocate, %ebx = GOT, %esi = ptr to store context ptr */
	/* %edi = context number */

	.align	ALIGN
LCL(alloc_context):
	pushl	%eax			/* save regs */
	pushl	%esi
	movl	V_page_size(%ebx),%edx
	addl	$(A_size+M_size-1),%eax	/* add in overhead size & subtract 1 */
	decl	%edx			/* page_size - 1 */
	addl	%edx,%eax		/* round up to whole number of pages */
	notl	%edx
	andl	%edx,%eax
	leal	-A_size-M_size(%eax),%esi /* save allocation size */
	pushl	%eax			/* argument to _profile_alloc_pages */
	call	*V_alloc_pages(%ebx)	/* allocate some memory */
	addl	$4,%esp			/* pop off argument */

#if DO_STATS
	SLOCK incl V_num_context(%ebx,%edi,4)	/* bump # context blocks */
	SLOCK addl %esi,V_wasted(%ebx,%edi,4)	/* update global counters */
	SLOCK addl $(A_size+M_size),V_overhead(%ebx,%edi,4)
#endif

	movl	%eax,%ecx		/* context pointer */
	leal	A_size(%eax),%edx	/* memory block pointer */
	movl	%esi,M_nfree(%edx)	/* # free bytes */
	addl	$(A_size+M_size),%eax	/* bump past overhead */
	movl	%eax,M_first(%edx)	/* first space available */
	movl	%eax,M_ptr(%edx)	/* current address available */
	movl	$0,M_next(%edx)		/* next memory block allocated */
	movl	%edx,A_plist(%ecx)	/* head of memory block list */
	movl	$1,A_lock(%ecx)		/* set lock (created already owned) */
	popl	%esi			/* ptr to store context block link */
	movl	%ecx,%eax		/* context pointer temp */
	xchgl	%eax,A_next(%esi)	/* link into chain atomically */
	movl	%eax,A_next(%ecx)	/* add links in case of threading */
	popl	%eax			/* user size request */
	jmp	LCL(alloc_ret)		/* goto common return code */

END(_profile_alloc_asm)
856 | ||
857 | /* | |
858 | * C callable version of the profile memory allocator. | |
859 | * extern void *_profile_alloc(struct profile_vars *, size_t, acontext_type_t); | |
860 | */ | |
861 | ||
/*
 * C callable wrapper around _profile_alloc_asm.
 * extern void *_profile_alloc(struct profile_vars *, size_t, acontext_type_t);
 * Loads the three C arguments into the registers the asm allocator
 * expects, rounding the size up to a 4-byte boundary first.
 */
Entry(_profile_alloc)
	ENTER
	pushl	%ebx			/* callee-saved; holds profile_vars */
	movl	8+Estack(%esp),%ebx	/* arg 1: struct profile_vars * */
	movl	12+Estack(%esp),%eax	/* arg 2: requested size in bytes */
	movl	16+Estack(%esp),%ecx	/* arg 3: memory pool to allocate from */
	addl	$3,%eax			/* round the size up ... */
	andl	$0xfffffffc,%eax	/* ... to a whole word boundary */
	call	EXT(_profile_alloc_asm)	/* %eax = allocated pointer */
	popl	%ebx
	LEAVE0
	ret
END(_profile_alloc)
875 | ||
876 | \f | |
877 | /* | |
878 | * Dummy mcount routine that just returns. | |
879 | * | |
880 | * +-------------------------------+ | |
881 | * | | | |
882 | * | | | |
883 | * | caller's caller stack, | | |
884 | * | saved registers, params. | | |
885 | * | | | |
886 | * | | | |
887 | * +-------------------------------+ | |
888 | * | caller's caller return addr. | | |
889 | * +-------------------------------+ | |
890 | * esp --> | caller's return address | | |
891 | * +-------------------------------+ | |
892 | * | |
 * edx --> function unique label
894 | */ | |
895 | ||
/*
 * _dummy_mcount -- no-op profiling hook installed when profiling is off.
 * Simply returns; with DO_STATS it also counts how often it was reached.
 */
Entry(_dummy_mcount)
	ENTER

#if DO_STATS
	pushl	%ebx			/* %ebx used by Vload / stat macros */
	MP_DISABLE_PREEMPTION(%ebx)
	Vload				/* get _profile_vars address */
	SDINC(V_dummy(%ebx))		/* count calls to the dummy hook */
	MP_ENABLE_PREEMPTION(%ebx)
	popl	%ebx
#endif

	LEAVE0
	ret
END(_dummy_mcount)
911 | ||
912 | \f | |
913 | /* | |
914 | * Entry point for System V based profiling, count how many times each function | |
915 | * is called. The function label is passed in %edx, and the top two words on | |
916 | * the stack are the caller's address, and the caller's return address. | |
917 | * | |
918 | * +-------------------------------+ | |
919 | * | | | |
920 | * | | | |
921 | * | caller's caller stack, | | |
922 | * | saved registers, params. | | |
923 | * | | | |
924 | * | | | |
925 | * +-------------------------------+ | |
926 | * | caller's caller return addr. | | |
927 | * +-------------------------------+ | |
928 | * esp --> | caller's return address | | |
929 | * +-------------------------------+ | |
930 | * | |
931 | * edx --> function unique label | |
932 | * | |
 * We don't worry about the possibility of two threads calling
 * the same function for the first time simultaneously.  If that
 * happens, two records will be created, and one of the record
 * addresses will be stored in the function unique label (which
 * is aligned by the compiler, so we don't have to watch out for
 * crossing page/cache boundaries).
939 | */ | |
940 | ||
/*
 * _prof_mcount -- System V style profiling hook: per-function call counts.
 *
 * In:	%edx = address of the function's unique label word; *(%edx) is 0
 *	      until a prof record has been allocated for the function,
 *	      afterwards it points at that record.
 * The caller's return address is read from Estack+4(%esp).
 */
Entry(_prof_mcount)
	ENTER

#if DO_STATS
	pushl	%ebx
	MP_DISABLE_PREEMPTION(%ebx)
	Vload				/* get _profile_vars address */
	SDINC(V_cnt(%ebx))		/* bump total profile call counter */
#endif

	movl	(%edx),%eax		/* initialized? */
	cmpl	$0,%eax
	je	LCL(pnew)

	/* Common case: record exists, just bump its counter. */
	DINC2(P_count(%eax),P_overflow(%eax))	/* bump function count (double precision) */

#if DO_STATS
	MP_ENABLE_PREEMPTION(%ebx)
	popl	%ebx
#endif

	LEAVE0
	ret

	/* First call for this function: allocate and fill in a prof record. */
	.align	ALIGN
LCL(pnew):

#if !DO_STATS
	pushl	%ebx			/* vars address not loaded yet in this build */
	MP_DISABLE_PREEMPTION(%ebx)
	Vload
#endif

	SLOCK incl V_prof_records(%ebx)	/* count records ever created */
	pushl	%edx			/* save unique-label ptr across the call */
	movl	$(P_size),%eax		/* allocation size */
	movl	$(C_prof),%ecx		/* allocation pool */
	call	EXT(_profile_alloc_asm)	/* allocate a new record */
	popl	%edx

	movl	Estack+4(%esp),%ecx	/* caller's address */
	movl	%ecx,P_addr(%eax)
	movl	$1,P_count(%eax)	/* call count */
	xchgl	%eax,(%edx)		/* publish record in function header */
	MP_ENABLE_PREEMPTION(%ebx)
	popl	%ebx
	LEAVE0
	ret

END(_prof_mcount)
991 | ||
992 | \f | |
993 | /* | |
994 | * Entry point for BSD based graph profiling, count how many times each unique | |
995 | * call graph (caller + callee) is called. The function label is passed in | |
996 | * %edx, and the top two words on the stack are the caller's address, and the | |
997 | * caller's return address. | |
998 | * | |
999 | * +-------------------------------+ | |
1000 | * | | | |
1001 | * | | | |
1002 | * | caller's caller stack, | | |
1003 | * | saved registers, params. | | |
1004 | * | | | |
1005 | * | | | |
1006 | * +-------------------------------+ | |
1007 | * | caller's caller return addr. | | |
1008 | * +-------------------------------+ | |
1009 | * esp --> | caller's return address | | |
1010 | * +-------------------------------+ | |
1011 | * | |
 * edx --> function unique label
1013 | * | |
 * We don't worry about the possibility of two threads calling the same
 * function simultaneously.  If that happens, two records will be created, and
 * one of the record addresses will be stored in the function unique label
 * (which is aligned by the compiler).
 *
 * By design, the gprof header is not locked.  Each of the cache pointers is
 * always a valid pointer (possibly to a null record), and if another thread
 * comes in and modifies the pointer, it does so atomically with a simple store.
1022 | * Since all arcs are in the hash table, the caches are just to avoid doing | |
1023 | * a multiplication in the common case, and if they don't match, the arcs will | |
1024 | * still be found. | |
1025 | */ | |
1026 | ||
/*
 * _gprof_mcount -- BSD gprof hook: count each unique (caller, callee) arc.
 *
 * In:	%edx = address of the function's unique label word; *(%edx) points
 *	      to the function's gprof header once allocated, else 0.
 * The caller's and caller's caller's return addresses are read from the
 * stack via Estack offsets (see diagram above).
 *
 * A three-entry per-function cache of recently used arcs (H_cache_ptr+0/4/8)
 * is searched before falling back to the global hash table; cache pointers
 * are maintained most-recently-used first with plain stores (see the
 * lock-free design note above).
 */
Entry(_gprof_mcount)

	ENTER
	movl	Estack+4(%esp),%ecx	/* caller's caller address */

#if DO_STATS
	pushl	%ebx
	MP_DISABLE_PREEMPTION(%ebx)
	Vload				/* get _profile_vars address */
	SDINC(V_cnt(%ebx))		/* bump profile call counter (double int) */
#endif

	movl	(%edx),%eax		/* Gprof header allocated? */
	cmpl	$0,%eax
	je	LCL(gnew)		/* skip if first call */

	DINC2(H_prof+P_count(%eax),H_prof+P_overflow(%eax))	/* bump function count */

	/* See if this call arc is the same as the last time */
	MARK(_gprof_mcount_cache1)
	movl	H_cache_ptr(%eax),%edx	/* last arc searched */
	cmpl	%ecx,G_frompc(%edx)	/* skip if not equal */
	jne	LCL(gcache2)

	/* Same as last time, increment and return */

	DINC2(G_count(%edx),G_overflow(%edx))	/* bump arc count */

#if DO_STATS
	SDINC(V_cache_hits1(%ebx))	/* update counter */
	MP_ENABLE_PREEMPTION(%ebx)
	popl	%ebx
#endif

	LEAVE0
	ret

	/* Search second cache entry */
	/* %eax = gprof func header, %ebx = vars address if DO_STATS, %ecx = caller's caller */
	/* %edx = first arc searched */
	/* %ebx if DO_STATS pushed on stack */

	.align	ALIGN
MARK(_gprof_mcount_cache2)
LCL(gcache2):
	pushl	%esi			/* get a saved register */
	movl	H_cache_ptr+4(%eax),%esi /* 2nd arc to be searched */
	cmpl	%ecx,G_frompc(%esi)	/* skip if not equal */
	jne	LCL(gcache3)

	/* Element found, increment, reset last arc searched and return */

	DINC2(G_count(%esi),G_overflow(%esi))	/* bump arc count */

	movl	%esi,H_cache_ptr+0(%eax) /* swap 1st and 2nd cached arcs */
	popl	%esi
	movl	%edx,H_cache_ptr+4(%eax)

#if DO_STATS
	SDINC(V_cache_hits2(%ebx))	/* update counter */
	MP_ENABLE_PREEMPTION(%ebx)
	popl	%ebx
#endif

	LEAVE0
	ret

	/* Search third cache entry */
	/* %eax = gprof func header, %ebx = vars address if DO_STATS, %ecx = caller's caller */
	/* %edx = first arc searched, %esi = second arc searched */
	/* %esi, %ebx if DO_STATS pushed on stack */

	.align	ALIGN
MARK(_gprof_mcount_cache3)
LCL(gcache3):
	pushl	%edi
	movl	H_cache_ptr+8(%eax),%edi /* 3rd arc to be searched */
	cmpl	%ecx,G_frompc(%edi)	/* skip if not equal */
	jne	LCL(gnocache)

	/* Element found, increment, reset last arc searched and return */

	DINC2(G_count(%edi),G_overflow(%edi))	/* bump arc count */

	movl	%edi,H_cache_ptr+0(%eax) /* make this 1st cached arc */
	movl	%esi,H_cache_ptr+8(%eax)
	movl	%edx,H_cache_ptr+4(%eax)
	popl	%edi
	popl	%esi

#if DO_STATS
	SDINC(V_cache_hits3(%ebx))	/* update counter */
	MP_ENABLE_PREEMPTION(%ebx)
	popl	%ebx
#endif

	LEAVE0
	ret

	/* No function context, allocate a new context */
	/* %ebx is the variables address if DO_STATS */
	/* %ecx is the caller's caller's address */
	/* %edx is the unique function pointer */
	/* %ebx if DO_STATS pushed on stack */

	.align	ALIGN
MARK(_gprof_mcount_new)
LCL(gnew):
	pushl	%esi
	pushl	%edi

#if !DO_STATS
	pushl	%ebx			/* Address of vars needed for alloc */
	MP_DISABLE_PREEMPTION(%ebx)
	Vload				/* stats build already loaded the address */
#endif

	SLOCK incl V_prof_records(%ebx)
	movl	%edx,%esi		/* save unique function ptr */
	movl	%ecx,%edi		/* and caller's caller address */
	movl	$(H_size),%eax		/* memory block size */
	movl	$(C_gfunc),%ecx		/* gprof function header memory pool */
	call	EXT(_profile_alloc_asm)

	movl	V_hash_ptr(%ebx),%ecx	/* copy hash_ptr to func header */
	movl	V_dummy_ptr(%ebx),%edx	/* dummy cache entry */
	movl	%ecx,H_hash_ptr(%eax)
	movl	%edx,H_cache_ptr+0(%eax) /* store dummy cache ptrs */
	movl	%edx,H_cache_ptr+4(%eax)
	movl	%edx,H_cache_ptr+8(%eax)
	movl	%esi,H_unique_ptr(%eax)	/* remember function unique ptr */
	movl	Estack+12(%esp),%ecx	/* caller's address */
	movl	$1,H_prof+P_count(%eax)	/* function called once so far */
	movl	%ecx,H_prof+P_addr(%eax) /* set up prof information */
	movl	%eax,(%esi)		/* publish header in function label word */
	movl	%edi,%ecx		/* caller's caller address */
	movl	%edx,%esi		/* 2nd cached arc */

#if !DO_STATS
	popl	%ebx
#endif

	/* Fall through to add element to the hash table.  This may involve */
	/* searching a few hash table elements that don't need to be searched */
	/* since we have a new element, but it allows the hash table function */
	/* to be specified in only one place */

	/* Didn't find entry in cache, search the global hash table */
	/* %eax = gprof func header, %ebx = vars address if DO_STATS */
	/* %ecx = caller's caller */
	/* %edx, %esi = cached arcs that were searched */
	/* %edi, %esi, %ebx if DO_STATS pushed on stack */

	.align	ALIGN
MARK(_gprof_mcount_hash)
LCL(gnocache):

	pushl	%esi			/* save 2nd arc searched */
	pushl	%edx			/* save 1st arc searched */
	movl	%eax,%esi		/* save gprof func header */

#if DO_STATS
	SDINC(V_hash_num(%ebx))
	movl	Estack+20(%esp),%edi	/* caller's address (extra %ebx on stack) */
#else
	movl	Estack+16(%esp),%edi	/* caller's address */
#endif
	/* Hash = ((selfpc * frompc) >> SHIFT) & MASK, indexing a word table. */
	movl	%ecx,%eax		/* caller's caller address */
	imull	%edi,%eax		/* multiply to get hash */
	movl	H_hash_ptr(%esi),%edx	/* hash pointer */
	shrl	$(GPROF_HASH_SHIFT),%eax /* eliminate low order bits */
	andl	$(GPROF_HASH_MASK),%eax	/* mask to get hash value */
	leal	0(%edx,%eax,4),%eax	/* pointer to hash bucket */
	movl	%eax,%edx		/* save hash bucket address */

	/* %eax = old arc, %ebx = vars address if DO_STATS, %ecx = caller's caller */
	/* %edx = hash bucket address, %esi = gfunc ptr, %edi = caller's addr */
	/* 2 old arcs, %edi, %esi, %ebx if DO_STATS pushed on stack */

	.align	ALIGN
LCL(ghash):
	movl	G_next(%eax),%eax	/* get next hash element */
	cmpl	$0,%eax			/* end of line? */
	je	LCL(ghashnew)		/* skip if allocate new hash */

#if DO_STATS
	SDINC(V_hash_search(%ebx))
#endif

	cmpl	G_selfpc(%eax),%edi	/* loop back if not one we want */
	jne	LCL(ghash)

	cmpl	G_frompc(%eax),%ecx	/* loop back if not one we want */
	jne	LCL(ghash)

	/* Found an entry, increment count, set up for caching, and return */
	/* %eax = arc, %ebx = vars address if DO_STATS, %esi = func header */
	/* 2 old arcs, %edi, %esi, %ebx if DO_STATS pushed on stack */

	DINC2(G_count(%eax),G_overflow(%eax))	/* bump arc count */

	popl	%ecx			/* previous 1st arc searched */
	movl	%eax,H_cache_ptr+0(%esi) /* this element is now 1st arc */
	popl	%edi			/* previous 2nd arc searched */
	movl	%ecx,H_cache_ptr+4(%esi) /* new 2nd arc to be searched */
	movl	%edi,H_cache_ptr+8(%esi) /* new 3rd arc to be searched */
	popl	%edi
	popl	%esi

#if DO_STATS
	MP_ENABLE_PREEMPTION(%ebx)
	popl	%ebx
#endif

	LEAVE0
	ret				/* return to user */

	/* Allocate new arc */
	/* %eax = old arc, %ebx = vars address if DO_STATS, %ecx = caller's caller */
	/* %edx = hash bucket address, %esi = gfunc ptr, %edi = caller's addr */
	/* 2 old arcs, %edi, %esi, %ebx if DO_STATS pushed on stack */

	.align	ALIGN
MARK(_gprof_mcount_hashnew)
LCL(ghashnew):

#if !DO_STATS
	pushl	%ebx			/* load address of vars if we haven't */
	MP_DISABLE_PREEMPTION(%ebx)
	Vload				/* already done so */
#endif

	SLOCK incl V_gprof_records(%ebx)
	pushl	%edx			/* save hash bucket across the call */
	movl	%ecx,%edi		/* save caller's caller */
	movl	$(G_size),%eax		/* arc size */
	movl	$(C_gprof),%ecx		/* gprof memory pool */
	call	EXT(_profile_alloc_asm)
	popl	%edx

	movl	$1,G_count(%eax)	/* set call count */
	movl	Estack+20(%esp),%ecx	/* caller's address */
	movl	%edi,G_frompc(%eax)	/* caller's caller */
	movl	%ecx,G_selfpc(%eax)

#if !DO_STATS
	popl	%ebx			/* release %ebx if no stats */
#endif

	/* Push the new arc on the bucket's list; xchgl makes the insert */
	/* safe against a concurrent insert on the same bucket. */
	movl	(%edx),%ecx		/* first hash bucket */
	movl	%ecx,G_next(%eax)	/* update link */
	movl	%eax,%ecx		/* copy for xchgl */
	xchgl	%ecx,(%edx)		/* add to hash linked list */
	movl	%ecx,G_next(%eax)	/* update in case list changed */

	popl	%ecx			/* previous 1st arc searched */
	popl	%edi			/* previous 2nd arc searched */
	movl	%eax,H_cache_ptr+0(%esi) /* this element is now 1st arc */
	movl	%ecx,H_cache_ptr+4(%esi) /* new 2nd arc to be searched */
	movl	%edi,H_cache_ptr+8(%esi) /* new 3rd arc to be searched */

	popl	%edi
	popl	%esi

#if DO_STATS
	MP_ENABLE_PREEMPTION(%ebx)
	popl	%ebx
#endif

	LEAVE0
	ret				/* return to user */

END(_gprof_mcount)
1300 | ||
1301 | \f | |
1302 | /* | |
 * This function assumes that neither the caller nor its caller
 * has omitted the frame pointer in order to get the caller's
 * caller.  The stack looks like the following at the time of the call:
1306 | * | |
1307 | * +-------------------------------+ | |
1308 | * | | | |
1309 | * | | | |
1310 | * | caller's caller stack, | | |
1311 | * | saved registers, params. | | |
1312 | * | | | |
1313 | * | | | |
1314 | * +-------------------------------+ | |
1315 | * | caller's caller return addr. | | |
1316 | * +-------------------------------+ | |
1317 | * fp --> | previous frame pointer | | |
1318 | * +-------------------------------+ | |
1319 | * | | | |
1320 | * | caller's stack, saved regs, | | |
1321 | * | params. | | |
1322 | * | | | |
1323 | * +-------------------------------+ | |
1324 | * sp --> | caller's return address | | |
1325 | * +-------------------------------+ | |
1326 | * | |
1327 | * Recent versions of the compiler put the address of the pointer | |
1328 | * sized word in %edx. Previous versions did not, but this code | |
1329 | * does not support them. | |
1330 | */ | |
1331 | ||
1332 | /* | |
1333 | * Note that OSF/rose blew defining _mcount, since it prepends leading | |
1334 | * underscores, and _mcount didn't have a second leading underscore. However, | |
1335 | * some of the kernel/server functions 'know' that mcount has a leading | |
1336 | * underscore, so we satisfy both camps. | |
1337 | */ | |
1338 | ||
#if OLD_MCOUNT
/*
 * mcount/_mcount -- compatibility entry points (both spellings; see the
 * OSF/rose note above).  Dispatches indirectly through *V_mcount_ptr_ptr
 * to the currently installed profiling hook, after arranging the stack so
 * the hook sees the caller's and caller's caller's return addresses.
 * Assumes the caller maintains a frame pointer in %ebp (see diagram above).
 */
	.globl mcount
	.globl _mcount
	ELF_FUNC(mcount)
	ELF_FUNC(_mcount)
	.align FALIGN
_mcount:
mcount:

	pushl	%ebx
	MP_DISABLE_PREEMPTION(%ebx)
	Vload				/* get _profile_vars address */

#if DO_STATS
	SDINC(V_old_mcount(%ebx))	/* count calls via the old interface */
#endif

	/* In calling the functions, we will actually leave 1 extra word on the */
	/* top of the stack, but generated code will not notice, since the function */
	/* uses a frame pointer */

	movl	V_mcount_ptr_ptr(%ebx),%ecx	/* address of mcount_ptr */
	MP_ENABLE_PREEMPTION(%ebx)
	popl	%ebx
	movl	4(%ebp),%eax		/* caller's caller return address */
	xchgl	%eax,(%esp)		/* push & get return address */
	pushl	%eax			/* push return address */
	jmp	*(%ecx)			/* tail-jump to the profiling hook */

End(mcount)
End(_mcount)
#endif
1371 | ||
1372 | \f | |
1373 | #if !defined(KERNEL) && !defined(MACH_KERNEL) | |
1374 | ||
1375 | /* | |
1376 | * Convert a 64-bit integer to a string. | |
1377 | * Arg #1 is a pointer to a string (at least 24 bytes) or NULL | |
1378 | * Arg #2 is the low part of the 64-bit integer. | |
1379 | * Arg #3 is the high part of the 64-bit integer. | |
1380 | */ | |
1381 | ||
/*
 * _profile_cnt_to_decimal -- convert a 64-bit count to a decimal string.
 *
 * Arg #1: buffer (at least N_digit bytes per the header comment above),
 *	   or NULL to use V_num_buffer inside profile_vars.
 * Arg #2: low 32 bits of the value.
 * Arg #3: high 32 bits of the value (only read #if OVERFLOW).
 * Returns %eax = pointer to the first digit of the NUL-terminated string,
 * which is built backwards from the end of the buffer.
 *
 * NOTE(review): MP_ENABLE_PREEMPTION runs on every exit, but the matching
 * MP_DISABLE_PREEMPTION only runs on the NULL-buffer path; presumably the
 * macros are no-ops in this !KERNEL build -- confirm before reusing.
 */
Entry(_profile_cnt_to_decimal)
	ENTER
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	Estack+16(%esp),%ebx	/* pointer or null */
	movl	Estack+20(%esp),%edi	/* low part of number */
	movl	$10,%ecx		/* divisor */
	cmpl	$0,%ebx			/* skip if pointer ok */
	jne	LCL(cvt_nonnull)

	MP_DISABLE_PREEMPTION(%ebx)
	Vload				/* get _profile_vars address */
	leal	V_num_buffer(%ebx),%ebx	/* temp buffer to use */

	.align	ALIGN
LCL(cvt_nonnull):
	addl	$(N_digit-1),%ebx	/* point string at end */
	movb	$0,0(%ebx)		/* null terminate string */

#if OVERFLOW
	movl	Estack+24(%esp),%esi	/* high part of number */
	cmpl	$0,%esi			/* anything left in high part? */
	je	LCL(cvt_low)

	/* 64-bit phase: two chained divl's divide high:low by 10 and */
	/* leave the 0..9 remainder (next digit) in %edx. */
	.align	ALIGN
LCL(cvt_high):
	movl	%esi,%eax		/* calculate high/10 & high%10 */
	xorl	%edx,%edx
	divl	%ecx
	movl	%eax,%esi

	movl	%edi,%eax		/* calculate (low + (high%10)*2^32) / 10 */
	divl	%ecx
	movl	%eax,%edi

	decl	%ebx			/* decrement string pointer */
	addl	$48,%edx		/* convert from 0..9 -> '0'..'9' */
	movb	%dl,0(%ebx)		/* store digit in string */
	cmpl	$0,%esi			/* anything left in high part? */
	jne	LCL(cvt_high)

#endif	/* OVERFLOW */

	.align	ALIGN
LCL(cvt_low):
	movl	%edi,%eax		/* get low part into %eax */

	/* 32-bit phase: emit digits until the quotient reaches zero. */
	.align	ALIGN
LCL(cvt_low2):
	xorl	%edx,%edx		/* zero %edx before unsigned divl */
	divl	%ecx			/* calculate next digit */
	decl	%ebx			/* decrement string pointer */
	addl	$48,%edx		/* convert from 0..9 -> '0'..'9' */
	movb	%dl,0(%ebx)		/* store digit in string */
	cmpl	$0,%eax			/* any more digits to convert? */
	jne	LCL(cvt_low2)

	movl	%ebx,%eax		/* return value */
	popl	%edi
	popl	%esi
	MP_ENABLE_PREEMPTION(%ebx)
	popl	%ebx
	LEAVE0
	ret

END(_profile_cnt_to_decimal)
1449 | ||
1450 | #endif |