]> git.saurik.com Git - apple/xnu.git/blob - osfmk/profiling/i386/profile-asm.s
2386d46bf74a5b76b2437265e6cdaba599b1033e
[apple/xnu.git] / osfmk / profiling / i386 / profile-asm.s
1 /*
2 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * HISTORY
33 *
34 * Revision 1.1.1.1 1998/09/22 21:05:49 wsanchez
35 * Import of Mac OS X kernel (~semeria)
36 *
37 * Revision 1.1.1.1 1998/03/07 02:26:08 wsanchez
38 * Import of OSF Mach kernel (~mburg)
39 *
40 * Revision 1.1.7.1 1997/09/22 17:41:24 barbou
41 * MP+RT: protect cpu_number() usage against preemption.
42 * [97/09/16 barbou]
43 *
44 * Revision 1.1.5.1 1995/01/06 19:53:37 devrcs
45 * mk6 CR668 - 1.3b26 merge
46 * new file for mk6
47 * [1994/10/12 22:25:20 dwm]
48 *
49 * Revision 1.1.2.2 1994/05/16 19:19:17 meissner
50 * Add support for converting 64-bit integers to a decimal string.
51 * Use the correct address (selfpc) when creating the prof header for gprof.
52 * [1994/04/28 21:44:59 meissner]
53 *
54 * Revision 1.1.2.1 1994/04/08 17:51:42 meissner
55 * Make most stats 64 bits, except for things like memory allocation.
56 * [1994/04/02 14:58:21 meissner]
57 *
58 * Do not provide old mcount support under MK or server.
59 * Fixup stats size so it is the same as in profile-md.h.
60 * [1994/03/29 21:00:03 meissner]
61 *
62 * Use faster sequence for overflow addition.
63 * Keep {dummy,prof,gprof,old}_mcount counts in double precision.
64 * Add kernel NCPUS > 1 support.
65 * [1994/03/17 20:13:23 meissner]
66 *
67 * Add gprof/prof overflow support
68 * [1994/03/17 14:56:44 meissner]
69 *
70 * Add size of histogram counters & unused fields to profile_profil struct
71 * [1994/02/17 21:41:44 meissner]
72 *
73 * Add too_low/too_high to profile_stats.
74 * [1994/02/16 22:38:11 meissner]
75 *
76 * Bump # allocation contexts to 32 from 16.
77 * Store unique ptr address in gprof function header structure for _profile_reset.
78 * Add new fields from profile-{internal,md}.h.
79 * Align loop looking for an unlocked acontext.
80 * Count # times a locked context block was found.
81 * Expand copyright.
82 * [1994/02/07 12:40:56 meissner]
83 *
84 * Keep track of the number of times the kernel overflows the HISTCOUNTER counter.
85 * [1994/02/03 20:13:23 meissner]
86 *
87 * Add stats for {user,kernel,idle} mode in the kernel.
88 * [1994/02/03 15:17:22 meissner]
89 *
90 * No change.
91 * [1994/02/03 00:58:49 meissner]
92 *
93 * Combine _profile_{vars,stats,md}; Allow more than one _profile_vars.
94 * [1994/02/01 12:03:56 meissner]
95 *
96 * Move _mcount_ptr to be closer to other data declarations.
97 * Add text_len to profile_profil structure for mk.
98 * Split records_cnt into prof_cnt/gprof_cnt.
99 * Always update prof_cnt/gprof_cnt even if not DO_STATS.
100 * Add current/max cpu indicator to stats for kernel.
101 * [1994/01/28 23:33:20 meissner]
102 *
103 * Don't do 4+Lgotoff(lab), use separate labels.
104 * Change GPROF_HASH_SHIFT to 9 (from 8).
105 * [1994/01/26 22:00:59 meissner]
106 *
107 * Fixup NO_RECURSIVE_ALLOC to do byte loads, not word loads.
108 * [1994/01/26 20:30:57 meissner]
109 *
110 * Move callback pointers into separate allocation context.
111 * Add size fields for other structures to profile-vars.
112 * Allocate string table as one large allocation.
113 * Rewrite old mcount code once again.
114 * Use multiply to make hash value, not divide.
115 * Hash table is now a power of two.
116 * [1994/01/26 20:23:32 meissner]
117 *
118 * Cut hash table size back to 16189.
119 * Add size fields to all structures.
120 * Add major/minor version number to _profile_md.
121 * Move allocation context block pointers to _profile_vars.
122 * Move _gprof_dummy after _profile_md.
123 * New function header code now falls into hash an element
124 * to avoid having the hash code duplicated or use a macro.
125 * Fix bug in _gprof_mcount with ELF shared libraries.
126 * [1994/01/25 01:45:59 meissner]
127 *
128 * Move init functions to C code; rearrange profil varaibles.
129 * [1994/01/22 01:11:14 meissner]
130 *
131 * No change.
132 * [1994/01/20 20:56:43 meissner]
133 *
134 * Fixup copyright.
135 * [1994/01/18 23:07:39 meissner]
136 *
137 * Make flags byte-sized.
138 * Add have_bb flag.
139 * Add init_format flag.
140 * Always put word size multipler first in .space.
141 * [1994/01/18 21:57:14 meissner]
142 *
143 * Fix elfpic problems in last change.
144 * [1994/01/16 14:04:26 meissner]
145 *
146 * Rewrite gprof caching to be faster & not need a lock.
147 * Record prof information for gprof too.
148 * Bump reserved stats to 64.
149 * Bump up hash table size 30799.
150 * Conditionally use lock prefix.
151 * Change most #ifdef's to #if.
152 * DEBUG_PROFILE turns on stack frames now.
153 * Conditionally add externs to gprof to determine where time is spent.
154 * Prof_mcount uses xchgl to update function pointer.
155 * [1994/01/15 18:40:33 meissner]
156 *
157 * Fix a comment.
158 * Separate statistics from debugging (though debugging turns it on).
159 * Remove debug code that traces each gprof request.
160 * [1994/01/15 00:59:02 meissner]
161 *
162 * Move max hash bucket calculation into _gprof_write & put info in stats structure.
163 * [1994/01/04 16:15:14 meissner]
164 *
165 * Use _profile_printf to write diagnostics; add diag_stream to hold stream to write to.
166 * [1994/01/04 15:37:44 meissner]
167 *
168 * Add more allocation memory pools (gprof function hdrs in particular).
169 * For prof, gprof arc, and gprof function hdrs, allocate 16 pages at a time.
170 * Add major/minor version numbers to _profile_{vars,stats}.
171 * Add # profil buckets field to _profil_stats.
172 * [19
173 *
174 * $EndLog$
175 */
176
177 /*
178 * Common 386 profiling module that is shared between the kernel, mach
179 * servers, and the user space library. Each environment includes
180 * this file.
181 */
182
183 .file "profile-asm.s"
184
185 #include <machine/asm.h>
186
187 /*
188 * By default, debugging turns on statistics and stack frames.
189 */
190
191 #if DEBUG_PROFILE
192 #ifndef DO_STATS
193 #define DO_STATS 1
194 #endif
195
196 #ifndef STACK_FRAMES
197 #define STACK_FRAMES 1
198 #endif
199 #endif
200
201 #ifndef OLD_MCOUNT
202 #define OLD_MCOUNT 0 /* do not compile old code for mcount */
203 #endif
204
205 #ifndef DO_STATS
206 #define DO_STATS 1 /* compile in statistics code */
207 #endif
208
209 #ifndef DO_LOCK
210 #define DO_LOCK 0 /* use lock; in front of increments */
211 #endif
212
213 #ifndef LOCK_STATS
214 #define LOCK_STATS DO_LOCK /* update stats with lock set */
215 #endif
216
217 #ifndef STACK_FRAMES
218 #define STACK_FRAMES 0 /* create stack frames for debugger */
219 #endif
220
221 #ifndef NO_RECURSIVE_ALLOC
222 #define NO_RECURSIVE_ALLOC 0 /* check for recursive allocs */
223 /* (not thread safe!) */
224 #endif
225
226 #ifndef MARK_GPROF
227 #define MARK_GPROF 0 /* add externs for gprof profiling */
228 #endif
229
230 #ifndef OVERFLOW
231 #define OVERFLOW 1 /* add overflow checking support */
232 #endif
233
234 /*
235 * Turn on the use of the lock prefix if desired.
236 */
237
238 #ifndef LOCK
239 #if DO_LOCK
240 #define LOCK lock;
241 #else
242 #define LOCK
243 #endif
244 #endif
245
246 #ifndef SLOCK
247 #if LOCK_STATS
248 #define SLOCK LOCK
249 #else
250 #define SLOCK
251 #endif
252 #endif
253
254 /*
255 * 64-bit (OVERFLOW) or 32-bit counter updates; the 64-bit high word lives at 4+mem.
256 */
257
258 #if OVERFLOW
259 #define DINC(mem) LOCK addl $1,mem; LOCK adcl $0,4+mem /* 64-bit ++ with optional lock prefix */
260 #define DINC2(mem,mem2) LOCK addl $1,mem; LOCK adcl $0,mem2 /* as DINC, high word at mem2 */
261 #define SDINC(mem) SLOCK addl $1,mem; SLOCK adcl $0,4+mem /* statistics flavour of DINC */
262 #define SDADD(val,mem) SLOCK addl val,mem; SLOCK adcl $0,4+mem /* 64-bit += unsigned 32-bit val */
263 #define SDADDNEG(val,mem) SLOCK subl val,mem; SLOCK sbbl val,4+mem /* 64-bit -= val; val must be 0 or -1 (its own sign extension) */
264 #define SDSUB(val,mem) SLOCK subl val,mem; SLOCK sbbl $0,4+mem /* 64-bit -= unsigned 32-bit val */
265
266 #else
267 #define DINC(mem) LOCK incl mem
268 #define DINC2(mem,mem2) LOCK incl mem
269 #define SDINC(mem) SLOCK incl mem
270 #define SDADD(val,mem) SLOCK addl val,mem
271 #define SDADDNEG(val,mem) SLOCK subl val,mem
272 #define SDSUB(val,mem) SLOCK subl val,mem
273 #endif
274
275 /*
276 * Stack frame support so that debugger traceback works.
277 */
278
279 #if STACK_FRAMES
280 #define ENTER pushl %ebp; movl %esp,%ebp
281 #define LEAVE0 popl %ebp
282 #define Estack 4
283 #else
284 #define ENTER
285 #define LEAVE0
286 #define Estack 0
287 #endif
288
289 /*
290 * Gprof profiling.
291 */
292
293 #if MARK_GPROF
294 #define MARK(name) .globl EXT(name); ELF_FUNC(EXT(name)); ELF_SIZE(EXT(name),0); LEXT(name)
295 #else
296 #define MARK(name)
297 #endif
298
299 /*
300 * Profiling allocation context block. Each time memory is needed, the
301 * allocator loops until it finds an unlocked context block, and allocates
302 * from that block. If no context blocks are available, a new memory
303 * pool is allocated, and added to the end of the chain.
304 */
305
306 LCL(A_next) = 0 /* next context block link (must be 0) */
307 LCL(A_plist) = LCL(A_next)+4 /* head of page list for context block */
308 LCL(A_lock) = LCL(A_plist)+4 /* lock word */
309 LCL(A_size) = LCL(A_lock)+4 /* size of context block */
310
311 #define A_next LCL(A_next)
312 #define A_plist LCL(A_plist)
313 #define A_lock LCL(A_lock)
314 #define A_size LCL(A_size)
315
316 /*
317 * Allocation contexts used.
318 */
319
320 LCL(C_prof) = 0 /* prof records */
321 LCL(C_gprof) = 1 /* gprof arc records */
322 LCL(C_gfunc) = 2 /* gprof function headers */
323 LCL(C_misc) = 3 /* misc. allocations */
324 LCL(C_profil) = 4 /* memory for profil */
325 LCL(C_dci) = 5 /* memory for dci */
326 LCL(C_bb) = 6 /* memory for basic blocks */
327 LCL(C_callback) = 7 /* memory for callbacks */
328 LCL(C_max) = 32 /* # allocation contexts */
329
330 #define C_prof LCL(C_prof)
331 #define C_gprof LCL(C_gprof)
332 #define C_gfunc LCL(C_gfunc)
333 #define C_max LCL(C_max)
334
335 /*
336 * Linked list of memory allocations.
337 */
338
339 LCL(M_first) = 0 /* pointer to first byte available */
340 LCL(M_ptr) = LCL(M_first)+4 /* pointer to next available byte */
341 LCL(M_next) = LCL(M_ptr)+4 /* next page allocated */
342 LCL(M_nfree) = LCL(M_next)+4 /* # bytes available */
343 LCL(M_nalloc) = LCL(M_nfree)+4 /* # bytes allocated */
344 LCL(M_num) = LCL(M_nalloc)+4 /* # allocations done on this page */
345 LCL(M_size) = LCL(M_num)+4 /* size of page header */
346
347 #define M_first LCL(M_first)
348 #define M_ptr LCL(M_ptr)
349 #define M_next LCL(M_next)
350 #define M_nfree LCL(M_nfree)
351 #define M_nalloc LCL(M_nalloc)
352 #define M_num LCL(M_num)
353 #define M_size LCL(M_size)
354
355 /*
356 * Prof data type.
357 */
358
359 LCL(P_addr) = 0 /* function address */
360 LCL(P_count) = LCL(P_addr)+4 /* # times function called */
361 LCL(P_overflow) = LCL(P_count)+4 /* # times count overflowed */
362 LCL(P_size) = LCL(P_overflow)+4 /* size of prof data type */
363
364 #define P_addr LCL(P_addr)
365 #define P_count LCL(P_count)
366 #define P_overflow LCL(P_overflow)
367 #define P_size LCL(P_size)
368
369 /*
370 * Gprof data type.
371 */
372
373 LCL(G_next) = 0 /* next hash link (must be 0) */
374 LCL(G_frompc) = LCL(G_next)+4 /* caller's caller */
375 LCL(G_selfpc) = LCL(G_frompc)+4 /* caller's address */
376 LCL(G_count) = LCL(G_selfpc)+4 /* # times arc traversed */
377 LCL(G_overflow) = LCL(G_count)+4 /* # times count overflowed */
378 LCL(G_size) = LCL(G_overflow)+4 /* size of gprof data type */
379
380 #define G_next LCL(G_next)
381 #define G_frompc LCL(G_frompc)
382 #define G_selfpc LCL(G_selfpc)
383 #define G_count LCL(G_count)
384 #define G_overflow LCL(G_overflow)
385 #define G_size LCL(G_size)
386
387 /*
388 * Gprof header.
389 *
390 * At least one header is allocated for each unique function that is profiled.
391 * In order to save time calculating the hash value, the last H_maxcache
392 * distinct arcs are cached within this structure. Also, to avoid loading
393 * the GOT when searching the hash table, we copy the hash pointer to this
394 * structure, so that we only load the GOT when we need to allocate an arc.
395 */
396
397 LCL(H_maxcache) = 3 /* # of cache table entries */
398 LCL(H_csize) = 4*LCL(H_maxcache) /* size of each cache array */
399
400 LCL(H_hash_ptr) = 0 /* hash table to use */
401 LCL(H_unique_ptr) = LCL(H_hash_ptr)+4 /* function unique pointer */
402 LCL(H_prof) = LCL(H_unique_ptr)+4 /* prof statistics */
403 LCL(H_cache_ptr) = LCL(H_prof)+P_size /* cache table of element pointers */
404 LCL(H_size) = LCL(H_cache_ptr)+LCL(H_csize) /* size of gprof header type */
405
406 #define H_maxcache LCL(H_maxcache)
407 #define H_csize LCL(H_csize)
408 #define H_hash_ptr LCL(H_hash_ptr)
409 #define H_unique_ptr LCL(H_unique_ptr)
410 #define H_prof LCL(H_prof)
411 #define H_cache_ptr LCL(H_cache_ptr)
412 #define H_size LCL(H_size)
413
414 /*
415 * Number of digits needed to write a 64 bit number including trailing null.
416 * (rounded up to be divisible by 4).
417 */
418
419 #define N_digit 24
420
421 \f
422 .data
423
424 /*
425 * Default gprof hash table size, which must be a power of two.
426 * The shift specifies how many low order bits to eliminate when
427 * calculating the hash value.
428 */
429
430 #ifndef GPROF_HASH_SIZE
431 #define GPROF_HASH_SIZE 16384
432 #endif
433
434 #ifndef GPROF_HASH_SHIFT
435 #define GPROF_HASH_SHIFT 9
436 #endif
437
438 #define GPROF_HASH_MASK (GPROF_HASH_SIZE-1)
439
440 DATA(_profile_hash_size)
441 .long GPROF_HASH_SIZE
442 ENDDATA(_profile_hash_size)
443
444 \f
445
446 /*
447 * Pointer that the compiler uses to call to the appropriate mcount function.
448 */
449
450 DATA(_mcount_ptr)
451 .long EXT(_dummy_mcount)
452 ENDDATA(_mcount_ptr)
453
454 /*
455 * Global profile variables. The structure that accesses this in C is declared
456 * in profile-internal.h. All items in .data that follow this will be used as
457 * one giant record, and each unique machine, thread, kgmon output or what have
458 * you will create a separate instance. Typically there is only one instance
459 * which will be the memory laid out below.
460 */
461
462 LCL(var_major_version) = 0 /* major version number */
463 LCL(var_minor_version) = LCL(var_major_version)+4 /* minor version number */
464 LCL(vars_size) = LCL(var_minor_version)+4 /* size of _profile_vars structure */
465 LCL(plist_size) = LCL(vars_size)+4 /* size of page_list structure */
466 LCL(acontext_size) = LCL(plist_size)+4 /* size of allocation contexts */
467 LCL(callback_size) = LCL(acontext_size)+4 /* size of callback structure */
468 LCL(type) = LCL(callback_size)+4 /* profile type (gprof, prof) */
469 LCL(error_msg) = LCL(type)+4 /* error message for perror */
470 LCL(filename) = LCL(error_msg)+4 /* filename to write to */
471 LCL(str_ptr) = LCL(filename)+4 /* string table pointer */
472 LCL(stream) = LCL(str_ptr)+4 /* stdio stream to write to */
473 LCL(diag_stream) = LCL(stream)+4 /* stdio stream to write diagnostics to */
474 LCL(fwrite_func) = LCL(diag_stream)+4 /* function like fwrite to output bytes */
475 LCL(page_size) = LCL(fwrite_func)+4 /* page size in bytes */
476 LCL(str_bytes) = LCL(page_size)+4 /* # bytes in string table */
477 LCL(str_total) = LCL(str_bytes)+4 /* # total bytes allocated for string table */
478 LCL(clock_ticks) = LCL(str_total)+4 /* # clock ticks per second */
479
480 /* profil variables: offsets of the profil info substructure (profil_start..profil_end) */
481 LCL(profil_start) = LCL(clock_ticks)+4 /* start of profil variables */
482 LCL(lowpc) = LCL(clock_ticks)+4 /* lowest address (same offset as profil_start) */
483 LCL(highpc) = LCL(lowpc)+4 /* highest address */
484 LCL(text_len) = LCL(highpc)+4 /* highpc-lowpc */
485 LCL(profil_len) = LCL(text_len)+4 /* size of profil buffer */
486 LCL(counter_size) = LCL(profil_len)+4 /* size of individual counter */
487 LCL(scale) = LCL(counter_size)+4 /* scale factor */
488 LCL(profil_unused) = LCL(scale)+4 /* unused fields (8 four-byte slots) */
489 LCL(profil_end) = LCL(profil_unused)+4*8 /* end of profil_info structure */
490 LCL(profil_buf) = LCL(profil_end) /* buffer for profil (first field after profil_info) */
491
492 /* Output selection func ptrs */
493 LCL(output_init) = LCL(profil_buf)+4 /* Initialization */
494 LCL(output) = LCL(output_init)+4 /* Write out profiling info */
495 LCL(output_ptr) = LCL(output)+4 /* Output specific data ptr */
496
497 /* Memory allocation support */
498 LCL(acontext) = LCL(output_ptr)+4 /* pointers to allocation context blocks */
499
500 LCL(bogus_func) = LCL(acontext)+4*C_max /* function to use if gprof arc is bad */
501 LCL(vars_unused) = LCL(bogus_func)+4 /* future growth */
502
503 /* flags */
504 LCL(init) = LCL(vars_unused)+4*63 /* whether initializations were done */
505 LCL(active) = LCL(init)+1 /* whether profiling is active */
506 LCL(do_profile) = LCL(active)+1 /* whether to do profiling */
507 LCL(use_dci) = LCL(do_profile)+1 /* whether to use DCI */
508 LCL(use_profil) = LCL(use_dci)+1 /* whether to use profil */
509 LCL(recursive_alloc) = LCL(use_profil)+1 /* alloc called recursively */
510 LCL(output_uarea) = LCL(recursive_alloc)+1 /* output uarea */
511 LCL(output_stats) = LCL(output_uarea)+1 /* output stats info */
512 LCL(output_clock) = LCL(output_stats)+1 /* output the clock ticks */
513 LCL(multiple_sections) = LCL(output_clock)+1 /* multiple sections are ok */
514 LCL(have_bb) = LCL(multiple_sections)+1 /* whether we have basic block data */
515 LCL(init_format) = LCL(have_bb)+1 /* The output format has been chosen */
516 LCL(debug) = LCL(init_format)+1 /* Whether or not we are debugging */
517 LCL(check_funcs) = LCL(debug)+1 /* Whether to check functions for validity */
518 LCL(flag_unused) = LCL(check_funcs)+1 /* unused flags */
519 LCL(end_of_vars) = LCL(flag_unused)+62 /* size of machine independent vars */
520
521 /*
522 * Data that contains profile statistics that can be dumped out
523 * into the {,g}mon.out file. This is defined in profile-md.h.
524 */
525
526 LCL(stats_start) = LCL(end_of_vars) /* start of stats substructure */
527 LCL(stats_major_version)= LCL(stats_start) /* major version number */
528 LCL(stats_minor_version)= LCL(stats_major_version)+4 /* minor version number */
529 LCL(stats_size) = LCL(stats_minor_version)+4 /* size of _profile_stats structure */
530 LCL(profil_buckets) = LCL(stats_size)+4 /* # profil buckets */
531 LCL(my_cpu) = LCL(profil_buckets)+4 /* identify which cpu/thread this is */
532 LCL(max_cpu) = LCL(my_cpu)+4 /* max cpu indicator (HISTORY: "current/max cpu indicator") */
533 LCL(prof_records) = LCL(max_cpu)+4 /* # of profiled functions */
534 LCL(gprof_records) = LCL(prof_records)+4 /* # of gprof arcs created */
535 LCL(hash_buckets) = LCL(gprof_records)+4 /* max gprof hash buckets on a chain */
536 LCL(bogus_count) = LCL(hash_buckets)+4 /* # bogus functions found in gprof */
537
538 LCL(cnt) = LCL(bogus_count)+4 /* # of _{prof,gprof}_mcount calls (8 bytes) */
539 LCL(dummy) = LCL(cnt)+8 /* # of _dummy_mcount calls (8 bytes) */
540 LCL(old_mcount) = LCL(dummy)+8 /* # of old mcount calls (8 bytes) */
541 LCL(hash_search) = LCL(old_mcount)+8 /* # gprof hash buckets searched (8 bytes) */
542 LCL(hash_num) = LCL(hash_search)+8 /* # times hash table searched (8 bytes) */
543 LCL(user_ticks) = LCL(hash_num)+8 /* # ticks within user space (8 bytes) */
544 LCL(kernel_ticks) = LCL(user_ticks)+8 /* # ticks within kernel space (8 bytes) */
545 LCL(idle_ticks) = LCL(kernel_ticks)+8 /* # ticks cpu was idle (8 bytes) */
546 LCL(overflow_ticks) = LCL(idle_ticks)+8 /* # ticks where histcounter overflowed (8 bytes) */
547 LCL(acontext_locked) = LCL(overflow_ticks)+8 /* # times an acontext was locked (8 bytes) */
548 LCL(too_low) = LCL(acontext_locked)+8 /* # times histogram tick too low (8 bytes) */
549 LCL(too_high) = LCL(too_low)+8 /* # times histogram tick too high (8 bytes) */
550 LCL(prof_overflow) = LCL(too_high)+8 /* # times the prof count field overflowed (8 bytes) */
551 LCL(gprof_overflow) = LCL(prof_overflow)+8 /* # times the gprof count field overflowed (8 bytes) */
552 LCL(num_alloc) = LCL(gprof_overflow)+8 /* # allocations in each context (C_max 4-byte slots) */
553 LCL(bytes_alloc) = LCL(num_alloc)+4*C_max /* bytes allocated in each context (C_max 4-byte slots) */
554 LCL(num_context) = LCL(bytes_alloc)+4*C_max /* # allocation context blocks (C_max 4-byte slots) */
555 LCL(wasted) = LCL(num_context)+4*C_max /* # bytes wasted (C_max 4-byte slots) */
556 LCL(overhead) = LCL(wasted)+4*C_max /* # bytes of overhead (C_max 4-byte slots) */
557 LCL(buckets) = LCL(overhead)+4*C_max /* # hash indexes that have n buckets (10 4-byte slots) */
558 LCL(cache_hits1) = LCL(buckets)+4*10 /* # gprof cache hits in bucket #1 (8 bytes) */
559 LCL(cache_hits2) = LCL(cache_hits1)+8 /* # gprof cache hits in bucket #2 (8 bytes) */
560 LCL(cache_hits3) = LCL(cache_hits2)+8 /* # gprof cache hits in bucket #3 (8 bytes) */
561 LCL(stats_unused) = LCL(cache_hits3)+8 /* reserved for future use (64 8-byte slots) */
562 LCL(stats_end) = LCL(stats_unused)+8*64 /* end of stats structure */
563
564 /*
565 * Machine dependent variables that no C file should access (except for
566 * profile-md.c).
567 */
568
569 LCL(md_start) = LCL(stats_end) /* start of md structure (directly after the stats) */
570 LCL(md_major_version) = LCL(md_start) /* major version number */
571 LCL(md_minor_version) = LCL(md_major_version)+4 /* minor version number */
572 LCL(md_size) = LCL(md_minor_version)+4 /* size of the machine dependent (md) structure */
573 LCL(hash_ptr) = LCL(md_size)+4 /* gprof hash pointer */
574 LCL(hash_size) = LCL(hash_ptr)+4 /* gprof hash size */
575 LCL(num_cache) = LCL(hash_size)+4 /* # of cache entries */
576 LCL(save_mcount_ptr) = LCL(num_cache)+4 /* save for mcount_ptr when suspending profiling */
577 LCL(mcount_ptr_ptr) = LCL(save_mcount_ptr)+4 /* pointer to _mcount_ptr */
578 LCL(dummy_ptr) = LCL(mcount_ptr_ptr)+4 /* pointer to gprof_dummy */
579 LCL(alloc_pages) = LCL(dummy_ptr)+4 /* function pointer called to allocate more memory */
580 LCL(num_buffer) = LCL(alloc_pages)+4 /* buffer to convert 64 bit ints in (N_digit bytes) */
581 LCL(md_unused) = LCL(num_buffer)+N_digit /* unused fields (58 four-byte slots) */
582 LCL(md_end) = LCL(md_unused)+4*58 /* end of md structure */
583 LCL(total_size) = LCL(md_end) /* size of entire structure */
584
585 /*
586 * Size of the entire _profile_vars structure.
587 */
588
589 DATA(_profile_size)
590 .long LCL(total_size)
591 ENDDATA(_profile_size)
592
593 /*
594 * Size of the statistics substructure.
595 */
596
597 DATA(_profile_stats_size)
598 .long LCL(stats_end)-LCL(stats_start)
599 ENDDATA(_profile_stats_size)
600
601 /*
602 * Size of the profil info substructure.
603 */
604
605 DATA(_profile_profil_size)
606 .long LCL(profil_end)-LCL(profil_start)
607 ENDDATA(_profile_profil_size)
608
609 /*
610 * Size of the machine dependent substructure (md_start up to md_end).
611 */
612
613 DATA(_profile_md_size)
614 .long LCL(md_end)-LCL(md_start)
615 ENDDATA(_profile_md_size)
616
617 /*
618 * Whether statistics are supported.
619 */
620
621 DATA(_profile_do_stats)
622 .long DO_STATS
623 ENDDATA(_profile_do_stats)
624
625 .text
626
627 /*
628 * Map LCL(xxx) -> into simpler names
629 */
630
631 #define V_acontext LCL(acontext)
632 #define V_acontext_locked LCL(acontext_locked)
633 #define V_alloc_pages LCL(alloc_pages)
634 #define V_bogus_func LCL(bogus_func)
635 #define V_bytes_alloc LCL(bytes_alloc)
636 #define V_cache_hits1 LCL(cache_hits1)
637 #define V_cache_hits2 LCL(cache_hits2)
638 #define V_cache_hits3 LCL(cache_hits3)
639 #define V_cnt LCL(cnt)
640 #define V_cnt_overflow LCL(cnt_overflow) /* NOTE(review): no LCL(cnt_overflow) in the layout above; presumably stale */
641 #define V_check_funcs LCL(check_funcs)
642 #define V_dummy LCL(dummy)
643 #define V_dummy_overflow LCL(dummy_overflow) /* NOTE(review): no LCL(dummy_overflow) in the layout above; presumably stale */
644 #define V_dummy_ptr LCL(dummy_ptr)
645 #define V_gprof_records LCL(gprof_records)
646 #define V_hash_num LCL(hash_num)
647 #define V_hash_ptr LCL(hash_ptr)
648 #define V_hash_search LCL(hash_search)
649 #define V_mcount_ptr_ptr LCL(mcount_ptr_ptr)
650 #define V_num_alloc LCL(num_alloc)
651 #define V_num_buffer LCL(num_buffer)
652 #define V_num_context LCL(num_context)
653 #define V_old_mcount LCL(old_mcount)
654 #define V_old_mcount_overflow LCL(old_mcount_overflow) /* NOTE(review): no LCL(old_mcount_overflow) in the layout above; presumably stale */
655 #define V_overhead LCL(overhead)
656 #define V_page_size LCL(page_size)
657 #define V_prof_records LCL(prof_records)
658 #define V_recursive_alloc LCL(recursive_alloc)
659 #define V_wasted LCL(wasted)
660
661 /*
662 * Load %ebx with the address of _profile_vars. On a multiprocessor, this
663 * loads the appropriate machine's _profile_vars structure.
664 * For ELF shared libraries, rely on the fact that we won't need a GOT,
665 * except to load this pointer.
666 */
667
668 #if defined (MACH_KERNEL)
669 #define ASSEMBLER
670 #include <i386/mp.h>
671
672 #if SQT
673 #include <i386/SQT/asm_macros.h>
674 #endif
675
676 #ifndef CPU_NUMBER
677 #error "Cannot determine how to get CPU number"
678 #endif
679
680 #define Vload CPU_NUMBER(%ebx); movl EXT(_profile_vars_cpus)(,%ebx,4),%ebx
681
682 #else /* not kernel */
683 #define Vload Gload; Egaddr(%ebx,_profile_vars)
684 #endif
685
686 \f
687 /*
688 * Allocate some memory for profiling. This memory is guaranteed to
689 * be zero.
690 * %eax contains the memory size requested and will contain ptr on exit.
691 * %ebx contains the address of the appropriate profile_vars structure.
692 * %ecx is the number of the memory pool to allocate from (trashed on exit).
693 * %edx is trashed.
694 * %esi is preserved.
695 * %edi is preserved.
696 * %ebp is preserved.
697 */
698
699 Entry(_profile_alloc_asm) /* in: %eax = size, %ebx = vars, %ecx = pool #; out: %eax = ptr */
700 ENTER
701 pushl %esi /* %esi and %edi are used as scratch and restored on exit */
702 pushl %edi
703
704 movl %ecx,%edi /* move context number to saved reg */
705
706 #if NO_RECURSIVE_ALLOC
707 movb $-1,%cl
708 xchgb %cl,V_recursive_alloc(%ebx) /* set the flag, old value comes back in %cl */
709 cmpb $0,%cl
710 je LCL(no_recurse) /* flag was clear: this is not a recursive call */
711
712 int $3 /* flag was already set: trap */
713
714 .align ALIGN
715 LCL(no_recurse):
716 #endif
717
718 leal V_acontext(%ebx,%edi,4),%ecx /* address of this pool's chain head; A_next is offset 0 */
719
720 /* Loop looking for a free allocation context. */
721 /* %eax = size, %ebx = vars addr, %ecx = ptr to allocation context to try */
722 /* %edi = context number */
723
724 .align ALIGN
725 LCL(alloc_loop):
726 movl %ecx,%esi /* save ptr in case no more contexts */
727 movl A_next(%ecx),%ecx /* next context block */
728 cmpl $0,%ecx
729 je LCL(alloc_context) /* need to allocate a new context block */
730
731 movl $-1,%edx
732 xchgl %edx,A_lock(%ecx) /* %edx == 0 if context available */
733
734 #if DO_STATS
735 SDADDNEG(%edx,V_acontext_locked(%ebx)) /* %edx is 0 or -1: increment counter if lock was held */
736 #endif
737
738 cmpl $0,%edx
739 jne LCL(alloc_loop) /* go back if this context block is not available */
740
741 /* Allocation context found (%ecx), now allocate. */
742 movl A_plist(%ecx),%edx /* pointer to current block */
743 cmpl $0,%edx /* first allocation? */
744 je LCL(alloc_new)
745
746 cmpl %eax,M_nfree(%edx) /* see if we have enough space */
747 jb LCL(alloc_new) /* jump if not enough space (unsigned: both are byte counts) */
748
749 /* Allocate from local block (and common exit) */
750 /* %eax = bytes to allocate, %ebx = vars address, %ecx = context, %edx = memory block */
751 /* %edi = context number */
752
753 .align ALIGN
754 LCL(alloc_ret):
755
756 #if DO_STATS
757 SLOCK incl V_num_alloc(%ebx,%edi,4) /* update global counters */
758 SLOCK addl %eax,V_bytes_alloc(%ebx,%edi,4)
759 SLOCK subl %eax,V_wasted(%ebx,%edi,4) /* handed-out bytes are no longer counted as wasted */
760 #endif
761
762 movl M_ptr(%edx),%esi /* pointer return value */
763 subl %eax,M_nfree(%edx) /* decrement bytes remaining */
764 addl %eax,M_nalloc(%edx) /* increment bytes allocated */
765 incl M_num(%edx) /* increment # allocations */
766 addl %eax,M_ptr(%edx) /* advance pointer */
767 movl $0,A_lock(%ecx) /* unlock context block */
768 movl %esi,%eax /* return pointer */
769
770 #if NO_RECURSIVE_ALLOC
771 movb $0,V_recursive_alloc(%ebx) /* clear the recursion flag set on entry */
772 #endif
773
774 popl %edi
775 popl %esi
776 LEAVE0
777 ret /* return to the caller */
778
779 /* Allocate space in whole number of pages */
780 /* %eax = bytes to allocate, %ebx = vars address, %ecx = context */
781 /* %edi = context number */
782
783 .align ALIGN
784 LCL(alloc_new):
785 pushl %eax /* save regs */
786 pushl %ecx
787 movl V_page_size(%ebx),%edx
788 addl $(M_size-1),%eax /* add in overhead size & subtract 1 */
789 decl %edx /* page_size - 1 */
790 addl %edx,%eax /* round up to whole number of pages */
791 notl %edx
792 andl %edx,%eax
793 leal -M_size(%eax),%esi /* save allocation size */
794 pushl %eax /* argument to _profile_alloc_pages */
795 call *V_alloc_pages(%ebx) /* allocate some memory */
796 addl $4,%esp /* pop off argument */
797
798 #if DO_STATS
799 SLOCK addl %esi,V_wasted(%ebx,%edi,4) /* update global counters */
800 SLOCK addl $(M_size),V_overhead(%ebx,%edi,4)
801 #endif
802
803 popl %ecx /* context block */
804 movl %eax,%edx /* memory block pointer */
805 movl %esi,M_nfree(%edx) /* # free bytes */
806 addl $(M_size),%eax /* bump past overhead */
807 movl A_plist(%ecx),%esi /* previous memory block or 0 */
808 movl %eax,M_first(%edx) /* first space available */
809 movl %eax,M_ptr(%edx) /* current address available */
810 movl %esi,M_next(%edx) /* next memory block allocated */
811 movl %edx,A_plist(%ecx) /* update current page list */
812 popl %eax /* user size request */
813 jmp LCL(alloc_ret) /* goto common return code */
814
815 /* Allocate a context header in addition to memory block header + data */
816 /* %eax = bytes to allocate, %ebx = vars address, %esi = ptr to store context ptr */
817 /* %edi = context number */
818
819 .align ALIGN
820 LCL(alloc_context):
821 pushl %eax /* save regs */
822 pushl %esi
823 movl V_page_size(%ebx),%edx
824 addl $(A_size+M_size-1),%eax /* add in overhead size & subtract 1 */
825 decl %edx /* page_size - 1 */
826 addl %edx,%eax /* round up to whole number of pages */
827 notl %edx
828 andl %edx,%eax
829 leal -A_size-M_size(%eax),%esi /* save allocation size */
830 pushl %eax /* argument to _profile_alloc_pages */
831 call *V_alloc_pages(%ebx) /* allocate some memory */
832 addl $4,%esp /* pop off argument */
833
834 #if DO_STATS
835 SLOCK incl V_num_context(%ebx,%edi,4) /* bump # context blocks */
836 SLOCK addl %esi,V_wasted(%ebx,%edi,4) /* update global counters */
837 SLOCK addl $(A_size+M_size),V_overhead(%ebx,%edi,4)
838 #endif
839
840 movl %eax,%ecx /* context pointer */
841 leal A_size(%eax),%edx /* memory block pointer */
842 movl %esi,M_nfree(%edx) /* # free bytes */
843 addl $(A_size+M_size),%eax /* bump past overhead */
844 movl %eax,M_first(%edx) /* first space available */
845 movl %eax,M_ptr(%edx) /* current address available */
846 movl $0,M_next(%edx) /* next memory block allocated */
847 movl %edx,A_plist(%ecx) /* head of memory block list */
848 movl $-1,A_lock(%ecx) /* set lock (-1, the same held value alloc_loop stores) */
849 popl %esi /* ptr to store context block link */
850 movl %ecx,%eax /* context pointer temp */
851 xchgl %eax,A_next(%esi) /* link into chain */
852 movl %eax,A_next(%ecx) /* add links in case of threading */
853 popl %eax /* user size request */
854 jmp LCL(alloc_ret) /* goto common return code */
855
856 END(_profile_alloc_asm)
857
858 /*
859 * C callable version of the profile memory allocator.
860 * extern void *_profile_alloc(struct profile_vars *, size_t, acontext_type_t);
861 */
862
863 Entry(_profile_alloc)
864 ENTER
865 pushl %ebx /* saved here, restored before returning */
866 movl 8+Estack(%esp),%ebx /* 1st argument: profile_vars address */
867 movl 16+Estack(%esp),%ecx /* 3rd argument: which memory pool to allocate from */
868 movl 12+Estack(%esp),%eax /* 2nd argument: memory size */
869 addl $3,%eax /* size + 3 ... */
870 andl $-4,%eax /* ... masked down: rounded up to a word boundary */
871 call EXT(_profile_alloc_asm) /* takes %eax/%ebx/%ecx, returns the pointer in %eax */
872 popl %ebx
873 LEAVE0
874 ret
875 END(_profile_alloc)
876
877 \f
878 /*
879 * Dummy mcount routine that just returns.
880 *
881 * +-------------------------------+
882 * | |
883 * | |
884 * | caller's caller stack, |
885 * | saved registers, params. |
886 * | |
887 * | |
888 * +-------------------------------+
889 * | caller's caller return addr. |
890 * +-------------------------------+
891 * esp --> | caller's return address |
892 * +-------------------------------+
893 *
894 * edx --> function unique label
895 */
896
897 Entry(_dummy_mcount) /* mcount stub: optionally counts the call, then returns without profiling */
898 ENTER
899
900 #if DO_STATS
901 pushl %ebx /* %ebx is used as the vars pointer below */
902 MP_DISABLE_PREEMPTION(%ebx)
903 Vload /* presumably loads the profile vars address into %ebx (macro defined outside this chunk) */
904 SDINC(V_dummy(%ebx)) /* bump the V_dummy statistics counter */
905 MP_ENABLE_PREEMPTION(%ebx)
906 popl %ebx /* restore caller's %ebx */
907 #endif
908
909 LEAVE0
910 ret
911 END(_dummy_mcount)
912
913 \f
914 /*
915 * Entry point for System V based profiling, count how many times each function
916 * is called. The function label is passed in %edx, and the top two words on
917 * the stack are the caller's address, and the caller's return address.
918 *
919 * +-------------------------------+
920 * | |
921 * | |
922 * | caller's caller stack, |
923 * | saved registers, params. |
924 * | |
925 * | |
926 * +-------------------------------+
927 * | caller's caller return addr. |
928 * +-------------------------------+
929 * esp --> | caller's return address |
930 * +-------------------------------+
931 *
932 * edx --> function unique label
933 *
934 * We don't worry about the possibility of two threads calling
935 * the same function for the first time simultaneously. If that
936 * happens, two records will be created, and the address of one of
937 * them will be stored in the function unique label (which
938 * is aligned by the compiler, so we don't have to watch out for
939 * crossing page/cache boundaries).
940 */
941
942 Entry(_prof_mcount) /* per-function call counter; %edx = address of the function's unique label */
943 ENTER
944
945 #if DO_STATS
946 pushl %ebx /* with DO_STATS, %ebx holds the vars address for the whole routine */
947 MP_DISABLE_PREEMPTION(%ebx)
948 Vload
949 SDINC(V_cnt(%ebx)) /* bump the V_cnt statistics counter */
950 #endif
951
952 movl (%edx),%eax /* initialized? */
953 cmpl $0,%eax
954 je LCL(pnew) /* label still holds 0: no record yet, go allocate one */
955
956 DINC2(P_count(%eax),P_overflow(%eax)) /* bump function count (double precision) */
957
958 #if DO_STATS
959 MP_ENABLE_PREEMPTION(%ebx)
960 popl %ebx
961 #endif
962
963 LEAVE0
964 ret
965
966 .align ALIGN
967 LCL(pnew): /* first call: allocate a record and store its address in the label */
968
969 #if !DO_STATS
970 pushl %ebx /* without DO_STATS, %ebx has not been saved or loaded yet */
971 MP_DISABLE_PREEMPTION(%ebx)
972 Vload
973 #endif
974
975 SLOCK incl V_prof_records(%ebx) /* count one more prof record */
976 pushl %edx /* keep the label address across the allocator call */
977 movl $(P_size),%eax /* allocation size */
978 movl $(C_prof),%ecx /* allocation pool */
979 call EXT(_profile_alloc_asm) /* allocate a new record */
980 popl %edx
981
982 movl Estack+4(%esp),%ecx /* caller's address (the +4 presumably skips the saved %ebx) */
983 movl %ecx,P_addr(%eax)
984 movl $1,P_count(%eax) /* call count */
985 xchgl %eax,(%edx) /* update function header */
986 MP_ENABLE_PREEMPTION(%ebx) /* pairs with the disable on either the DO_STATS or !DO_STATS entry path */
987 popl %ebx
988 LEAVE0
989 ret
990
991 END(_prof_mcount)
992
993 \f
994 /*
995 * Entry point for BSD based graph profiling, count how many times each unique
996 * call graph (caller + callee) is called. The function label is passed in
997 * %edx, and the top two words on the stack are the caller's address, and the
998 * caller's return address.
999 *
1000 * +-------------------------------+
1001 * | |
1002 * | |
1003 * | caller's caller stack, |
1004 * | saved registers, params. |
1005 * | |
1006 * | |
1007 * +-------------------------------+
1008 * | caller's caller return addr. |
1009 * +-------------------------------+
1010 * esp --> | caller's return address |
1011 * +-------------------------------+
1012 *
1013 * edx --> function unique label
1014 *
1015 * We don't worry about the possibility of two threads calling the same
1016 * function simultaneously. If that happens, two records will be created, and
1017 * the address of one of them will be stored in the function unique label
1018 * (which is aligned by the compiler).
1019 *
1020 * By design, the gprof header is not locked. Each of the cache pointers is
1021 * always a valid pointer (possibly to a null record), and if another thread
1022 * comes in and modifies the pointer, it does so atomically with a simple store.
1023 * Since all arcs are in the hash table, the caches are just to avoid doing
1024 * a multiplication in the common case, and if they don't match, the arcs will
1025 * still be found.
1026 */
1027
1028 Entry(_gprof_mcount) /* call-graph profiler; %edx = address of the function's unique label */
1029
1030 ENTER
1031 movl Estack+4(%esp),%ecx /* caller's caller address */
1032
1033 #if DO_STATS
1034 pushl %ebx /* with DO_STATS, %ebx holds the vars address until the routine returns */
1035 MP_DISABLE_PREEMPTION(%ebx)
1036 Vload
1037 SDINC(V_cnt(%ebx)) /* bump profile call counter (double int) */
1038 #endif
1039
1040 movl (%edx),%eax /* Gprof header allocated? */
1041 cmpl $0,%eax
1042 je LCL(gnew) /* skip if first call */
1043
1044 DINC2(H_prof+P_count(%eax),H_prof+P_overflow(%eax)) /* bump function count */
1045
1046 /* See if this call arc is the same as the last time */
1047 MARK(_gprof_mcount_cache1)
1048 movl H_cache_ptr(%eax),%edx /* last arc searched */
1049 cmpl %ecx,G_frompc(%edx) /* skip if not equal */
1050 jne LCL(gcache2)
1051
1052 /* Same as last time, increment and return */
1053
1054 DINC2(G_count(%edx),G_overflow(%edx)) /* bump arc count */
1055
1056 #if DO_STATS
1057 SDINC(V_cache_hits1(%ebx)) /* update counter */
1058 MP_ENABLE_PREEMPTION(%ebx)
1059 popl %ebx
1060 #endif
1061
1062 LEAVE0
1063 ret
1064
1065 /* Search second cache entry */
1066 /* %eax = gprof func header, %ebx = vars address if DO_STATS, %ecx = caller's caller */
1067 /* %edx = first arc searched */
1068 /* %ebx if DO_STATS pushed on stack */
1069
1070 .align ALIGN
1071 MARK(_gprof_mcount_cache2)
1072 LCL(gcache2):
1073 pushl %esi /* get a saved register */
1074 movl H_cache_ptr+4(%eax),%esi /* 2nd arc to be searched */
1075 cmpl %ecx,G_frompc(%esi) /* skip if not equal */
1076 jne LCL(gcache3)
1077
1078 /* Element found, increment, reset last arc searched and return */
1079
1080 DINC2(G_count(%esi),G_overflow(%esi)) /* bump arc count */
1081
1082 movl %esi,H_cache_ptr+0(%eax) /* swap 1st and 2nd cached arcs */
1083 popl %esi
1084 movl %edx,H_cache_ptr+4(%eax)
1085
1086 #if DO_STATS
1087 SDINC(V_cache_hits2(%ebx)) /* update counter */
1088 MP_ENABLE_PREEMPTION(%ebx)
1089 popl %ebx
1090 #endif
1091
1092 LEAVE0
1093 ret
1094
1095 /* Search third cache entry */
1096 /* %eax = gprof func header, %ebx = vars address if DO_STATS, %ecx = caller's caller */
1097 /* %edx = first arc searched, %esi = second arc searched */
1098 /* %esi, %ebx if DO_STATS pushed on stack */
1099
1100 .align ALIGN
1101 MARK(_gprof_mcount_cache3)
1102 LCL(gcache3):
1103 pushl %edi
1104 movl H_cache_ptr+8(%eax),%edi /* 3rd arc to be searched */
1105 cmpl %ecx,G_frompc(%edi) /* skip if not equal */
1106 jne LCL(gnocache)
1107
1108 /* Element found, increment, reset last arc searched and return */
1109
1110 DINC2(G_count(%edi),G_overflow(%edi)) /* bump arc count */
1111
1112 movl %edi,H_cache_ptr+0(%eax) /* make this 1st cached arc */
1113 movl %esi,H_cache_ptr+8(%eax) /* old 2nd arc becomes the 3rd */
1114 movl %edx,H_cache_ptr+4(%eax) /* old 1st arc becomes the 2nd */
1115 popl %edi
1116 popl %esi
1117
1118 #if DO_STATS
1119 SDINC(V_cache_hits3(%ebx)) /* update counter */
1120 MP_ENABLE_PREEMPTION(%ebx)
1121 popl %ebx
1122 #endif
1123
1124 LEAVE0
1125 ret
1126
1127 /* No function context, allocate a new context */
1128 /* %ebx is the variables address if DO_STATS */
1129 /* %ecx is the caller's caller's address */
1130 /* %edx is the unique function pointer */
1131 /* %ebx if DO_STATS pushed on stack */
1132
1133 .align ALIGN
1134 MARK(_gprof_mcount_new)
1135 LCL(gnew):
1136 pushl %esi
1137 pushl %edi
1138
1139 #if !DO_STATS
1140 pushl %ebx /* Address of vars needed for alloc */
1141 MP_DISABLE_PREEMPTION(%ebx)
1142 Vload /* stats already loaded address */
1143 #endif
1144
1145 SLOCK incl V_prof_records(%ebx)
1146 movl %edx,%esi /* save unique function ptr */
1147 movl %ecx,%edi /* and caller's caller address */
1148 movl $(H_size),%eax /* memory block size */
1149 movl $(C_gfunc),%ecx /* gprof function header memory pool */
1150 call EXT(_profile_alloc_asm)
1151
1152 movl V_hash_ptr(%ebx),%ecx /* copy hash_ptr to func header */
1153 movl V_dummy_ptr(%ebx),%edx /* dummy cache entry */
1154 movl %ecx,H_hash_ptr(%eax)
1155 movl %edx,H_cache_ptr+0(%eax) /* store dummy cache ptrs */
1156 movl %edx,H_cache_ptr+4(%eax)
1157 movl %edx,H_cache_ptr+8(%eax)
1158 movl %esi,H_unique_ptr(%eax) /* remember function unique ptr */
1159 movl Estack+12(%esp),%ecx /* caller's address (3 words pushed: %ebx, %esi, %edi) */
1160 movl $1,H_prof+P_count(%eax) /* function called once so far */
1161 movl %ecx,H_prof+P_addr(%eax) /* set up prof information */
1162 movl %eax,(%esi) /* update context block address */
1163 movl %edi,%ecx /* caller's caller address */
1164 movl %edx,%esi /* 2nd cached arc */
1165
1166 #if !DO_STATS
1167 MP_ENABLE_PREEMPTION(%ebx) /* balance the MP_DISABLE_PREEMPTION issued at gnew entry */
1168 popl %ebx
1169 #endif
1170 /* Fall through to add element to the hash table. This may involve */
1171 /* searching a few hash table elements that don't need to be searched */
1172 /* since we have a new element, but it allows the hash table function */
1173 /* to be specified in only one place */
1174
1175 /* Didn't find entry in cache, search the global hash table */
1176 /* %eax = gprof func header, %ebx = vars address if DO_STATS */
1177 /* %ecx = caller's caller */
1178 /* %edx, %esi = cached arcs that were searched */
1179 /* %edi, %esi, %ebx if DO_STATS pushed on stack */
1180
1181 .align ALIGN
1182 MARK(_gprof_mcount_hash)
1183 LCL(gnocache):
1184
1185 pushl %esi /* save 2nd arc searched */
1186 pushl %edx /* save 1st arc searched */
1187 movl %eax,%esi /* save gprof func header */
1188
1189 #if DO_STATS
1190 SDINC(V_hash_num(%ebx))
1191 movl Estack+20(%esp),%edi /* caller's address (5 words pushed: %ebx, %esi, %edi, 2 arcs) */
1192 #else
1193 movl Estack+16(%esp),%edi /* caller's address (4 words pushed: %esi, %edi, 2 arcs) */
1194 #endif
1195 movl %ecx,%eax /* caller's caller address */
1196 imull %edi,%eax /* multiply to get hash */
1197 movl H_hash_ptr(%esi),%edx /* hash pointer */
1198 shrl $(GPROF_HASH_SHIFT),%eax /* eliminate low order bits */
1199 andl $(GPROF_HASH_MASK),%eax /* mask to get hash value */
1200 leal 0(%edx,%eax,4),%eax /* pointer to hash bucket */
1201 movl %eax,%edx /* save hash bucket address */
1202
1203 /* %eax = old arc, %ebx = vars address if DO_STATS, %ecx = caller's caller */
1204 /* %edx = hash bucket address, %esi = gfunc ptr, %edi = caller's addr */
1205 /* 2 old arcs, %edi, %esi, %ebx if DO_STATS pushed on stack */
1206
1207 .align ALIGN
1208 LCL(ghash):
1209 movl G_next(%eax),%eax /* get next hash element */
1210 cmpl $0,%eax /* end of line? */
1211 je LCL(ghashnew) /* skip if allocate new hash */
1212
1213 #if DO_STATS
1214 SDINC(V_hash_search(%ebx))
1215 #endif
1216
1217 cmpl G_selfpc(%eax),%edi /* loop back if not one we want */
1218 jne LCL(ghash)
1219
1220 cmpl G_frompc(%eax),%ecx /* loop back if not one we want */
1221 jne LCL(ghash)
1222
1223 /* Found an entry, increment count, set up for caching, and return */
1224 /* %eax = arc, %ebx = vars address if DO_STATS, %esi = func header */
1225 /* 2 old arcs, %edi, %esi, %ebx if DO_STATS pushed on stack */
1226
1227 DINC2(G_count(%eax),G_overflow(%eax)) /* bump arc count */
1228
1229 popl %ecx /* previous 1st arc searched */
1230 movl %eax,H_cache_ptr+0(%esi) /* this element is now 1st arc */
1231 popl %edi /* previous 2nd arc searched */
1232 movl %ecx,H_cache_ptr+4(%esi) /* new 2nd arc to be searched */
1233 movl %edi,H_cache_ptr+8(%esi) /* new 3rd arc to be searched */
1234 popl %edi
1235 popl %esi
1236
1237 #if DO_STATS
1238 MP_ENABLE_PREEMPTION(%ebx)
1239 popl %ebx
1240 #endif
1241
1242 LEAVE0
1243 ret /* return to user */
1244
1245 /* Allocate new arc */
1246 /* %eax = old arc, %ebx = vars address if DO_STATS, %ecx = caller's caller */
1247 /* %edx = hash bucket address, %esi = gfunc ptr, %edi = caller's addr */
1248 /* 2 old arcs, %edi, %esi, %ebx if DO_STATS pushed on stack */
1249
1250 .align ALIGN
1251 MARK(_gprof_mcount_hashnew)
1252 LCL(ghashnew):
1253
1254 #if !DO_STATS
1255 pushl %ebx /* load address of vars if we haven't */
1256 MP_DISABLE_PREEMPTION(%ebx)
1257 Vload /* already done so */
1258 #endif
1259
1260 SLOCK incl V_gprof_records(%ebx)
1261 pushl %edx /* keep the hash bucket address across the allocator call */
1262 movl %ecx,%edi /* save caller's caller */
1263 movl $(G_size),%eax /* arc size */
1264 movl $(C_gprof),%ecx /* gprof memory pool */
1265 call EXT(_profile_alloc_asm)
1266 popl %edx
1267
1268 movl $1,G_count(%eax) /* set call count */
1269 movl Estack+20(%esp),%ecx /* caller's address (5 words pushed: %ebx, %esi, %edi, 2 arcs) */
1270 movl %edi,G_frompc(%eax) /* caller's caller */
1271 movl %ecx,G_selfpc(%eax)
1272
1273 #if !DO_STATS
1274 MP_ENABLE_PREEMPTION(%ebx) /* balance the MP_DISABLE_PREEMPTION issued at ghashnew entry */
1275 popl %ebx /* release %ebx if no stats */
1276 #endif
1277 movl (%edx),%ecx /* first hash bucket */
1278 movl %ecx,G_next(%eax) /* update link */
1279 movl %eax,%ecx /* copy for xchgl */
1280 xchgl %ecx,(%edx) /* add to hash linked list */
1281 movl %ecx,G_next(%eax) /* update in case list changed */
1282
1283 popl %ecx /* previous 1st arc searched */
1284 popl %edi /* previous 2nd arc searched */
1285 movl %eax,H_cache_ptr+0(%esi) /* this element is now 1st arc */
1286 movl %ecx,H_cache_ptr+4(%esi) /* new 2nd arc to be searched */
1287 movl %edi,H_cache_ptr+8(%esi) /* new 3rd arc to be searched */
1288
1289 popl %edi
1290 popl %esi
1291
1292 #if DO_STATS
1293 MP_ENABLE_PREEMPTION(%ebx)
1294 popl %ebx
1295 #endif
1296
1297 LEAVE0
1298 ret /* return to user */
1299
1300 END(_gprof_mcount)
1301
1302 \f
1303 /*
1304 * This function assumes that neither the caller nor its caller
1305 * has omitted the frame pointer, which is needed to find the caller's
1306 * caller. The stack looks like the following at the time of the call:
1307 *
1308 * +-------------------------------+
1309 * | |
1310 * | |
1311 * | caller's caller stack, |
1312 * | saved registers, params. |
1313 * | |
1314 * | |
1315 * +-------------------------------+
1316 * | caller's caller return addr. |
1317 * +-------------------------------+
1318 * fp --> | previous frame pointer |
1319 * +-------------------------------+
1320 * | |
1321 * | caller's stack, saved regs, |
1322 * | params. |
1323 * | |
1324 * +-------------------------------+
1325 * sp --> | caller's return address |
1326 * +-------------------------------+
1327 *
1328 * Recent versions of the compiler put the address of the pointer
1329 * sized word in %edx. Previous versions did not, but this code
1330 * does not support them.
1331 */
1332
1333 /*
1334 * Note that OSF/rose blew defining _mcount, since it prepends leading
1335 * underscores, and _mcount didn't have a second leading underscore. However,
1336 * some of the kernel/server functions 'know' that mcount has a leading
1337 * underscore, so we satisfy both camps.
1338 */
1339
1340 #if OLD_MCOUNT
1341 .globl mcount
1342 .globl _mcount
1343 ELF_FUNC(mcount)
1344 ELF_FUNC(_mcount)
1345 .align FALIGN
1346 _mcount: /* both spellings label the same entry point */
1347 mcount:
1348
1349 pushl %ebx /* %ebx is used as the vars pointer below */
1350 MP_DISABLE_PREEMPTION(%ebx)
1351 Vload
1352
1353 #if DO_STATS
1354 SDINC(V_old_mcount(%ebx)) /* bump the V_old_mcount statistics counter */
1355 #endif
1356
1357 /* In calling the functions, we will actually leave 1 extra word on the */
1358 /* top of the stack, but generated code will not notice, since the function */
1359 /* uses a frame pointer */
1360
1361 movl V_mcount_ptr_ptr(%ebx),%ecx /* address of mcount_ptr */
1362 MP_ENABLE_PREEMPTION(%ebx)
1363 popl %ebx /* restore caller's %ebx; stack top is our return address again */
1364 movl 4(%ebp),%eax /* caller's caller return address */
1365 xchgl %eax,(%esp) /* stack slot now holds caller's caller; %eax = our return address */
1366 pushl %eax /* push return address on top, matching the layout the *_mcount routines document */
1367 jmp *(%ecx) /* go to profile the function */
1368
1369 End(mcount)
1370 End(_mcount)
1371 #endif
1372
1373 \f
1374 #if !defined(KERNEL) && !defined(MACH_KERNEL)
1375
1376 /*
1377 * Convert a 64-bit integer to a string.
1378 * Arg #1 is a pointer to a string (at least 24 bytes) or NULL
1379 * Arg #2 is the low part of the 64-bit integer.
1380 * Arg #3 is the high part of the 64-bit integer.
1381 */
1382
1383 Entry(_profile_cnt_to_decimal) /* writes the decimal digits of a 64-bit count backwards from the end of a buffer; returns a pointer to the first digit */
1384 ENTER
1385 pushl %ebx
1386 pushl %esi
1387 pushl %edi
1388 MP_DISABLE_PREEMPTION(%ebx) /* done on every path so the MP_ENABLE_PREEMPTION at exit is always paired; %ebx is still free here */
1389 movl Estack+16(%esp),%ebx /* pointer or null */
1390 movl Estack+20(%esp),%edi /* low part of number */
1391 movl $10,%ecx /* divisor */
1392 cmpl $0,%ebx /* skip if pointer ok */
1393 jne LCL(cvt_nonnull)
1394
1395 Vload /* get _profile_vars address */
1396 leal V_num_buffer(%ebx),%ebx /* temp buffer to use */
1397
1398 .align ALIGN
1399 LCL(cvt_nonnull):
1400 addl $(N_digit-1),%ebx /* point string at end */
1401 movb $0,0(%ebx) /* null terminate string */
1402
1403 #if OVERFLOW
1404 movl Estack+24(%esp),%esi /* high part of number */
1405 cmpl $0,%esi /* any thing left in high part? */
1406 je LCL(cvt_low)
1407
1408 .align ALIGN
1409 LCL(cvt_high): /* one digit per pass while the high word is non-zero */
1410 movl %esi,%eax /* calculate high/10 & high%10 */
1411 xorl %edx,%edx /* divl divides %edx:%eax, so clear the upper half first */
1412 divl %ecx
1413 movl %eax,%esi /* high = high/10; %edx keeps high%10 for the next divide */
1414
1415 movl %edi,%eax /* calculate (low + (high%10)*2^32) / 10 */
1416 divl %ecx /* %edx < 10 on entry, so the quotient fits in 32 bits */
1417 movl %eax,%edi /* low = quotient; %edx = digit */
1418
1419 decl %ebx /* decrement string pointer */
1420 addl $48,%edx /* convert from 0..9 -> '0'..'9' */
1421 movb %dl,0(%ebx) /* store digit in string */
1422 cmpl $0,%esi /* any thing left in high part? */
1423 jne LCL(cvt_high)
1424
1425 #endif /* OVERFLOW */
1426
1427 .align ALIGN
1428 LCL(cvt_low):
1429 movl %edi,%eax /* get low part into %eax */
1430
1431 .align ALIGN
1432 LCL(cvt_low2): /* plain 32-bit conversion; always emits at least one digit */
1433 xorl %edx,%edx /* 0 */
1434 divl %ecx /* calculate next digit */
1435 decl %ebx /* decrement string pointer */
1436 addl $48,%edx /* convert from 0..9 -> '0'..'9' */
1437 movb %dl,0(%ebx) /* store digit in string */
1438 cmpl $0,%eax /* any more digits to convert? */
1439 jne LCL(cvt_low2)
1440
1441 movl %ebx,%eax /* return value */
1442 popl %edi
1443 popl %esi
1444 MP_ENABLE_PREEMPTION(%ebx) /* pairs with the disable at entry; %ebx is scratch, the result is already in %eax */
1445 popl %ebx
1446 LEAVE0
1447 ret
1448
1449 END(_profile_cnt_to_decimal)
1450
1451 #endif