]> git.saurik.com Git - apple/xnu.git/blame - osfmk/profiling/i386/profile-asm.s
xnu-792.13.8.tar.gz
[apple/xnu.git] / osfmk / profiling / i386 / profile-asm.s
CommitLineData
1c79356b
A
1/*
2 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
3 *
8ad349bb 4 * @APPLE_LICENSE_OSREFERENCE_HEADER_START@
1c79356b 5 *
8ad349bb
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the
10 * License may not be used to create, or enable the creation or
11 * redistribution of, unlawful or unlicensed copies of an Apple operating
12 * system, or to circumvent, violate, or enable the circumvention or
13 * violation of, any terms of an Apple operating system software license
14 * agreement.
15 *
16 * Please obtain a copy of the License at
17 * http://www.opensource.apple.com/apsl/ and read it before using this
18 * file.
19 *
20 * The Original Code and all software distributed under the License are
21 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
22 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
23 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
24 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
25 * Please see the License for the specific language governing rights and
26 * limitations under the License.
27 *
28 * @APPLE_LICENSE_OSREFERENCE_HEADER_END@
1c79356b
A
29 */
30/*
31 * @OSF_COPYRIGHT@
32 */
33/*
34 * HISTORY
35 *
36 * Revision 1.1.1.1 1998/09/22 21:05:49 wsanchez
37 * Import of Mac OS X kernel (~semeria)
38 *
39 * Revision 1.1.1.1 1998/03/07 02:26:08 wsanchez
40 * Import of OSF Mach kernel (~mburg)
41 *
42 * Revision 1.1.7.1 1997/09/22 17:41:24 barbou
43 * MP+RT: protect cpu_number() usage against preemption.
44 * [97/09/16 barbou]
45 *
46 * Revision 1.1.5.1 1995/01/06 19:53:37 devrcs
47 * mk6 CR668 - 1.3b26 merge
48 * new file for mk6
49 * [1994/10/12 22:25:20 dwm]
50 *
51 * Revision 1.1.2.2 1994/05/16 19:19:17 meissner
52 * Add support for converting 64-bit integers to a decimal string.
53 * Use the correct address (selfpc) when creating the prof header for gprof.
54 * [1994/04/28 21:44:59 meissner]
55 *
56 * Revision 1.1.2.1 1994/04/08 17:51:42 meissner
57 * Make most stats 64 bits, except for things like memory allocation.
58 * [1994/04/02 14:58:21 meissner]
59 *
60 * Do not provide old mcount support under MK or server.
61 * Fixup stats size so it is the same as in profile-md.h.
62 * [1994/03/29 21:00:03 meissner]
63 *
64 * Use faster sequence for overflow addition.
65 * Keep {dummy,prof,gprof,old}_mcount counts in double precision.
66 * Add kernel NCPUS > 1 support.
67 * [1994/03/17 20:13:23 meissner]
68 *
69 * Add gprof/prof overflow support
70 * [1994/03/17 14:56:44 meissner]
71 *
72 * Add size of histogram counters & unused fields to profile_profil struct
73 * [1994/02/17 21:41:44 meissner]
74 *
75 * Add too_low/too_high to profile_stats.
76 * [1994/02/16 22:38:11 meissner]
77 *
78 * Bump # allocation contexts to 32 from 16.
79 * Store unique ptr address in gprof function header structure for _profile_reset.
80 * Add new fields from profile-{internal,md}.h.
81 * Align loop looking for an unlocked acontext.
82 * Count # times a locked context block was found.
83 * Expand copyright.
84 * [1994/02/07 12:40:56 meissner]
85 *
86 * Keep track of the number of times the kernel overflows the HISTCOUNTER counter.
87 * [1994/02/03 20:13:23 meissner]
88 *
89 * Add stats for {user,kernel,idle} mode in the kernel.
90 * [1994/02/03 15:17:22 meissner]
91 *
92 * No change.
93 * [1994/02/03 00:58:49 meissner]
94 *
95 * Combine _profile_{vars,stats,md}; Allow more than one _profile_vars.
96 * [1994/02/01 12:03:56 meissner]
97 *
98 * Move _mcount_ptr to be closer to other data declarations.
99 * Add text_len to profile_profil structure for mk.
100 * Split records_cnt into prof_cnt/gprof_cnt.
101 * Always update prof_cnt/gprof_cnt even if not DO_STATS.
102 * Add current/max cpu indicator to stats for kernel.
103 * [1994/01/28 23:33:20 meissner]
104 *
105 * Don't do 4+Lgotoff(lab), use separate labels.
106 * Change GPROF_HASH_SHIFT to 9 (from 8).
107 * [1994/01/26 22:00:59 meissner]
108 *
109 * Fixup NO_RECURSIVE_ALLOC to do byte loads, not word loads.
110 * [1994/01/26 20:30:57 meissner]
111 *
112 * Move callback pointers into separate allocation context.
113 * Add size fields for other structures to profile-vars.
114 * Allocate string table as one large allocation.
115 * Rewrite old mcount code once again.
116 * Use multiply to make hash value, not divide.
117 * Hash table is now a power of two.
118 * [1994/01/26 20:23:32 meissner]
119 *
120 * Cut hash table size back to 16189.
121 * Add size fields to all structures.
122 * Add major/minor version number to _profile_md.
123 * Move allocation context block pointers to _profile_vars.
124 * Move _gprof_dummy after _profile_md.
125 * New function header code now falls into hash an element
126 * to avoid having the hash code duplicated or use a macro.
127 * Fix bug in _gprof_mcount with ELF shared libraries.
128 * [1994/01/25 01:45:59 meissner]
129 *
130 * Move init functions to C code; rearrange profil varaibles.
131 * [1994/01/22 01:11:14 meissner]
132 *
133 * No change.
134 * [1994/01/20 20:56:43 meissner]
135 *
136 * Fixup copyright.
137 * [1994/01/18 23:07:39 meissner]
138 *
139 * Make flags byte-sized.
140 * Add have_bb flag.
141 * Add init_format flag.
142 * Always put word size multipler first in .space.
143 * [1994/01/18 21:57:14 meissner]
144 *
145 * Fix elfpic problems in last change.
146 * [1994/01/16 14:04:26 meissner]
147 *
148 * Rewrite gprof caching to be faster & not need a lock.
149 * Record prof information for gprof too.
150 * Bump reserved stats to 64.
151 * Bump up hash table size 30799.
152 * Conditionally use lock prefix.
153 * Change most #ifdef's to #if.
154 * DEBUG_PROFILE turns on stack frames now.
155 * Conditionally add externs to gprof to determine where time is spent.
156 * Prof_mcount uses xchgl to update function pointer.
157 * [1994/01/15 18:40:33 meissner]
158 *
159 * Fix a comment.
160 * Separate statistics from debugging (though debugging turns it on).
161 * Remove debug code that traces each gprof request.
162 * [1994/01/15 00:59:02 meissner]
163 *
164 * Move max hash bucket calculation into _gprof_write & put info in stats structure.
165 * [1994/01/04 16:15:14 meissner]
166 *
167 * Use _profile_printf to write diagnostics; add diag_stream to hold stream to write to.
168 * [1994/01/04 15:37:44 meissner]
169 *
170 * Add more allocation memory pools (gprof function hdrs in particular).
171 * For prof, gprof arc, and gprof function hdrs, allocate 16 pages at a time.
172 * Add major/minor version numbers to _profile_{vars,stats}.
173 * Add # profil buckets field to _profil_stats.
174 * [19
175 *
176 * $EndLog$
177 */
178
179/*
180 * Common 386 profiling module that is shared between the kernel, mach
181 * servers, and the user space library. Each environment includes
182 * this file.
183 */
184
185 .file "profile-asm.s"
186
1c79356b
A
187#include <machine/asm.h>
188
189/*
190 * By default, debugging turns on statistics and stack frames.
191 */
192
193#if DEBUG_PROFILE
194#ifndef DO_STATS
195#define DO_STATS 1
196#endif
197
198#ifndef STACK_FRAMES
199#define STACK_FRAMES 1
200#endif
201#endif
202
203#ifndef OLD_MCOUNT
204#define OLD_MCOUNT 0 /* do not compile old code for mcount */
205#endif
206
207#ifndef DO_STATS
208#define DO_STATS 1 /* compile in statistics code */
209#endif
210
211#ifndef DO_LOCK
212#define DO_LOCK 0 /* use lock; in front of increments */
213#endif
214
215#ifndef LOCK_STATS
216#define LOCK_STATS DO_LOCK /* update stats with lock set */
217#endif
218
219#ifndef STACK_FRAMES
220#define STACK_FRAMES 0 /* create stack frames for debugger */
221#endif
222
223#ifndef NO_RECURSIVE_ALLOC
224#define NO_RECURSIVE_ALLOC 0 /* check for recursive allocs */
225 /* (not thread safe!) */
226#endif
227
228#ifndef MARK_GPROF
229#define MARK_GPROF 0 /* add externs for gprof profiling */
230#endif
231
232#ifndef OVERFLOW
233#define OVERFLOW 1 /* add overflow checking support */
234#endif
235
236/*
237 * Turn on the use of the lock prefix if desired.
238 */
239
240#ifndef LOCK
241#if DO_LOCK
242#define LOCK lock;
243#else
244#define LOCK
245#endif
246#endif
247
248#ifndef SLOCK
249#if LOCK_STATS
250#define SLOCK LOCK
251#else
252#define SLOCK
253#endif
254#endif
255
256/*
257 * Double or single precision incrementing
258 */
259
260#if OVERFLOW
261#define DINC(mem) LOCK addl $1,mem; LOCK adcl $0,4+mem
262#define DINC2(mem,mem2) LOCK addl $1,mem; LOCK adcl $0,mem2
263#define SDINC(mem) SLOCK addl $1,mem; SLOCK adcl $0,4+mem
264#define SDADD(val,mem) SLOCK addl val,mem; SLOCK adcl $0,4+mem
265#define SDADDNEG(val,mem) SLOCK subl val,mem; SLOCK adcl $0,4+mem
266#define SDSUB(val,mem) SLOCK subl val,mem; SLOCK sbbl $0,4+mem
267
268#else
269#define DINC(mem) LOCK incl mem
270#define DINC2(mem,mem2) LOCK incl mem
271#define SDINC(mem) SLOCK incl mem
272#define SDADD(val,mem) SLOCK addl val,mem
273#define SDADDNEG(val,mem) SLOCK subl val,mem
274#define SDSUB(val,mem) SLOCK subl val,mem
275#endif
276
277/*
278 * Stack frame support so that debugger traceback works.
279 */
280
281#if STACK_FRAMES
282#define ENTER pushl %ebp; movl %esp,%ebp
283#define LEAVE0 popl %ebp
284#define Estack 4
285#else
286#define ENTER
287#define LEAVE0
288#define Estack 0
289#endif
290
291/*
292 * Gprof profiling.
293 */
294
295#if MARK_GPROF
296#define MARK(name) .globl EXT(name); ELF_FUNC(EXT(name)); ELF_SIZE(EXT(name),0); LEXT(name)
297#else
298#define MARK(name)
299#endif
300
301/*
302 * Profiling allocation context block. Each time memory is needed, the
303 * allocator loops until it finds an unlocked context block, and allocates
304 * from that block. If no context blocks are available, a new memory
305 * pool is allocated, and added to the end of the chain.
306 */
307
308LCL(A_next) = 0 /* next context block link (must be 0) */
309LCL(A_plist) = LCL(A_next)+4 /* head of page list for context block */
310LCL(A_lock) = LCL(A_plist)+4 /* lock word */
311LCL(A_size) = LCL(A_lock)+4 /* size of context block */
312
313#define A_next LCL(A_next)
314#define A_plist LCL(A_plist)
315#define A_lock LCL(A_lock)
316#define A_size LCL(A_size)
317
318/*
319 * Allocation contexts used.
320 */
321
322LCL(C_prof) = 0 /* prof records */
323LCL(C_gprof) = 1 /* gprof arc records */
324LCL(C_gfunc) = 2 /* gprof function headers */
325LCL(C_misc) = 3 /* misc. allocations */
326LCL(C_profil) = 4 /* memory for profil */
327LCL(C_dci) = 5 /* memory for dci */
328LCL(C_bb) = 6 /* memory for basic blocks */
329LCL(C_callback) = 7 /* memory for callbacks */
330LCL(C_max) = 32 /* # allocation contexts */
331
332#define C_prof LCL(C_prof)
333#define C_gprof LCL(C_gprof)
334#define C_gfunc LCL(C_gfunc)
335#define C_max LCL(C_max)
336
337/*
338 * Linked list of memory allocations.
339 */
340
341LCL(M_first) = 0 /* pointer to first byte available */
342LCL(M_ptr) = LCL(M_first)+4 /* pointer to next available byte */
343LCL(M_next) = LCL(M_ptr)+4 /* next page allocated */
344LCL(M_nfree) = LCL(M_next)+4 /* # bytes available */
345LCL(M_nalloc) = LCL(M_nfree)+4 /* # bytes allocated */
346LCL(M_num) = LCL(M_nalloc)+4 /* # allocations done on this page */
347LCL(M_size) = LCL(M_num)+4 /* size of page header */
348
349#define M_first LCL(M_first)
350#define M_ptr LCL(M_ptr)
351#define M_next LCL(M_next)
352#define M_nfree LCL(M_nfree)
353#define M_nalloc LCL(M_nalloc)
354#define M_num LCL(M_num)
355#define M_size LCL(M_size)
356
357/*
358 * Prof data type.
359 */
360
361LCL(P_addr) = 0 /* function address */
362LCL(P_count) = LCL(P_addr)+4 /* # times function called */
363LCL(P_overflow) = LCL(P_count)+4 /* # times count overflowed */
364LCL(P_size) = LCL(P_overflow)+4 /* size of prof data type */
365
366#define P_addr LCL(P_addr)
367#define P_count LCL(P_count)
368#define P_overflow LCL(P_overflow)
369#define P_size LCL(P_size)
370
371/*
372 * Gprof data type.
373 */
374
375LCL(G_next) = 0 /* next hash link (must be 0) */
376LCL(G_frompc) = LCL(G_next)+4 /* caller's caller */
377LCL(G_selfpc) = LCL(G_frompc)+4 /* caller's address */
378LCL(G_count) = LCL(G_selfpc)+4 /* # times arc traversed */
379LCL(G_overflow) = LCL(G_count)+4 /* # times count overflowed */
380LCL(G_size) = LCL(G_overflow)+4 /* size of gprof data type */
381
382#define G_next LCL(G_next)
383#define G_frompc LCL(G_frompc)
384#define G_selfpc LCL(G_selfpc)
385#define G_count LCL(G_count)
386#define G_overflow LCL(G_overflow)
387#define G_size LCL(G_size)
388
389/*
390 * Gprof header.
391 *
392 * At least one header is allocated for each unique function that is profiled.
393 * In order to save time calculating the hash value, the last H_maxcache
394 * distinct arcs are cached within this structure. Also, to avoid loading
395 * the GOT when searching the hash table, we copy the hash pointer to this
396 * structure, so that we only load the GOT when we need to allocate an arc.
397 */
398
399LCL(H_maxcache) = 3 /* # of cache table entries */
400LCL(H_csize) = 4*LCL(H_maxcache) /* size of each cache array */
401
402LCL(H_hash_ptr) = 0 /* hash table to use */
403LCL(H_unique_ptr) = LCL(H_hash_ptr)+4 /* function unique pointer */
404LCL(H_prof) = LCL(H_unique_ptr)+4 /* prof statistics */
405LCL(H_cache_ptr) = LCL(H_prof)+P_size /* cache table of element pointers */
406LCL(H_size) = LCL(H_cache_ptr)+LCL(H_csize) /* size of gprof header type */
407
408#define H_maxcache LCL(H_maxcache)
409#define H_csize LCL(H_csize)
410#define H_hash_ptr LCL(H_hash_ptr)
411#define H_unique_ptr LCL(H_unique_ptr)
412#define H_prof LCL(H_prof)
413#define H_cache_ptr LCL(H_cache_ptr)
414#define H_size LCL(H_size)
415
416/*
417 * Number of digits needed to write a 64 bit number including trailing null.
418 * (rounded up to be divisible by 4).
419 */
420
421#define N_digit 24
422
423\f
424 .data
425
426/*
427 * Default gprof hash table size, which must be a power of two.
428 * The shift specifies how many low order bits to eliminate when
429 * calculating the hash value.
430 */
431
432#ifndef GPROF_HASH_SIZE
433#define GPROF_HASH_SIZE 16384
434#endif
435
436#ifndef GPROF_HASH_SHIFT
437#define GPROF_HASH_SHIFT 9
438#endif
439
440#define GPROF_HASH_MASK (GPROF_HASH_SIZE-1)
441
442DATA(_profile_hash_size)
443 .long GPROF_HASH_SIZE
444ENDDATA(_profile_hash_size)
445
446\f
447
448/*
449 * Pointer that the compiler uses to call to the appropriate mcount function.
450 */
451
452DATA(_mcount_ptr)
453 .long EXT(_dummy_mcount)
454ENDDATA(_mcount_ptr)
455
456/*
457 * Global profile variables. The structure that accesses this in C is declared
458 * in profile-internal.h. All items in .data that follow this will be used as
459 * one giant record, and each unique machine, thread, kgmon output or what have
460 * you will create a separate instance. Typically there is only one instance
461 * which will be the memory laid out below.
462 */
463
464LCL(var_major_version) = 0 /* major version number */
465LCL(var_minor_version) = LCL(var_major_version)+4 /* minor version number */
466LCL(vars_size) = LCL(var_minor_version)+4 /* size of _profile_vars structure */
467LCL(plist_size) = LCL(vars_size)+4 /* size of page_list structure */
468LCL(acontext_size) = LCL(plist_size)+4 /* size of allocation contexts */
469LCL(callback_size) = LCL(acontext_size)+4 /* size of callback structure */
470LCL(type) = LCL(callback_size)+4 /* profile type (gprof, prof) */
471LCL(error_msg) = LCL(type)+4 /* error message for perror */
472LCL(filename) = LCL(error_msg)+4 /* filename to write to */
473LCL(str_ptr) = LCL(filename)+4 /* string table pointer */
474LCL(stream) = LCL(str_ptr)+4 /* stdio stream to write to */
475LCL(diag_stream) = LCL(stream)+4 /* stdio stream to write diagnostics to */
476LCL(fwrite_func) = LCL(diag_stream)+4 /* function like fwrite to output bytes */
477LCL(page_size) = LCL(fwrite_func)+4 /* page size in bytes */
478LCL(str_bytes) = LCL(page_size)+4 /* # bytes in string table */
479LCL(str_total) = LCL(str_bytes)+4 /* # total bytes allocated for string table */
480LCL(clock_ticks) = LCL(str_total)+4 /* # clock ticks per second */
481
482 /* profil variables */
483LCL(profil_start) = LCL(clock_ticks)+4 /* start of profil variables */
484LCL(lowpc) = LCL(clock_ticks)+4 /* lowest address */
485LCL(highpc) = LCL(lowpc)+4 /* highest address */
486LCL(text_len) = LCL(highpc)+4 /* highpc-lowpc */
487LCL(profil_len) = LCL(text_len)+4 /* size of profil buffer */
488LCL(counter_size) = LCL(profil_len)+4 /* size of indivual counter */
489LCL(scale) = LCL(counter_size)+4 /* scale factor */
490LCL(profil_unused) = LCL(scale)+4 /* unused fields */
491LCL(profil_end) = LCL(profil_unused)+4*8 /* end of profil_info structure */
492LCL(profil_buf) = LCL(profil_end) /* buffer for profil */
493
494 /* Output selection func ptrs */
495LCL(output_init) = LCL(profil_buf)+4 /* Initialization */
496LCL(output) = LCL(output_init)+4 /* Write out profiling info */
497LCL(output_ptr) = LCL(output)+4 /* Output specific data ptr */
498
499 /* Memory allocation support */
500LCL(acontext) = LCL(output_ptr)+4 /* pointers to allocation context blocks */
501
502LCL(bogus_func) = LCL(acontext)+4*C_max /* function to use if gprof arc is bad */
503LCL(vars_unused) = LCL(bogus_func)+4 /* future growth */
504
505 /* flags */
506LCL(init) = LCL(vars_unused)+4*63 /* whether initializations were done */
507LCL(active) = LCL(init)+1 /* whether profiling is active */
508LCL(do_profile) = LCL(active)+1 /* whether to do profiling */
509LCL(use_dci) = LCL(do_profile)+1 /* whether to use DCI */
510LCL(use_profil) = LCL(use_dci)+1 /* whether to use profil */
511LCL(recursive_alloc) = LCL(use_profil)+1 /* alloc called recursively */
512LCL(output_uarea) = LCL(recursive_alloc)+1 /* output uarea */
513LCL(output_stats) = LCL(output_uarea)+1 /* output stats info */
514LCL(output_clock) = LCL(output_stats)+1 /* output the clock ticks */
515LCL(multiple_sections) = LCL(output_clock)+1 /* multiple sections are ok */
516LCL(have_bb) = LCL(multiple_sections)+1 /* whether we have basic block data */
517LCL(init_format) = LCL(have_bb)+1 /* The output format has been chosen */
518LCL(debug) = LCL(init_format)+1 /* Whether or not we are debugging */
519LCL(check_funcs) = LCL(debug)+1 /* Whether to check functions for validity */
520LCL(flag_unused) = LCL(check_funcs)+1 /* unused flags */
521LCL(end_of_vars) = LCL(flag_unused)+62 /* size of machine independent vars */
522
523/*
524 * Data that contains profile statistics that can be dumped out
525 * into the {,g}mon.out file. This is defined in profile-md.h.
526 */
527
528LCL(stats_start) = LCL(end_of_vars) /* start of stats substructure */
529LCL(stats_major_version)= LCL(stats_start) /* major version number */
530LCL(stats_minor_version)= LCL(stats_major_version)+4 /* minor version number */
531LCL(stats_size) = LCL(stats_minor_version)+4 /* size of _profile_stats structure */
532LCL(profil_buckets) = LCL(stats_size)+4 /* # profil buckets */
533LCL(my_cpu) = LCL(profil_buckets)+4 /* identify which cpu/thread this is */
534LCL(max_cpu) = LCL(my_cpu)+4 /* identify which cpu/thread this is */
535LCL(prof_records) = LCL(max_cpu)+4 /* # of profiled functions */
536LCL(gprof_records) = LCL(prof_records)+4 /* # of gprof arcs created */
537LCL(hash_buckets) = LCL(gprof_records)+4 /* max gprof hash buckets on a chain */
538LCL(bogus_count) = LCL(hash_buckets)+4 /* # bogus functions found in gprof */
539
540LCL(cnt) = LCL(bogus_count)+4 /* # of _{prof,gprof}_mcount calls */
541LCL(dummy) = LCL(cnt)+8 /* # of _dummy_mcount calls */
542LCL(old_mcount) = LCL(dummy)+8 /* # of old mcount calls */
543LCL(hash_search) = LCL(old_mcount)+8 /* # gprof hash buckets searched */
544LCL(hash_num) = LCL(hash_search)+8 /* # times hash table searched */
545LCL(user_ticks) = LCL(hash_num)+8 /* # ticks within user space */
546LCL(kernel_ticks) = LCL(user_ticks)+8 /* # ticks within kernel space */
547LCL(idle_ticks) = LCL(kernel_ticks)+8 /* # ticks cpu was idle */
548LCL(overflow_ticks) = LCL(idle_ticks)+8 /* # ticks where histcounter overflowed */
549LCL(acontext_locked) = LCL(overflow_ticks)+8 /* # times an acontext was locked */
550LCL(too_low)	= LCL(acontext_locked)+8 /* # times histogram tick too low */
551LCL(too_high)	= LCL(too_low)+8	/* # times histogram tick too high */
552LCL(prof_overflow) = LCL(too_high)+8 /* # times the prof count field overflowed */
553LCL(gprof_overflow) = LCL(prof_overflow)+8 /* # times the gprof count field overflowed */
554LCL(num_alloc) = LCL(gprof_overflow)+8 /* # allocations in each context */
555LCL(bytes_alloc) = LCL(num_alloc)+4*C_max /* bytes allocated in each context */
556LCL(num_context) = LCL(bytes_alloc)+4*C_max /* # allocation context blocks */
557LCL(wasted) = LCL(num_context)+4*C_max /* # bytes wasted */
558LCL(overhead) = LCL(wasted)+4*C_max /* # bytes of overhead */
559LCL(buckets) = LCL(overhead)+4*C_max /* # hash indexes that have n buckets */
560LCL(cache_hits1) = LCL(buckets)+4*10 /* # gprof cache hits in bucket #1 */
561LCL(cache_hits2) = LCL(cache_hits1)+8 /* # gprof cache hits in bucket #2 */
562LCL(cache_hits3) = LCL(cache_hits2)+8 /* # gprof cache hits in bucket #3 */
563LCL(stats_unused) = LCL(cache_hits3)+8 /* reserved for future use */
564LCL(stats_end) = LCL(stats_unused)+8*64 /* end of stats structure */
565
566/*
567 * Machine dependent variables that no C file should access (except for
568 * profile-md.c).
569 */
570
571LCL(md_start) = LCL(stats_end) /* start of md structure */
572LCL(md_major_version) = LCL(md_start) /* major version number */
573LCL(md_minor_version) = LCL(md_major_version)+4 /* minor version number */
574LCL(md_size) = LCL(md_minor_version)+4 /* size of _profile_stats structure */
575LCL(hash_ptr) = LCL(md_size)+4 /* gprof hash pointer */
576LCL(hash_size) = LCL(hash_ptr)+4 /* gprof hash size */
577LCL(num_cache) = LCL(hash_size)+4 /* # of cache entries */
578LCL(save_mcount_ptr) = LCL(num_cache)+4 /* save for mcount_ptr when suspending profiling */
579LCL(mcount_ptr_ptr) = LCL(save_mcount_ptr)+4 /* pointer to _mcount_ptr */
580LCL(dummy_ptr) = LCL(mcount_ptr_ptr)+4 /* pointer to gprof_dummy */
581LCL(alloc_pages) = LCL(dummy_ptr)+4 /* allocate more memory */
582LCL(num_buffer) = LCL(alloc_pages)+4 /* buffer to convert 64 bit ints in */
583LCL(md_unused) = LCL(num_buffer)+N_digit /* unused fields */
584LCL(md_end) = LCL(md_unused)+4*58 /* end of md structure */
585LCL(total_size) = LCL(md_end) /* size of entire structure */
586
587/*
588 * Size of the entire _profile_vars structure.
589 */
590
591DATA(_profile_size)
592 .long LCL(total_size)
593ENDDATA(_profile_size)
594
595/*
596 * Size of the statistics substructure.
597 */
598
599DATA(_profile_stats_size)
600 .long LCL(stats_end)-LCL(stats_start)
601ENDDATA(_profile_stats_size)
602
603/*
604 * Size of the profil info substructure.
605 */
606
607DATA(_profile_profil_size)
608 .long LCL(profil_end)-LCL(profil_start)
609ENDDATA(_profile_profil_size)
610
611/*
612 * Size of the machine dependent substructure.
613 */
614
615DATA(_profile_md_size)
616	.long LCL(md_end)-LCL(md_start)
617ENDDATA(_profile_md_size)	/* was ENDDATA(_profile_profil_size): close the symbol opened above, as every other DATA/ENDDATA pair does */
618
619/*
620 * Whether statistics are supported.
621 */
622
623DATA(_profile_do_stats)
624 .long DO_STATS
625ENDDATA(_profile_do_stats)
626
627 .text
628
629/*
630 * Map LCL(xxx) -> into simpler names
631 */
632
633#define V_acontext LCL(acontext)
634#define V_acontext_locked LCL(acontext_locked)
635#define V_alloc_pages LCL(alloc_pages)
636#define V_bogus_func LCL(bogus_func)
637#define V_bytes_alloc LCL(bytes_alloc)
638#define V_cache_hits1 LCL(cache_hits1)
639#define V_cache_hits2 LCL(cache_hits2)
640#define V_cache_hits3 LCL(cache_hits3)
641#define V_cnt LCL(cnt)
642#define V_cnt_overflow LCL(cnt_overflow)
643#define V_check_funcs LCL(check_funcs)
644#define V_dummy LCL(dummy)
645#define V_dummy_overflow LCL(dummy_overflow)
646#define V_dummy_ptr LCL(dummy_ptr)
647#define V_gprof_records LCL(gprof_records)
648#define V_hash_num LCL(hash_num)
649#define V_hash_ptr LCL(hash_ptr)
650#define V_hash_search LCL(hash_search)
651#define V_mcount_ptr_ptr LCL(mcount_ptr_ptr)
652#define V_num_alloc LCL(num_alloc)
653#define V_num_buffer LCL(num_buffer)
654#define V_num_context LCL(num_context)
655#define V_old_mcount LCL(old_mcount)
656#define V_old_mcount_overflow LCL(old_mcount_overflow)
657#define V_overhead LCL(overhead)
658#define V_page_size LCL(page_size)
659#define V_prof_records LCL(prof_records)
660#define V_recursive_alloc LCL(recursive_alloc)
661#define V_wasted LCL(wasted)
662
663/*
664 * Loadup %ebx with the address of _profile_vars. On a multiprocessor, this
665 * will load up the appropriate machine's _profile_vars structure.
666 * For ELF shared libraries, rely on the fact that we won't need a GOT,
667 * except to load this pointer.
668 */
669
91447636 670#if defined (MACH_KERNEL)
1c79356b 671#define ASSEMBLER
55e303ae 672#include <i386/mp.h>
1c79356b
A
673
674#if SQT
675#include <i386/SQT/asm_macros.h>
676#endif
677
678#ifndef CPU_NUMBER
679#error "Cannot determine how to get CPU number"
680#endif
681
682#define Vload CPU_NUMBER(%ebx); movl EXT(_profile_vars_cpus)(,%ebx,4),%ebx
683
91447636 684#else /* not kernel */
1c79356b
A
685#define Vload Gload; Egaddr(%ebx,_profile_vars)
686#endif
687
688\f
689/*
690 * Allocate some memory for profiling. This memory is guaranteed to
691 * be zero.
692 * %eax contains the memory size requested and will contain ptr on exit.
693 * %ebx contains the address of the appropriate profile_vars structure.
694 * %ecx is the number of the memory pool to allocate from (trashed on exit).
695 * %edx is trashed.
696 * %esi is preserved.
697 * %edi is preserved.
698 * %ebp is preserved.
699 */
700
701Entry(_profile_alloc_asm)
702	ENTER
703	pushl %esi
704	pushl %edi
705
706	movl %ecx,%edi			/* move context number to saved reg */
707
708#if NO_RECURSIVE_ALLOC
709	movb $-1,%cl
710	xchgb %cl,V_recursive_alloc(%ebx)
711	cmpb $0,%cl
712	je LCL(no_recurse)
713
714	int $3				/* trap to debugger: allocator re-entered (not thread safe check) */
715
716	.align ALIGN
717LCL(no_recurse):
718#endif
719
720	leal V_acontext(%ebx,%edi,4),%ecx
721
722	/* Loop looking for a free allocation context. */
723	/* %eax = size, %ebx = vars addr, %ecx = ptr to allocation context to try */
724	/* %edi = context number */
725
726	.align ALIGN
727LCL(alloc_loop):
728	movl %ecx,%esi			/* save ptr in case no more contexts */
729	movl A_next(%ecx),%ecx		/* next context block */
730	cmpl $0,%ecx
731	je LCL(alloc_context)		/* need to allocate a new context block */
732
733	movl $-1,%edx
734	xchgl %edx,A_lock(%ecx)		/* %edx == 0 if context available; xchgl with memory is implicitly locked */
735
736#if DO_STATS
737	SDADDNEG(%edx,V_acontext_locked(%ebx)) /* increment counter if lock was held */
738#endif
739
740	cmpl $0,%edx
741	jne LCL(alloc_loop)		/* go back if this context block is not available */
742
743	/* Allocation context found (%ecx), now allocate. */
744	movl A_plist(%ecx),%edx		/* pointer to current block */
745	cmpl $0,%edx			/* first allocation? */
746	je LCL(alloc_new)
747
748	cmpl %eax,M_nfree(%edx)		/* see if we have enough space */
749	jl LCL(alloc_new)		/* jump if not enough space */
750
751	/* Allocate from local block (and common exit) */
752	/* %eax = bytes to allocate, %ebx = GOT, %ecx = context, %edx = memory block */
753	/* %edi = context number */
754
755	.align ALIGN
756LCL(alloc_ret):
757
758#if DO_STATS
759	SLOCK incl V_num_alloc(%ebx,%edi,4)	/* update global counters */
760	SLOCK addl %eax,V_bytes_alloc(%ebx,%edi,4)
761	SLOCK subl %eax,V_wasted(%ebx,%edi,4)
762#endif
763
764	movl M_ptr(%edx),%esi		/* pointer return value */
765	subl %eax,M_nfree(%edx)		/* decrement bytes remaining */
766	addl %eax,M_nalloc(%edx)	/* increment bytes allocated */
767	incl M_num(%edx)		/* increment # allocations */
768	addl %eax,M_ptr(%edx)		/* advance pointer */
769	movl $0,A_lock(%ecx)		/* unlock context block */
770	movl %esi,%eax			/* return pointer */
771
772#if NO_RECURSIVE_ALLOC
773	movb $0,V_recursive_alloc(%ebx)
774#endif
775
776	popl %edi
777	popl %esi
778	LEAVE0
779	ret				/* return to the caller */
780
781	/* Allocate space in whole number of pages */
782	/* %eax = bytes to allocate, %ebx = vars address, %ecx = context */
783	/* %edi = context number */
784
785	.align ALIGN
786LCL(alloc_new):
787	pushl %eax			/* save regs */
788	pushl %ecx
789	movl V_page_size(%ebx),%edx
790	addl $(M_size-1),%eax		/* add in overhead size & subtract 1 */
791	decl %edx			/* page_size - 1 */
792	addl %edx,%eax			/* round up to whole number of pages */
793	notl %edx
794	andl %edx,%eax
795	leal -M_size(%eax),%esi		/* save allocation size */
796	pushl %eax			/* argument to _profile_alloc_pages */
797	call *V_alloc_pages(%ebx)	/* allocate some memory */
798	addl $4,%esp			/* pop off argument */
799
800#if DO_STATS
801	SLOCK addl %esi,V_wasted(%ebx,%edi,4) /* update global counters */
55e303ae 802	SLOCK addl $(M_size),V_overhead(%ebx,%edi,4)
1c79356b
A
803#endif
804
805	popl %ecx			/* context block */
806	movl %eax,%edx			/* memory block pointer */
807	movl %esi,M_nfree(%edx)		/* # free bytes */
55e303ae 808	addl $(M_size),%eax		/* bump past overhead */
1c79356b
A
809	movl A_plist(%ecx),%esi		/* previous memory block or 0 */
810	movl %eax,M_first(%edx)		/* first space available */
811	movl %eax,M_ptr(%edx)		/* current address available */
812	movl %esi,M_next(%edx)		/* next memory block allocated */
813	movl %edx,A_plist(%ecx)		/* update current page list */
814	popl %eax			/* user size request */
815	jmp LCL(alloc_ret)		/* goto common return code */
816
817	/* Allocate a context header in addition to memory block header + data */
818	/* %eax = bytes to allocate, %ebx = GOT, %esi = ptr to store context ptr */
819	/* %edi = context number */
820
821	.align ALIGN
822LCL(alloc_context):
823	pushl %eax			/* save regs */
824	pushl %esi
825	movl V_page_size(%ebx),%edx
826	addl $(A_size+M_size-1),%eax	/* add in overhead size & subtract 1 */
827	decl %edx			/* page_size - 1 */
828	addl %edx,%eax			/* round up to whole number of pages */
829	notl %edx
830	andl %edx,%eax
831	leal -A_size-M_size(%eax),%esi	/* save allocation size */
832	pushl %eax			/* argument to _profile_alloc_pages */
833	call *V_alloc_pages(%ebx)	/* allocate some memory */
834	addl $4,%esp			/* pop off argument */
835
836#if DO_STATS
837	SLOCK incl V_num_context(%ebx,%edi,4) /* bump # context blocks */
838	SLOCK addl %esi,V_wasted(%ebx,%edi,4) /* update global counters */
839	SLOCK addl $(A_size+M_size),V_overhead(%ebx,%edi,4)
840#endif
841
842	movl %eax,%ecx			/* context pointer */
843	leal A_size(%eax),%edx		/* memory block pointer */
844	movl %esi,M_nfree(%edx)		/* # free bytes */
845	addl $(A_size+M_size),%eax	/* bump past overhead */
846	movl %eax,M_first(%edx)		/* first space available */
847	movl %eax,M_ptr(%edx)		/* current address available */
848	movl $0,M_next(%edx)		/* next memory block allocated */
849	movl %edx,A_plist(%ecx)		/* head of memory block list */
850	movl $1,A_lock(%ecx)		/* set lock */
851	popl %esi			/* ptr to store context block link */
852	movl %ecx,%eax			/* context pointer temp */
853	xchgl %eax,A_next(%esi)		/* link into chain */
854	movl %eax,A_next(%ecx)		/* add links in case of threading */
855	popl %eax			/* user size request */
856	jmp LCL(alloc_ret)		/* goto common return code */
857
858END(_profile_alloc_asm)
859
860/*
861 * C callable version of the profile memory allocator.
862 * extern void *_profile_alloc(struct profile_vars *, size_t, acontext_type_t);
863*/
864
865Entry(_profile_alloc)
866	ENTER
867	pushl %ebx
868	movl 12+Estack(%esp),%eax	/* memory size */
869	movl 8+Estack(%esp),%ebx	/* profile_vars address */
870	addl $3,%eax			/* round up to word boundary */
871	movl 16+Estack(%esp),%ecx	/* which memory pool to allocate from */
872	andl $0xfffffffc,%eax		/* mask size down to a multiple of 4 */
873	call EXT(_profile_alloc_asm)
874	popl %ebx
875	LEAVE0
876	ret
877END(_profile_alloc)
878
879\f
880/*
881 * Dummy mcount routine that just returns.
882 *
883 * +-------------------------------+
884 * | |
885 * | |
886 * | caller's caller stack, |
887 * | saved registers, params. |
888 * | |
889 * | |
890 * +-------------------------------+
891 * | caller's caller return addr. |
892 * +-------------------------------+
893 * esp --> | caller's return address |
894 * +-------------------------------+
895 *
896 * edx --> function unique LCL
897 */
898
/*
 * Dummy mcount routine: does no profiling work and just returns.
 * Installed when profiling is off so that compiler-emitted mcount
 * calls are harmless.  When DO_STATS is on it still counts how many
 * times it was invoked (V_dummy), with preemption disabled around
 * the Vload/statistics update.
 */
899Entry(_dummy_mcount)
900	ENTER
901
902#if DO_STATS
903	pushl %ebx				/* scratch for vars pointer */
904	MP_DISABLE_PREEMPTION(%ebx)
905	Vload					/* %ebx = address of profile vars */
906	SDINC(V_dummy(%ebx))			/* bump "dummy mcount called" counter */
907	MP_ENABLE_PREEMPTION(%ebx)
908	popl %ebx
909#endif
910
911	LEAVE0
912	ret
913END(_dummy_mcount)
914
915\f
916/*
917 * Entry point for System V based profiling, count how many times each function
918 * is called. The function label is passed in %edx, and the top two words on
919 * the stack are the caller's address, and the caller's return address.
920 *
921 * +-------------------------------+
922 * | |
923 * | |
924 * | caller's caller stack, |
925 * | saved registers, params. |
926 * | |
927 * | |
928 * +-------------------------------+
929 * | caller's caller return addr. |
930 * +-------------------------------+
931 * esp --> | caller's return address |
932 * +-------------------------------+
933 *
934 * edx --> function unique label
935 *
936 * We don't worry about the possibility of two threads calling
937 * the same function for the first time simultaneously. If that
938 * happens, two records will be created, and one of the records'
939 * addresses will be stored in the function unique label (which
940 * is aligned by the compiler, so we don't have to watch out for
941 * crossing page/cache boundaries).
942 */
943
/*
 * System V style profiling entry: count calls per function.
 * In:  %edx = address of the function's unique label word
 *              (NULL until the first call; afterwards it holds the
 *               address of the function's prof record).
 * Fast path: record exists -> double-precision increment of P_count.
 * Slow path (pnew): allocate a P_size record from the C_prof pool,
 * initialize it, and publish it into the unique label with xchgl
 * (a plain aligned store/exchange; see the block comment above about
 * the benign two-thread race).
 */
944Entry(_prof_mcount)
945	ENTER
946
947#if DO_STATS
948	pushl %ebx
949	MP_DISABLE_PREEMPTION(%ebx)
950	Vload
951	SDINC(V_cnt(%ebx))			/* bump total mcount-call counter */
952#endif
953
954	movl (%edx),%eax			/* initialized? */
955	cmpl $0,%eax
956	je LCL(pnew)				/* first call: allocate a record */
957
958	DINC2(P_count(%eax),P_overflow(%eax))	/* bump function count (double precision) */
959
960#if DO_STATS
961	MP_ENABLE_PREEMPTION(%ebx)
962	popl %ebx
963#endif
964
965	LEAVE0
966	ret
967
968	.align ALIGN
969LCL(pnew):
970
971#if !DO_STATS
972	pushl %ebx				/* vars ptr not loaded yet on this path */
973	MP_DISABLE_PREEMPTION(%ebx)
974	Vload
975#endif
976
977	SLOCK incl V_prof_records(%ebx)		/* one more prof record exists */
978	pushl %edx				/* save unique-label address across alloc */
55e303ae
A
979	movl $(P_size),%eax			/* allocation size */
980	movl $(C_prof),%ecx			/* allocation pool */
1c79356b
A
981	call EXT(_profile_alloc_asm)		/* allocate a new record */
982	popl %edx
983
984	movl Estack+4(%esp),%ecx		/* caller's address */
985	movl %ecx,P_addr(%eax)
986	movl $1,P_count(%eax)			/* call count */
987	xchgl %eax,(%edx)			/* publish record into function header */
988	MP_ENABLE_PREEMPTION(%ebx)		/* both paths disabled preemption above */
989	popl %ebx
990	LEAVE0
991	ret
992
993END(_prof_mcount)
994
995\f
996/*
997 * Entry point for BSD based graph profiling, count how many times each unique
998 * call graph (caller + callee) is called. The function label is passed in
999 * %edx, and the top two words on the stack are the caller's address, and the
1000 * caller's return address.
1001 *
1002 * +-------------------------------+
1003 * | |
1004 * | |
1005 * | caller's caller stack, |
1006 * | saved registers, params. |
1007 * | |
1008 * | |
1009 * +-------------------------------+
1010 * | caller's caller return addr. |
1011 * +-------------------------------+
1012 * esp --> | caller's return address |
1013 * +-------------------------------+
1014 *
1015 * edx --> function unique label
1016 *
1017 * We don't worry about the possibility of two threads calling the same
1018 * function simultaneously. If that happens, two records will be created, and
1019 * one of the records' addresses will be stored in the function unique label
1020 * (which is aligned by the compiler).
1021 *
1022 * By design, the gprof header is not locked. Each of the cache pointers is
1023 * always a valid pointer (possibly to a null record), and if another thread
1024 * comes in and modifies the pointer, it does so automatically with a simple store.
1025 * Since all arcs are in the hash table, the caches are just to avoid doing
1026 * a multiplication in the common case, and if they don't match, the arcs will
1027 * still be found.
1028 */
1029
/*
 * BSD/gprof style profiling entry: count each unique call arc
 * (caller's caller -> caller).
 * In:  %edx = address of the function's unique label word
 *      Estack+4(%esp) = caller's caller return address
 * Layout: each profiled function owns a gprof header (H_*) holding a
 * prof record, a 3-entry arc cache (H_cache_ptr+0/4/8, most recently
 * used first), and a pointer to the global arc hash table.  The cache
 * entries always point at valid arc records (a dummy record until
 * real arcs exist), so they can be read and replaced with plain
 * stores without locking; a cache miss falls back to the hash table,
 * where every arc is guaranteed to be found or inserted.
 */
1030Entry(_gprof_mcount)
1031
1032	ENTER
1033	movl Estack+4(%esp),%ecx		/* caller's caller address */
1034
1035#if DO_STATS
1036	pushl %ebx
1037	MP_DISABLE_PREEMPTION(%ebx)
1038	Vload
1039	SDINC(V_cnt(%ebx))			/* bump profile call counter (double int) */
1040#endif
1041
1042	movl (%edx),%eax			/* Gprof header allocated? */
1043	cmpl $0,%eax
1044	je LCL(gnew)				/* skip if first call */
1045
1046	DINC2(H_prof+P_count(%eax),H_prof+P_overflow(%eax))	/* bump function count */
1047
1048	/* See if this call arc is the same as the last time */
1049MARK(_gprof_mcount_cache1)
1050	movl H_cache_ptr(%eax),%edx		/* last arc searched */
1051	cmpl %ecx,G_frompc(%edx)		/* skip if not equal */
1052	jne LCL(gcache2)
1053
1054	/* Same as last time, increment and return */
1055
1056	DINC2(G_count(%edx),G_overflow(%edx))	/* bump arc count */
1057
1058#if DO_STATS
1059	SDINC(V_cache_hits1(%ebx))		/* update counter */
1060	MP_ENABLE_PREEMPTION(%ebx)
1061	popl %ebx
1062#endif
1063
1064	LEAVE0
1065	ret
1066
1067	/* Search second cache entry */
1068	/* %eax = gprof func header, %ebx = vars address if DO_STATS, %ecx = caller's caller */
1069	/* %edx = first arc searched */
1070	/* %ebx if DO_STATS pushed on stack */
1071
1072	.align ALIGN
1073MARK(_gprof_mcount_cache2)
1074LCL(gcache2):
1075	pushl %esi				/* get a saved register */
1076	movl H_cache_ptr+4(%eax),%esi		/* 2nd arc to be searched */
1077	cmpl %ecx,G_frompc(%esi)		/* skip if not equal */
1078	jne LCL(gcache3)
1079
1080	/* Element found, increment, reset last arc searched and return */
1081
1082	DINC2(G_count(%esi),G_overflow(%esi))	/* bump arc count */
1083
1084	movl %esi,H_cache_ptr+0(%eax)		/* swap 1st and 2nd cached arcs (MRU first) */
1085	popl %esi
1086	movl %edx,H_cache_ptr+4(%eax)
1087
1088#if DO_STATS
1089	SDINC(V_cache_hits2(%ebx))		/* update counter */
1090	MP_ENABLE_PREEMPTION(%ebx)
1091	popl %ebx
1092#endif
1093
1094	LEAVE0
1095	ret
1096
1097	/* Search third cache entry */
1098	/* %eax = gprof func header, %ebx = vars address if DO_STATS, %ecx = caller's caller */
1099	/* %edx = first arc searched, %esi = second arc searched */
1100	/* %esi, %ebx if DO_STATS pushed on stack */
1101
1102	.align ALIGN
1103MARK(_gprof_mcount_cache3)
1104LCL(gcache3):
1105	pushl %edi
1106	movl H_cache_ptr+8(%eax),%edi		/* 3rd arc to be searched */
1107	cmpl %ecx,G_frompc(%edi)		/* skip if not equal */
1108	jne LCL(gnocache)
1109
1110	/* Element found, increment, reset last arc searched and return */
1111
1112	DINC2(G_count(%edi),G_overflow(%edi))	/* bump arc count */
1113
1114	movl %edi,H_cache_ptr+0(%eax)		/* make this 1st cached arc */
1115	movl %esi,H_cache_ptr+8(%eax)		/* old 2nd arc demoted to 3rd */
1116	movl %edx,H_cache_ptr+4(%eax)		/* old 1st arc demoted to 2nd */
1117	popl %edi
1118	popl %esi
1119
1120#if DO_STATS
1121	SDINC(V_cache_hits3(%ebx))		/* update counter */
1122	MP_ENABLE_PREEMPTION(%ebx)
1123	popl %ebx
1124#endif
1125
1126	LEAVE0
1127	ret
1128
1129	/* No function context, allocate a new context */
1130	/* %ebx is the variables address if DO_STATS */
1131	/* %ecx is the caller's caller's address */
1132	/* %edx is the unique function pointer */
1133	/* %ebx if DO_STATS pushed on stack */
1134
1135	.align ALIGN
1136MARK(_gprof_mcount_new)
1137LCL(gnew):
1138	pushl %esi
1139	pushl %edi
1140
1141#if !DO_STATS
1142	pushl %ebx				/* Address of vars needed for alloc */
1143	MP_DISABLE_PREEMPTION(%ebx)
1144	Vload					/* stats already loaded address */
1145#endif
1146
1147	SLOCK incl V_prof_records(%ebx)		/* one more gprof func header exists */
1148	movl %edx,%esi				/* save unique function ptr */
1149	movl %ecx,%edi				/* and caller's caller address */
55e303ae
A
1150	movl $(H_size),%eax			/* memory block size */
1151	movl $(C_gfunc),%ecx			/* gprof function header memory pool */
1c79356b
A
1152	call EXT(_profile_alloc_asm)		/* %eax = new gprof header */
1153
1154	movl V_hash_ptr(%ebx),%ecx		/* copy hash_ptr to func header */
1155	movl V_dummy_ptr(%ebx),%edx		/* dummy cache entry */
1156	movl %ecx,H_hash_ptr(%eax)
1157	movl %edx,H_cache_ptr+0(%eax)		/* store dummy cache ptrs (always valid) */
1158	movl %edx,H_cache_ptr+4(%eax)
1159	movl %edx,H_cache_ptr+8(%eax)
1160	movl %esi,H_unique_ptr(%eax)		/* remember function unique ptr */
1161	movl Estack+12(%esp),%ecx		/* caller's address */
1162	movl $1,H_prof+P_count(%eax)		/* function called once so far */
1163	movl %ecx,H_prof+P_addr(%eax)		/* set up prof information */
1164	movl %eax,(%esi)			/* publish header into unique label */
1165	movl %edi,%ecx				/* caller's caller address */
1166	movl %edx,%esi				/* 2nd cached arc */
1167
1168#if !DO_STATS
1169	popl %ebx
1170#endif
1171
1172	/* Fall through to add element to the hash table. This may involve */
1173	/* searching a few hash table elements that don't need to be searched */
1174	/* since we have a new element, but it allows the hash table function */
1175	/* to be specified in only one place */
1176
1177	/* Didn't find entry in cache, search the global hash table */
1178	/* %eax = gprof func header, %ebx = vars address if DO_STATS */
1179	/* %ecx = caller's caller */
1180	/* %edx, %esi = cached arcs that were searched */
1181	/* %edi, %esi, %ebx if DO_STATS pushed on stack */
1182
1183	.align ALIGN
1184MARK(_gprof_mcount_hash)
1185LCL(gnocache):
1186
1187	pushl %esi				/* save 2nd arc searched */
1188	pushl %edx				/* save 1st arc searched */
1189	movl %eax,%esi				/* save gprof func header */
1190
1191#if DO_STATS
1192	SDINC(V_hash_num(%ebx))
1193	movl Estack+20(%esp),%edi		/* caller's address (extra %ebx on stack) */
1194#else
1195	movl Estack+16(%esp),%edi		/* caller's address */
1196#endif
1197	movl %ecx,%eax				/* caller's caller address */
1198	imull %edi,%eax				/* multiply to get hash */
1199	movl H_hash_ptr(%esi),%edx		/* hash pointer */
55e303ae
A
1200	shrl $(GPROF_HASH_SHIFT),%eax		/* eliminate low order bits */
1201	andl $(GPROF_HASH_MASK),%eax		/* mask to get hash value */
1c79356b
A
1202	leal 0(%edx,%eax,4),%eax		/* pointer to hash bucket */
1203	movl %eax,%edx				/* save hash bucket address */
1204
1205	/* %eax = old arc, %ebx = vars address if DO_STATS, %ecx = caller's caller */
1206	/* %edx = hash bucket address, %esi = gfunc ptr, %edi = caller's addr */
1207	/* 2 old arcs, %edi, %esi, %ebx if DO_STATS pushed on stack */
1208
1209	.align ALIGN
1210LCL(ghash):
1211	movl G_next(%eax),%eax			/* get next hash element */
1212	cmpl $0,%eax				/* end of line? */
1213	je LCL(ghashnew)			/* skip if allocate new hash */
1214
1215#if DO_STATS
1216	SDINC(V_hash_search(%ebx))
1217#endif
1218
1219	cmpl G_selfpc(%eax),%edi		/* loop back if not one we want */
1220	jne LCL(ghash)
1221
1222	cmpl G_frompc(%eax),%ecx		/* loop back if not one we want */
1223	jne LCL(ghash)
1224
1225	/* Found an entry, increment count, set up for caching, and return */
1226	/* %eax = arc, %ebx = vars address if DO_STATS, %esi = func header */
1227	/* 2 old arcs, %edi, %esi, %ebx if DO_STATS pushed on stack */
1228
1229	DINC2(G_count(%eax),G_overflow(%eax))	/* bump arc count */
1230
1231	popl %ecx				/* previous 1st arc searched */
1232	movl %eax,H_cache_ptr+0(%esi)		/* this element is now 1st arc */
1233	popl %edi				/* previous 2nd arc searched */
1234	movl %ecx,H_cache_ptr+4(%esi)		/* new 2nd arc to be searched */
1235	movl %edi,H_cache_ptr+8(%esi)		/* new 3rd arc to be searched */
1236	popl %edi
1237	popl %esi
1238
1239#if DO_STATS
1240	MP_ENABLE_PREEMPTION(%ebx)
1241	popl %ebx
1242#endif
1243
1244	LEAVE0
1245	ret					/* return to user */
1246
1247	/* Allocate new arc */
1248	/* %eax = old arc, %ebx = vars address if DO_STATS, %ecx = caller's caller */
1249	/* %edx = hash bucket address, %esi = gfunc ptr, %edi = caller's addr */
1250	/* 2 old arcs, %edi, %esi, %ebx if DO_STATS pushed on stack */
1251
1252	.align ALIGN
1253MARK(_gprof_mcount_hashnew)
1254LCL(ghashnew):
1255
1256#if !DO_STATS
1257	pushl %ebx				/* load address of vars if we haven't */
1258	MP_DISABLE_PREEMPTION(%ebx)
1259	Vload					/* already done so */
1260#endif
1261
1262	SLOCK incl V_gprof_records(%ebx)	/* one more arc record exists */
1263	pushl %edx				/* save hash bucket addr across alloc */
1264	movl %ecx,%edi				/* save caller's caller */
55e303ae
A
1265	movl $(G_size),%eax			/* arc size */
1266	movl $(C_gprof),%ecx			/* gprof memory pool */
1c79356b
A
1267	call EXT(_profile_alloc_asm)		/* %eax = new arc record */
1268	popl %edx
1269
1270	movl $1,G_count(%eax)			/* set call count */
1271	movl Estack+20(%esp),%ecx		/* caller's address */
1272	movl %edi,G_frompc(%eax)		/* caller's caller */
1273	movl %ecx,G_selfpc(%eax)
1274
1275#if !DO_STATS
1276	popl %ebx				/* release %ebx if no stats */
1277#endif
1278
1279	movl (%edx),%ecx			/* first hash bucket */
1280	movl %ecx,G_next(%eax)			/* update link */
1281	movl %eax,%ecx				/* copy for xchgl */
1282	xchgl %ecx,(%edx)			/* add to hash linked list */
1283	movl %ecx,G_next(%eax)			/* update in case list changed */
1284
1285	popl %ecx				/* previous 1st arc searched */
1286	popl %edi				/* previous 2nd arc searched */
1287	movl %eax,H_cache_ptr+0(%esi)		/* this element is now 1st arc */
1288	movl %ecx,H_cache_ptr+4(%esi)		/* new 2nd arc to be searched */
1289	movl %edi,H_cache_ptr+8(%esi)		/* new 3rd arc to be searched */
1290
1291	popl %edi
1292	popl %esi
1293
1294#if DO_STATS
1295	MP_ENABLE_PREEMPTION(%ebx)
1296	popl %ebx
1297#endif
1298
1299	LEAVE0
1300	ret					/* return to user */
1301
1302END(_gprof_mcount)
1303
1304\f
1305/*
1306 * This function assumes that neither the caller nor its caller
1307 * has omitted the frame pointer, which is needed to get the caller's
1308 * caller. The stack looks like the following at the time of the call:
1309 *
1310 * +-------------------------------+
1311 * | |
1312 * | |
1313 * | caller's caller stack, |
1314 * | saved registers, params. |
1315 * | |
1316 * | |
1317 * +-------------------------------+
1318 * | caller's caller return addr. |
1319 * +-------------------------------+
1320 * fp --> | previous frame pointer |
1321 * +-------------------------------+
1322 * | |
1323 * | caller's stack, saved regs, |
1324 * | params. |
1325 * | |
1326 * +-------------------------------+
1327 * sp --> | caller's return address |
1328 * +-------------------------------+
1329 *
1330 * Recent versions of the compiler put the address of the pointer
1331 * sized word in %edx. Previous versions did not, but this code
1332 * does not support them.
1333 */
1334
1335/*
1336 * Note that OSF/rose blew defining _mcount, since it prepends leading
1337 * underscores, and _mcount didn't have a second leading underscore. However,
1338 * some of the kernel/server functions 'know' that mcount has a leading
1339 * underscore, so we satisfy both camps.
1340 */
1341
/*
 * Compatibility mcount/_mcount entry (OLD_MCOUNT): old-style callers
 * do not pass the unique-label address in %edx, so this stub fetches
 * the caller's caller return address through the frame pointer
 * (4(%ebp)), pushes it where the real profiler expects it, and tail
 * jumps through *V_mcount_ptr_ptr to the configured profiling entry.
 * Both 'mcount' and '_mcount' label the same code (see the naming
 * note above).
 */
1342#if OLD_MCOUNT
1343	.globl mcount
1344	.globl _mcount
1345	ELF_FUNC(mcount)
1346	ELF_FUNC(_mcount)
1347	.align FALIGN
1348_mcount:
1349mcount:
1350
1351	pushl %ebx
1352	MP_DISABLE_PREEMPTION(%ebx)
1353	Vload
1354
1355#if DO_STATS
1356	SDINC(V_old_mcount(%ebx))		/* count old-style mcount entries */
1357#endif
1358
1359	/* In calling the functions, we will actually leave 1 extra word on the */
1360	/* top of the stack, but generated code will not notice, since the function */
1361	/* uses a frame pointer */
1362
1363	movl V_mcount_ptr_ptr(%ebx),%ecx	/* address of mcount_ptr */
1364	MP_ENABLE_PREEMPTION(%ebx)
1365	popl %ebx
1366	movl 4(%ebp),%eax			/* caller's caller return address */
1367	xchgl %eax,(%esp)			/* push & get return address */
1368	pushl %eax				/* push return address */
1369	jmp *(%ecx)				/* go to profile the function */
1370
1371End(mcount)
1372End(_mcount)
1373#endif
1374
1375\f
1376#if !defined(KERNEL) && !defined(MACH_KERNEL)
1377
1378/*
1379 * Convert a 64-bit integer to a string.
1380 * Arg #1 is a pointer to a string (at least 24 bytes) or NULL
1381 * Arg #2 is the low part of the 64-bit integer.
1382 * Arg #3 is the high part of the 64-bit integer.
1383 */
1384
/*
 * Convert a 64-bit counter to a decimal string, building it backwards
 * from the end of the buffer.
 *   Arg 1: buffer (>= N_digit bytes) or NULL to use the shared
 *          V_num_buffer inside the profile vars
 *   Arg 2: low 32 bits, Arg 3: high 32 bits (used only if OVERFLOW)
 * Returns in %eax a pointer to the first digit (inside the buffer,
 * NUL-terminated).  The high-part loop divides high:low by 10 in two
 * 32-bit div steps, carrying high%10 into the low-part division via
 * %edx.
 * NOTE(review): MP_DISABLE_PREEMPTION is only executed on the
 * NULL-buffer path, yet MP_ENABLE_PREEMPTION runs on every exit —
 * looks asymmetric; confirm the macros tolerate this pairing.
 */
1385Entry(_profile_cnt_to_decimal)
1386	ENTER
1387	pushl %ebx
1388	pushl %esi
1389	pushl %edi
1390	movl Estack+16(%esp),%ebx		/* pointer or null */
1391	movl Estack+20(%esp),%edi		/* low part of number */
1392	movl $10,%ecx				/* divisor */
1393	cmpl $0,%ebx				/* skip if pointer ok */
1394	jne LCL(cvt_nonnull)
1395
1396	MP_DISABLE_PREEMPTION(%ebx)
1397	Vload					/* get _profile_vars address */
1398	leal V_num_buffer(%ebx),%ebx		/* temp buffer to use */
1399
1400	.align ALIGN
1401LCL(cvt_nonnull):
1402	addl $(N_digit-1),%ebx			/* point string at end */
1403	movb $0,0(%ebx)				/* null terminate string */
1404
1405#if OVERFLOW
1406	movl Estack+24(%esp),%esi		/* high part of number */
1407	cmpl $0,%esi				/* any thing left in high part? */
1408	je LCL(cvt_low)
1409
1410	.align ALIGN
1411LCL(cvt_high):
1412	movl %esi,%eax				/* calculate high/10 & high%10 */
1413	xorl %edx,%edx
1414	divl %ecx
1415	movl %eax,%esi
1416
1417	movl %edi,%eax				/* calculate (low + (high%10)*2^32) / 10 */
1418	divl %ecx				/* %edx carries remainder from high part */
1419	movl %eax,%edi
1420
1421	decl %ebx				/* decrement string pointer */
1422	addl $48,%edx				/* convert from 0..9 -> '0'..'9' */
1423	movb %dl,0(%ebx)			/* store digit in string */
1424	cmpl $0,%esi				/* any thing left in high part? */
1425	jne LCL(cvt_high)
1426
1427#endif	/* OVERFLOW */
1428
1429	.align ALIGN
1430LCL(cvt_low):
1431	movl %edi,%eax				/* get low part into %eax */
1432
1433	.align ALIGN
1434LCL(cvt_low2):
1435	xorl %edx,%edx				/* 0 */
1436	divl %ecx				/* calculate next digit */
1437	decl %ebx				/* decrement string pointer */
1438	addl $48,%edx				/* convert from 0..9 -> '0'..'9' */
1439	movb %dl,0(%ebx)			/* store digit in string */
1440	cmpl $0,%eax				/* any more digits to convert? */
1441	jne LCL(cvt_low2)
1442
1443	movl %ebx,%eax				/* return value */
1444	popl %edi
1445	popl %esi
1446	MP_ENABLE_PREEMPTION(%ebx)
1447	popl %ebx
1448	LEAVE0
1449	ret
1450
1451END(_profile_cnt_to_decimal)
1452
1453#endif