]>
Commit | Line | Data |
---|---|---|
b0d623f7 A |
1 | /* |
2 | * CDDL HEADER START | |
3 | * | |
4 | * The contents of this file are subject to the terms | |
5 | * of the Common Development and Distribution License | |
6 | * (the "License"). You may not use this file except | |
7 | * in compliance with the License. | |
8 | * | |
9 | * You can obtain a copy of the license at | |
10 | * src/OPENSOLARIS.LICENSE | |
11 | * or http://www.opensolaris.org/os/licensing. | |
12 | * See the License for the specific language governing | |
13 | * permissions and limitations under the License. | |
14 | * | |
15 | * When distributing Covered Code, include this CDDL | |
16 | * HEADER in each file and include the License file at | |
17 | * usr/src/OPENSOLARIS.LICENSE. If applicable, | |
18 | * add the following below this CDDL HEADER, with the | |
19 | * fields enclosed by brackets "[]" replaced with your | |
20 | * own identifying information: Portions Copyright [yyyy] | |
21 | * [name of copyright owner] | |
22 | * | |
23 | * CDDL HEADER END | |
24 | */ | |
25 | ||
26 | /* | |
27 | * Copyright 2007 Sun Microsystems, Inc. All rights reserved. | |
28 | * Use is subject to license terms. | |
29 | */ | |
30 | ||
31 | /* | |
32 | * routine to benchmark cache-to-cache transfer times... uses | |
33 | * solaris features to find and bind to cpus in the current | |
34 | * processor set, so not likely to work elsewhere. | |
35 | */ | |
36 | ||
37 | ||
38 | #include <unistd.h> | |
39 | #include <stdlib.h> | |
40 | #include <stdio.h> | |
41 | #include <fcntl.h> | |
42 | #include <string.h> | |
43 | #include <sys/processor.h> | |
44 | #include <sys/types.h> | |
45 | #include <stdio.h> | |
46 | #include <errno.h> | |
47 | #include <sys/pset.h> | |
48 | ||
49 | #include "libmicro.h" | |
50 | ||
51 | static long opts = 1024*512; | |
52 | ||
53 | typedef struct { | |
54 | long **ts_data; | |
55 | long ts_result; | |
56 | pthread_mutex_t ts_lock; | |
57 | } tsd_t; | |
58 | ||
59 | static unsigned int ncpu = 1024; | |
60 | ||
61 | static tsd_t *thread_data[1024]; | |
62 | static processorid_t cpus[1024]; | |
63 | ||
64 | int traverse_ptrchain(long **, int, int); | |
65 | ||
66 | int | |
67 | benchmark_init() | |
68 | { | |
69 | lm_tsdsize = sizeof (tsd_t); | |
70 | ||
71 | (void) sprintf(lm_optstr, "s:"); | |
72 | ||
73 | (void) sprintf(lm_usage, | |
74 | " [-s size] size of access area in bytes" | |
75 | " (default %ld)\n" | |
76 | "notes: measures cache to cache transfer times on Solaris\n", | |
77 | opts); | |
78 | ||
79 | (void) sprintf(lm_header, "%8s", "size"); | |
80 | ||
81 | return (0); | |
82 | } | |
83 | ||
84 | int | |
85 | benchmark_optswitch(int opt, char *optarg) | |
86 | { | |
87 | switch (opt) { | |
88 | case 's': | |
89 | opts = sizetoint(optarg); | |
90 | break; | |
91 | default: | |
92 | return (-1); | |
93 | } | |
94 | ||
95 | return (0); | |
96 | } | |
97 | ||
98 | int | |
99 | benchmark_initrun() | |
100 | { | |
101 | if (pset_info(PS_MYID, NULL, &ncpu, cpus) < 0) { | |
102 | perror("pset_info"); | |
103 | return (1); | |
104 | } | |
105 | ||
106 | return (0); | |
107 | } | |
108 | ||
109 | int | |
110 | benchmark_initworker(void *tsd) | |
111 | { | |
112 | tsd_t *ts = (tsd_t *)tsd; | |
113 | int i, j; | |
114 | processorid_t cpu; | |
115 | ||
116 | ts->ts_data = malloc(opts); | |
117 | ||
118 | if (ts->ts_data == NULL) { | |
119 | return (1); | |
120 | } | |
121 | ||
122 | (void) pthread_mutex_init(&ts->ts_lock, NULL); | |
123 | ||
124 | ||
125 | if (processor_bind(P_LWPID, P_MYID, | |
126 | cpu = cpus[(pthread_self() - 1) % ncpu], | |
127 | NULL) < 0) { | |
128 | perror("processor_bind:"); | |
129 | return (1); | |
130 | } | |
131 | ||
132 | (void) printf("# thread %d using processor %d\n", pthread_self(), cpu); | |
133 | ||
134 | /* | |
135 | * use lmbench style backwards stride | |
136 | */ | |
137 | ||
138 | for (i = 0; i < opts / sizeof (long); i++) { | |
139 | j = i - 128; | |
140 | if (j < 0) | |
141 | j = j + opts / sizeof (long); | |
142 | ts->ts_data[i] = (long *)&(ts->ts_data[j]); | |
143 | } | |
144 | ||
145 | thread_data[pthread_self() - 1] = ts; | |
146 | ||
147 | return (0); | |
148 | } | |
149 | ||
150 | /* | |
151 | * here we go in order for each thread, causing inherent serialization | |
152 | * this is normally not a good idea, but in this case we're trying to | |
153 | * measure cache-to-cache transfer times, and if we run threads in | |
154 | * parallel we're likely to see saturation effects rather than cache-to-cache, | |
155 | * esp. on wimpy memory platforms like P4. | |
156 | */ | |
157 | ||
158 | ||
159 | /*ARGSUSED*/ | |
160 | int | |
161 | benchmark(void *tsd, result_t *res) | |
162 | { | |
163 | tsd_t *ts; | |
164 | int i, j; | |
165 | int count = opts / 128 / sizeof (long); | |
166 | ||
167 | for (j = 0; j < lm_optB; j++) | |
168 | for (i = 0; i < lm_optT; i++) { | |
169 | ts = thread_data[i]; | |
170 | (void) pthread_mutex_lock(&ts->ts_lock); | |
171 | ts->ts_result += traverse_ptrchain( | |
172 | (long **)ts->ts_data, count, 0); | |
173 | (void) pthread_mutex_unlock(&ts->ts_lock); | |
174 | } | |
175 | ||
176 | res->re_count = lm_optB * lm_optT * count; | |
177 | ||
178 | return (0); | |
179 | } | |
180 | ||
/*
 * One link of the chain walk: store (*p + v) back through p, then follow
 * the stored pointer.  Kept as a macro so the ten steps per loop
 * iteration stay straight-line code, exactly as when written out by hand.
 */
#define	C2C_HOP(p, v)	do {			\
	*(p) = *(p) + (v);			\
	(p) = (long **)*(p);			\
} while (0)

/*
 * Walk a chain of pointers starting at ptr.
 *
 * ptr   - first element of the chain; each element holds the address of
 *         the next element to visit
 * count - number of links requested; links are taken ten at a time, so
 *         the walk is rounded up to a multiple of ten (zero or negative
 *         means no links at all)
 * value - added to every element before it is followed (pointer
 *         arithmetic on long *, so 0 leaves the chain untouched)
 *
 * After the last group of ten the current element gets one extra
 * store.  The return value is the final element's contents truncated
 * to int; it exists only to give the walk a result that is used.
 */
int
traverse_ptrchain(long **ptr, int count, int value)
{
	int done;

	for (done = 0; done < count; done += 10) {
		C2C_HOP(ptr, value);
		C2C_HOP(ptr, value);
		C2C_HOP(ptr, value);
		C2C_HOP(ptr, value);
		C2C_HOP(ptr, value);
		C2C_HOP(ptr, value);
		C2C_HOP(ptr, value);
		C2C_HOP(ptr, value);
		C2C_HOP(ptr, value);
		C2C_HOP(ptr, value);
		/* extra store on the element the group ended on */
		*ptr = *ptr + value;
	}

	return ((int)*ptr);	/* bogus return */
}

#undef	C2C_HOP
211 | ||
212 | ||
213 | char * | |
214 | benchmark_result() | |
215 | { | |
216 | static char result[256]; | |
217 | ||
218 | (void) sprintf(result, "%8ld ", opts); | |
219 | ||
220 | ||
221 | return (result); | |
222 | } |