/*
 * Imported from apple/xnu.git: tools/tests/libMicro/apple/lmbench_lat_ctx.c
 * (blob 2cbe790c275c86be46eb08fd047e5f69c0acdbd3)
 */
1 /*
2 * Copyright (c) 2006 Apple Inc. All Rights Reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29
30 /*
31 * Order of Execution
32 *
33 * benchmark_init
34 *
35 * benchmark_optswitch
36 *
37 * benchmark_initrun
38 *
39 * benchmark_initworker
40 * benchmark_initbatch
41 * benchmark
42 * benchmark_finibatch
43 * benchmark_initbatch
44 * benchmark
45 * benchmark_finibatch, etc.
46 * benchmark_finiworker
47 *
48 * benchmark_result
49 *
50 * benchmark_finirun
51 *
52 * benchmark_fini
53 */
54
55
56
57 #ifdef __sun
58 #pragma ident "@(#)trivial.c 1.0 08/17/06 Apple Inc."
59 #endif
60
61
62
63 #include <unistd.h>
64 #include <stdlib.h>
65 #include <stdio.h>
66
67 #include <signal.h>
68 #include <strings.h>
69
70 #include <sys/sysctl.h>
71 #include "../libmicro.h"
72
/*
 * debug() prints a message (plus newline) to stderr.  It is compiled in
 * unconditionally here (#if 1); flip to the #else arm to silence it.
 * NOTE(review): consider gating on a DEBUG build flag instead -- confirm
 * whether the always-on output is intentional for this benchmark.
 */
#if 1
# define debug(fmt, args...) (void) fprintf(stderr, fmt "\n" , ##args)
#else
# define debug(fmt, args...)
#endif


#define MAXPROC 2048	/* NOTE(review): unused in the visible portion of this file */
#define CHUNK (4<<10)	/* NOTE(review): unused in the visible portion of this file */
#define TRIPS 5		/* NOTE(review): unused in the visible portion of this file */
#ifndef max
/* Classic max macro; beware: evaluates each argument twice. */
#define max(a, b) ((a) > (b) ? (a) : (b))
#endif
86
87
/*
 * Your state variables should live in the tsd_t struct below.
 * One instance exists per benchmark worker; the framework allocates it
 * based on lm_tsdsize (set in benchmark_init).
 */
typedef struct {
	int process_size;	/* bytes each ring process sums over (-s option; default 1) */
	double overhead;	/* NOTE(review): never read or written in this file -- possibly vestigial */
	int procs;		/* ring size: number of pipes/processes; set outside the visible code -- TODO confirm */
	pid_t* pids;		/* child pids; slots [1..procs-1] filled by create_daemons(), [0] stays zero */
	int **p;		/* procs pipe pairs: p[i][0] = read end, p[i][1] = write end */
	void* data;		/* scratch buffer of process_size bytes (NULL when process_size == 0) */
} tsd_t;
99
/*
 * Process size selected by -s (parsed with sizetoint()); defaults to 1.
 * NOTE(review): usage text says "kbytes" but the value is used directly as
 * a byte count for malloc/bread -- confirm the intended units.
 */
static int opts = 1;

/* Forward declarations for the lmbench-derived helpers defined below. */
void doit(int rd, int wr, int process_size);
int create_pipes(int **p, int procs);
int create_daemons(int **p, pid_t *pids, int procs, int process_size);
void initialize_overhead(void* tsd);
void cleanup_overhead(void* tsd);
void benchmark_overhead(void* tsd);
void initialize(void* tsd);	/* NOTE(review): declared but never defined in this file */
void cleanup(void* tsd);
long bread(void* buf, long nbytes);
111
112
113 #pragma mark *** lmbench routines
114
115 /*
116 * lmbench routines, etc. brought over for this benchmark
117 */
118
/*
 * Raise this process's open-file limit to its hard maximum so the
 * benchmark can create a large number of pipes.  Errors from the
 * rlimit calls are deliberately ignored (best effort).
 */
void
morefds(void)
{
#ifdef RLIMIT_NOFILE
	struct rlimit limits;

	getrlimit(RLIMIT_NOFILE, &limits);
	limits.rlim_cur = limits.rlim_max;
	setrlimit(RLIMIT_NOFILE, &limits);
#endif
}
130
/*
 * Body of one ring daemon: forever read the token from fd 'rd', touch
 * 'process_size' bytes of private data via bread(), and pass the token
 * on through fd 'wr'.  Exits when either end of the pipe breaks.
 * Never returns to the caller.
 */
void
doit(int rd, int wr, int process_size)
{
	int	msg;
	void	*data = NULL;

	if (process_size) {
		data = malloc(process_size);
		if (data)
			bzero(data, process_size);
		else
			process_size = 0;	/* fix: avoid bread(NULL, n) dereferencing NULL on malloc failure */
	}
	for ( ;; ) {
		if (read(rd, &msg, sizeof (msg)) != sizeof (msg)) {
			debug("read/write on pipe");
			break;
		}
		bread(data, process_size);
		if (write(wr, &msg, sizeof (msg)) != sizeof (msg)) {
			debug("read/write on pipe");
			break;
		}
	}
	exit(0);
}
154
/*
 * Return the number of processors in this host.
 * Falls back to 1 if no platform interface is available or the query fails.
 */
int
sched_ncpus()
{
#ifdef MP_NPROCS
	/* SGI IRIX interface */
	return sysmp(MP_NPROCS);
#elif defined(HAVE_MPCTL)
	/* HP-UX interface */
	return mpctl(MPC_GETNUMSPUS_SYS, 0, 0);
#elif defined(_SC_NPROCESSORS_ONLN)
	/* AIX, Solaris, and Linux interface */
	return sysconf(_SC_NPROCESSORS_ONLN);
#elif __APPLE__
	char *name = "hw.activecpu";
	int cpus;
	size_t len = sizeof (cpus);	/* fix: was hard-coded 4; use the true buffer size */

	/* fix: check the sysctl result instead of returning garbage on failure */
	if (sysctlbyname(name, &cpus, &len, NULL, 0) != 0 || cpus < 1)
		return 1;
	return cpus;
#endif
	return 1;
}
181
/*
 * Use to get sequentially created processes "far" away from
 * each other in an SMP: mirror the low-order bits of the CPU id.
 *
 * XXX: probably doesn't work for NCPUS not a power of two.
 */
int
reverse_bits(int cpu)
{
	int	bit;
	int	width;
	int	top = sched_ncpus() - 1;
	int	mirrored = 0;

	/* how many bits are needed to represent the highest CPU id */
	for (bit = top >> 1, width = 1; bit > 0; bit >>= 1, width++)
		;
	/* mirror the low 'width' bits of cpu */
	for (bit = 0; bit < width; bit++) {
		if (cpu & (1 << bit))
			mirrored |= 1 << (width - bit - 1);
	}
	return mirrored;
}
205
206
/*
 * The interface used by benchmp.
 *
 * childno is the "logical" child id number.
 *	In range [0, ..., parallel-1].
 * benchproc is the "logical" id within the benchmark process.  The
 *	benchmp-created process is logical ID zero, child processes
 *	created by the benchmark range from [1, ..., nbenchprocs].
 * nbenchprocs is the number of child processes that each benchmark
 *	process will create.  Most benchmarks will leave this zero,
 *	but some such as the pipe() benchmarks will not.
 *
 * The placement policy is taken from $LMBENCH_SCHED.  Note that actual
 * pinning is disabled in this port: the chosen cpu is computed and
 * logged, then the function returns 0 regardless.  (The original
 * CUSTOM/CUSTOM_UNIQUE policies were already compiled out.)
 */
int
handle_scheduler(int childno, int benchproc, int nbenchprocs)
{
	int	cpu;
	char	*policy = getenv("LMBENCH_SCHED");

	/* no policy, or explicit DEFAULT: let the scheduler decide */
	if (policy == NULL || strcasecmp(policy, "DEFAULT") == 0)
		return 0;

	if (strcasecmp(policy, "SINGLE") == 0) {
		/* pile every process onto CPU 0 */
		cpu = 0;
	} else if (strcasecmp(policy, "BALANCED") == 0) {
		/*
		 * one CPU per benchmark process; its children share
		 * the parent's CPU
		 */
		cpu = childno;
	} else if (strcasecmp(policy, "BALANCED_SPREAD") == 0) {
		/*
		 * one CPU per benchmark process, with neighboring ids
		 * placed as far apart as possible -- useful for spotting
		 * bus contention on hierarchical/NUMA machines
		 */
		cpu = reverse_bits(childno);
	} else if (strcasecmp(policy, "UNIQUE") == 0) {
		/* one CPU per benchmark process AND per child process */
		cpu = childno * (nbenchprocs + 1) + benchproc;
	} else if (strcasecmp(policy, "UNIQUE_SPREAD") == 0) {
		/* as UNIQUE, but spread like BALANCED_SPREAD */
		cpu = reverse_bits(childno * (nbenchprocs + 1) + benchproc);
	} else {
		/* unrecognized policy: do nothing */
		return 0;
	}
	debug("cpu = %d, sched_ncpus() = %d", cpu, sched_ncpus());
	/* pinning disabled: sched_pin(cpu % sched_ncpus()) would go here */
	return 0;
}
278
/*
 * Fork procs-1 daemons arranged around the ring of pipes p[], each
 * running doit() on its segment, then push one token around the ring
 * so every child is known to be alive and the token is in flight.
 * pids[1..procs-1] receive the child pids (pids[0] is untouched).
 * Returns procs on success, or the index of the first failed fork.
 */
int
create_daemons(int **p, pid_t *pids, int procs, int process_size)
{
	int	i, j;
	int	msg = 0;	/* fix: token was previously written to the pipe uninitialized */

	/*
	 * Use the pipes as a ring, and fork off a bunch of processes
	 * to pass the byte through their part of the ring.
	 *
	 * Do the sum in each process and get that time before moving on.
	 *
	 * NOTE(review): getpid() is passed where handle_scheduler()
	 * documents a logical child id -- confirm this is intended.
	 */
	handle_scheduler(getpid(), 0, procs-1);
	for (i = 1; i < procs; ++i) {
		switch (pids[i] = fork()) {
		case -1:	/* could not fork, out of processes? */
			return i;

		case 0:		/* child: keep only its two ring fds, then serve forever */
			handle_scheduler(getpid(), i, procs-1);
			for (j = 0; j < procs; ++j) {
				if (j != i - 1) close(p[j][0]);
				if (j != i) close(p[j][1]);
			}
			doit(p[i-1][0], p[i][1], process_size);
			/* NOTREACHED */

		default:	/* parent */
			;
		}
	}

	/*
	 * Go once around the loop to make sure that everyone is ready and
	 * to get the token in the pipeline.
	 */
	if (write(p[0][1], &msg, sizeof (msg)) != sizeof (msg) ||
	    read(p[procs-1][0], &msg, sizeof (msg)) != sizeof (msg)) {
		debug("write/read/write on pipe");
		exit(1);
	}
	return procs;
}
322
/*
 * Create 'procs' pipes into p[0..procs-1], raising the fd limit first.
 * Returns procs on success, or the index of the first pipe() that failed.
 */
int
create_pipes(int **p, int procs)
{
	int	n;

	/* make sure we have enough file descriptors available */
	morefds();
	for (n = 0; n < procs; n++) {
		if (pipe(p[n]) < 0)
			return n;
	}
	return procs;
}
338
339 void
340 initialize_overhead(void* cookie)
341 {
342 int i;
343 int procs;
344 int* p;
345 tsd_t *pState = (tsd_t *)cookie;
346
347 pState->pids = NULL;
348 pState->p = (int**)malloc(pState->procs * (sizeof(int*) + 2 * sizeof(int)));
349 p = (int*)&pState->p[pState->procs];
350 for (i = 0; i < pState->procs; ++i) {
351 pState->p[i] = p;
352 p += 2;
353 }
354
355 pState->data = (pState->process_size > 0) ? malloc(pState->process_size) : NULL;
356 if (pState->data)
357 bzero(pState->data, pState->process_size);
358
359 procs = create_pipes(pState->p, pState->procs);
360 if (procs < pState->procs) {
361 debug("procs < pState->procs");
362 cleanup_overhead(cookie);
363 exit(1);
364 }
365 }
366
367 void
368 cleanup_overhead(void* tsd)
369 {
370 int i;
371 tsd_t *ts = (tsd_t *)tsd;
372
373 for (i = 0; i < ts->procs; ++i) {
374 close(ts->p[i][0]);
375 close(ts->p[i][1]);
376 }
377
378 free(ts->p);
379 if (ts->data) free(ts->data);
380 }
381
382 void
383 cleanup(void* cookie)
384 {
385 int i;
386 tsd_t *pState = (tsd_t *)cookie;
387
388
389 /*
390 * Close the pipes and kill the children.
391 */
392 cleanup_overhead(cookie);
393 for (i = 1; pState->pids && i < pState->procs; ++i) {
394 if (pState->pids[i] > 0) {
395 kill(pState->pids[i], SIGKILL);
396 waitpid(pState->pids[i], NULL, 0);
397 }
398 }
399 if (pState->pids)
400 free(pState->pids);
401 pState->pids = NULL;
402 }
403
404 void
405 benchmark_overhead(void* tsd)
406 {
407 tsd_t *ts = (tsd_t *)tsd;
408 int i = 0;
409 int msg = 1;
410
411 for (i = 0; i < lm_optB; i++) {
412 if (write(ts->p[i][1], &msg, sizeof(msg)) != sizeof(msg)) {
413 debug("read/write on pipe");
414 exit(1);
415 }
416 if (read(ts->p[i][0], &msg, sizeof(msg)) != sizeof(msg)) {
417 debug("read/write on pipe");
418 exit(1);
419 }
420 if (++i == ts->procs) {
421 i = 0;
422 }
423 bread(ts->data, ts->process_size);
424 }
425 }
426
/* analogous to bzero, bcopy, etc., except that it just reads
 * data into the processor
 *
 * Sums the buffer as an array of longs so the compiler cannot discard
 * the loads.  The loop is hand-unrolled in chunks of 128, then 16,
 * then 1 long(s); the unrolling is deliberate benchmark calibration,
 * so the statement layout must be preserved as-is.  Trailing bytes
 * that do not fill a whole long are ignored.  With nbytes == 0 no
 * loop body runs, so a NULL buf is tolerated in that case.
 */
long
bread(void* buf, long nbytes)
{
	long sum = 0;
	register long *p, *next;	/* `register` retained from the original lmbench source */
	register char *end;

	p = (long*)buf;
	end = (char*)buf + nbytes;
	/* main loop: 128 longs per pass */
	for (next = p + 128; (void*)next <= (void*)end; p = next, next += 128) {
		sum +=
			p[0]+p[1]+p[2]+p[3]+p[4]+p[5]+p[6]+p[7]+
			p[8]+p[9]+p[10]+p[11]+p[12]+p[13]+p[14]+
			p[15]+p[16]+p[17]+p[18]+p[19]+p[20]+p[21]+
			p[22]+p[23]+p[24]+p[25]+p[26]+p[27]+p[28]+
			p[29]+p[30]+p[31]+p[32]+p[33]+p[34]+p[35]+
			p[36]+p[37]+p[38]+p[39]+p[40]+p[41]+p[42]+
			p[43]+p[44]+p[45]+p[46]+p[47]+p[48]+p[49]+
			p[50]+p[51]+p[52]+p[53]+p[54]+p[55]+p[56]+
			p[57]+p[58]+p[59]+p[60]+p[61]+p[62]+p[63]+
			p[64]+p[65]+p[66]+p[67]+p[68]+p[69]+p[70]+
			p[71]+p[72]+p[73]+p[74]+p[75]+p[76]+p[77]+
			p[78]+p[79]+p[80]+p[81]+p[82]+p[83]+p[84]+
			p[85]+p[86]+p[87]+p[88]+p[89]+p[90]+p[91]+
			p[92]+p[93]+p[94]+p[95]+p[96]+p[97]+p[98]+
			p[99]+p[100]+p[101]+p[102]+p[103]+p[104]+
			p[105]+p[106]+p[107]+p[108]+p[109]+p[110]+
			p[111]+p[112]+p[113]+p[114]+p[115]+p[116]+
			p[117]+p[118]+p[119]+p[120]+p[121]+p[122]+
			p[123]+p[124]+p[125]+p[126]+p[127];
	}
	/* tail: 16 longs per pass */
	for (next = p + 16; (void*)next <= (void*)end; p = next, next += 16) {
		sum +=
			p[0]+p[1]+p[2]+p[3]+p[4]+p[5]+p[6]+p[7]+
			p[8]+p[9]+p[10]+p[11]+p[12]+p[13]+p[14]+
			p[15];
	}
	/* tail: one long at a time */
	for (next = p + 1; (void*)next <= (void*)end; p = next, next++) {
		sum += *p;
	}
	return sum;
}
472
473 #pragma mark *** darbench routines
474
475
476 /*ARGSUSED*/
477 int
478 benchmark_initbatch(void *tsd)
479 {
480 /*
481 * initialize your state variables here second
482 */
483 tsd_t *ts = (tsd_t *)tsd;
484 int procs;
485
486 initialize_overhead(tsd);
487
488 ts->pids = (pid_t*)malloc(ts->procs * sizeof(pid_t));
489 if (ts->pids == NULL)
490 exit(1);
491 bzero((void*)ts->pids, ts->procs * sizeof(pid_t));
492 procs = create_daemons(ts->p, ts->pids,
493 ts->procs, ts->process_size);
494 if (procs < ts->procs) {
495 cleanup(tsd);
496 exit(1);
497 }
498 return (0);
499 }
500
/* No per-run teardown is needed for this benchmark. */
int
benchmark_finirun()
{
	return (0);
}
506
507 int
508 benchmark_init()
509 {
510 /*
511 * the lm_optstr must be defined here or no options for you
512 *
513 * ...and the framework will throw an error
514 *
515 */
516 (void) sprintf(lm_optstr, "s:");
517 /*
518 * working hypothesis:
519 *
520 * tsd_t is the struct that we can pass around our
521 * state info in
522 *
523 * lm_tsdsize will allocate the space we need for this
524 * structure throughout the rest of the framework
525 */
526 lm_tsdsize = sizeof (tsd_t);
527
528 (void) sprintf(lm_usage,
529 " [-s kbytes]\n"
530 " processes [processes ...]\n");
531
532 return (0);
533 }
534
/* Nothing global to tear down. */
int
benchmark_fini()
{
	return (0);
}
540
/*
 * Per-batch teardown.  The original body duplicated cleanup() line for
 * line (close pipes, kill and reap children, free the pid table);
 * delegate to it instead of maintaining two copies.
 */
int
benchmark_finibatch(void *tsd)
{
	cleanup(tsd);
	return (0);
}
562
/* This benchmark reports no extra result text: return an empty string. */
char *
benchmark_result()
{
	static char empty = '\0';

	return (&empty);
}
569
/*ARGSUSED*/
/* Nothing per-worker to tear down; batches clean up after themselves. */
int
benchmark_finiworker(void *tsd)
{
	return (0);
}
575
576 int
577 benchmark_optswitch(int opt, char *optarg)
578 {
579
580 switch (opt) {
581 case 's':
582 opts = sizetoint(optarg);
583 break;
584 default:
585 return (-1);
586 }
587 return (0);
588 }
589
590 int
591 benchmark_initworker(void *tsd)
592 {
593 tsd_t *ts = (tsd_t *)tsd;
594
595 ts->process_size = opts;
596
597 return (0);
598 }
599
/* No run-wide setup is required. */
int
benchmark_initrun()
{
	return (0);
}
605
606 int
607 benchmark(void *tsd, result_t *res)
608 {
609 /*
610 * initialize your state variables here last
611 *
612 * and realize that you are paying for your initialization here
613 * and it is really a bad idea
614 */
615 tsd_t *ts = (tsd_t *)tsd;
616 int i;
617 int msg=1;
618
619 for (i = 0; i < lm_optB; i++) {
620 if (write(ts->p[0][1], &msg, sizeof(msg)) !=
621 sizeof(msg)) {
622 debug("read/write on pipe");
623 exit(1);
624 }
625 if (read(ts->p[ts->procs-1][0], &msg, sizeof(msg)) != sizeof(msg)) {
626 debug("read/write on pipe");
627 exit(1);
628 }
629 bread(ts->data, ts->process_size);
630 }
631 res->re_count = i;
632
633 return (0);
634 }