123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154 |
- /*
- * lat_mem_rd.c - measure memory load latency
- *
- * usage: lat_mem_rd size-in-MB stride [stride ...]
- *
- * Copyright (c) 1994 Larry McVoy. Distributed under the FSF GPL with
- * additional restriction that results may published only if
- * (1) the benchmark is unmodified, and
- * (2) the version in the sccsid below is included in the report.
- * Support for this development by Sun Microsystems is gratefully acknowledged.
- */
- char *id = "$Id$\n"; /* version keyword string; presumably the "sccsid" the license header asks to be quoted with published results */
- #include "bench.h"
- #define N 1000000 /* Don't change this: loads per timed pass; the timing loop in loads() consumes it 1000 at a time */
- #define STRIDE (512/sizeof(char *)) /* default stride handed to loads() when no stride argument is given */
- #define MEMTRIES 4 /* timed passes per range; loads() keeps the fastest one */
- #define LOWER 512 /* smallest range, in bytes, that main() sweeps from */
- void loads(char *addr, size_t range, size_t stride); /* time a pointer chase over the first range bytes of addr */
- size_t step(size_t k); /* next range to test after k */
- int
- main(int ac, char **av)
- {
- size_t len;
- size_t range;
- size_t stride;
- int i;
- char *addr;
- len = atoi(av[1]) * 1024 * 1024;
- addr = (char *)malloc(len);
- if (av[2] == 0) {
- fprintf(stderr, "\"stride=%d\n", STRIDE);
- for (range = LOWER; range <= len; range = step(range)) {
- loads(addr, range, STRIDE);
- }
- } else {
- for (i = 2; i < ac; ++i) {
- stride = bytes(av[i]);
- fprintf(stderr, "\"stride=%d\n", stride);
- for (range = LOWER; range <= len; range = step(range)) {
- loads(addr, range, stride);
- }
- fprintf(stderr, "\n");
- }
- }
- return(0);
- }
- void
- loads(char *addr, size_t range, size_t stride)
- {
- register char **p = 0 /* lint */;
- size_t i;
- int tries = 0;
- int result = 0x7fffffff;
- double time;
- if (stride & (sizeof(char *) - 1)) {
- printf("lat_mem_rd: stride must be aligned.\n");
- return;
- }
-
- if (range < stride) {
- return;
- }
- /*
- * First create a list of pointers.
- *
- * This used to go forwards, we want to go backwards to try and defeat
- * HP's fetch ahead.
- *
- * We really need to do a random pattern once we are doing one hit per
- * page.
- */
- for (i = stride; i < range; i += stride) {
- *(char **)&addr[i] = (char*)&addr[i - stride];
- }
- *(char **)&addr[0] = (char*)&addr[i - stride];
- p = (char**)&addr[0];
- /*
- * Now walk them and time it.
- */
- for (tries = 0; tries < MEMTRIES; ++tries) {
- /* time loop with loads */
- #define ONE p = (char **)*p;
- #define FIVE ONE ONE ONE ONE ONE
- #define TEN FIVE FIVE
- #define FIFTY TEN TEN TEN TEN TEN
- #define HUNDRED FIFTY FIFTY
- i = N;
- start(0);
- while (i >= 1000) {
- HUNDRED
- HUNDRED
- HUNDRED
- HUNDRED
- HUNDRED
- HUNDRED
- HUNDRED
- HUNDRED
- HUNDRED
- HUNDRED
- i -= 1000;
- }
- i = stop(0,0);
- use_pointer((void *)p);
- if (i < result) {
- result = i;
- }
- }
- /*
- * We want to get to nanoseconds / load. We don't want to
- * lose any precision in the process. What we have is the
- * milliseconds it took to do N loads, where N is 1 million,
- * and we expect that each load took between 10 and 2000
- * nanoseconds.
- *
- * We want just the memory latency time, not including the
- * time to execute the load instruction. We allow one clock
- * for the instruction itself. So we need to subtract off
- * N * clk nanoseconds.
- *
- * lmbench 2.0 - do the subtration later, in the summary.
- * Doing it here was problematic.
- *
- * XXX - we do not account for loop overhead here.
- */
- time = (double)result;
- time *= 1000.; /* convert to nanoseconds */
- time /= (double)N; /* nanosecs per load */
- fprintf(stderr, "%.5f %.3f\n", range / (1024. * 1024), time);
- }
/*
 * step - return the range that follows k in the sweep.
 *
 * Below 1K the range doubles; from 1K up to (but not including) 4K it
 * grows by 1K; from 4K on it grows by one sixteenth of the smallest
 * power of two that is both at least 32K and strictly greater than k.
 */
size_t
step(size_t k)
{
	size_t	bound = 32 * 1024;

	if (k < 1024) {
		return (k + k);
	}
	if (k < 4 * 1024) {
		return (k + 1024);
	}

	/* Find the first power of two, starting at 32K, that exceeds k. */
	while (bound <= k) {
		bound <<= 1;
	}
	return (k + bound / 16);
}
|