lat_mem_rd.c 3.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154
  1. /*
  2. * lat_mem_rd.c - measure memory load latency
  3. *
  4. * usage: lat_mem_rd size-in-MB [stride ...]
  5. *
  6. * Copyright (c) 1994 Larry McVoy. Distributed under the FSF GPL with
  7. * additional restriction that results may published only if
  8. * (1) the benchmark is unmodified, and
  9. * (2) the version in the sccsid below is included in the report.
  10. * Support for this development by Sun Microsystems is gratefully acknowledged.
  11. */
  /* Version string; presumably the "sccsid" the license header asks reports to include. */
  char *id = "$Id$\n";
  #include "bench.h"
  /* Number of dependent loads timed per try in loads(). */
  #define N 1000000 /* Don't change this */
  /* Default stride, used by main() when no stride is given on the command line. */
  #define STRIDE (512/sizeof(char *))
  /* Number of timed tries per measurement; loads() keeps the fastest one. */
  #define MEMTRIES 4
  /* Smallest range, in bytes, that main() measures. */
  #define LOWER 512
  /* Build a pointer chain over addr[0..range) with the given stride and time walking it. */
  void loads(char *addr, size_t range, size_t stride);
  /* Return the next range size to measure after k. */
  size_t step(size_t k);
  20. int
  21. main(int ac, char **av)
  22. {
  23. size_t len;
  24. size_t range;
  25. size_t stride;
  26. int i;
  27. char *addr;
  28. len = atoi(av[1]) * 1024 * 1024;
  29. addr = (char *)malloc(len);
  30. if (av[2] == 0) {
  31. fprintf(stderr, "\"stride=%d\n", STRIDE);
  32. for (range = LOWER; range <= len; range = step(range)) {
  33. loads(addr, range, STRIDE);
  34. }
  35. } else {
  36. for (i = 2; i < ac; ++i) {
  37. stride = bytes(av[i]);
  38. fprintf(stderr, "\"stride=%d\n", stride);
  39. for (range = LOWER; range <= len; range = step(range)) {
  40. loads(addr, range, stride);
  41. }
  42. fprintf(stderr, "\n");
  43. }
  44. }
  45. return(0);
  46. }
  47. void
  48. loads(char *addr, size_t range, size_t stride)
  49. {
  50. register char **p = 0 /* lint */;
  51. size_t i;
  52. int tries = 0;
  53. int result = 0x7fffffff;
  54. double time;
  55. if (stride & (sizeof(char *) - 1)) {
  56. printf("lat_mem_rd: stride must be aligned.\n");
  57. return;
  58. }
  59. if (range < stride) {
  60. return;
  61. }
  62. /*
  63. * First create a list of pointers.
  64. *
  65. * This used to go forwards, we want to go backwards to try and defeat
  66. * HP's fetch ahead.
  67. *
  68. * We really need to do a random pattern once we are doing one hit per
  69. * page.
  70. */
  71. for (i = stride; i < range; i += stride) {
  72. *(char **)&addr[i] = (char*)&addr[i - stride];
  73. }
  74. *(char **)&addr[0] = (char*)&addr[i - stride];
  75. p = (char**)&addr[0];
  76. /*
  77. * Now walk them and time it.
  78. */
  79. for (tries = 0; tries < MEMTRIES; ++tries) {
  80. /* time loop with loads */
  81. #define ONE p = (char **)*p;
  82. #define FIVE ONE ONE ONE ONE ONE
  83. #define TEN FIVE FIVE
  84. #define FIFTY TEN TEN TEN TEN TEN
  85. #define HUNDRED FIFTY FIFTY
  86. i = N;
  87. start(0);
  88. while (i >= 1000) {
  89. HUNDRED
  90. HUNDRED
  91. HUNDRED
  92. HUNDRED
  93. HUNDRED
  94. HUNDRED
  95. HUNDRED
  96. HUNDRED
  97. HUNDRED
  98. HUNDRED
  99. i -= 1000;
  100. }
  101. i = stop(0,0);
  102. use_pointer((void *)p);
  103. if (i < result) {
  104. result = i;
  105. }
  106. }
  107. /*
  108. * We want to get to nanoseconds / load. We don't want to
  109. * lose any precision in the process. What we have is the
  110. * milliseconds it took to do N loads, where N is 1 million,
  111. * and we expect that each load took between 10 and 2000
  112. * nanoseconds.
  113. *
  114. * We want just the memory latency time, not including the
  115. * time to execute the load instruction. We allow one clock
  116. * for the instruction itself. So we need to subtract off
  117. * N * clk nanoseconds.
  118. *
  119. * lmbench 2.0 - do the subtration later, in the summary.
  120. * Doing it here was problematic.
  121. *
  122. * XXX - we do not account for loop overhead here.
  123. */
  124. time = (double)result;
  125. time *= 1000.; /* convert to nanoseconds */
  126. time /= (double)N; /* nanosecs per load */
  127. fprintf(stderr, "%.5f %.3f\n", range / (1024. * 1024), time);
  128. }
  /*
   * Return the next range size to measure after k.
   *
   *   k < 1024        : double it
   *   1024 <= k < 4K  : advance by 1024
   *   k >= 4K         : advance by 1/16 of the smallest power of two that
   *                     is at least 32K and strictly greater than k
   *
   * Note that step(0) is 0, so callers must start from a nonzero value.
   * For very large k the doubling is stopped before it can wrap to zero
   * (which would otherwise loop forever), and the result saturates at the
   * maximum size_t value instead of wrapping around.
   */
  size_t
  step(size_t k)
  {
      const size_t max = (size_t)-1;
      size_t s;
      size_t inc;

      if (k < 1024) {
          return (k * 2);
      }
      if (k < 4 * 1024) {
          return (k + 1024);
      }

      s = 32 * 1024;
      /* Stop doubling once another doubling would overflow size_t. */
      while (s <= k && s <= max / 2) {
          s *= 2;
      }

      inc = s / 16;
      if (k > max - inc) {
          return (max);
      }
      return (k + inc);
  }