lib_stats.c 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423
  1. #include <math.h>
  2. #include "bench.h"
  3. #define BOOTSTRAP_COUNT 200
  4. /*
  5. * a comparison function used by qsort
  6. */
  7. int
  8. int_compare(const void *a, const void *b)
  9. {
  10. if (*(int*)a < *(int*)b) return -1;
  11. if (*(int*)a > *(int*)b) return 1;
  12. return 0;
  13. }
  14. /*
  15. * a comparison function used by qsort
  16. */
  17. int
  18. uint64_compare(const void *a, const void *b)
  19. {
  20. if (*(uint64*)a < *(uint64*)b) return -1;
  21. if (*(uint64*)a > *(uint64*)b) return 1;
  22. return 0;
  23. }
  24. /*
  25. * a comparison function used by qsort
  26. */
  27. int
  28. double_compare(const void *a, const void *b)
  29. {
  30. if (*(double*)a < *(double*)b) return -1;
  31. if (*(double*)a > *(double*)b) return 1;
  32. return 0;
  33. }
  34. /*
  35. * return the median value of an array of int
  36. */
  37. int
  38. int_median(int *values, int size)
  39. {
  40. qsort(values, size, sizeof(int), int_compare);
  41. if (size == 0) return 0;
  42. if (size % 2) {
  43. return values[size/2];
  44. }
  45. return (values[size/2 - 1] + values[size/2]) / 2;
  46. }
  47. /*
  48. * return the median value of an array of int
  49. */
  50. uint64
  51. uint64_median(uint64 *values, int size)
  52. {
  53. qsort(values, size, sizeof(uint64), uint64_compare);
  54. if (size == 0) return 0;
  55. if (size % 2) {
  56. return values[size/2];
  57. }
  58. return (values[size/2 - 1] + values[size/2]) / 2;
  59. }
  60. /*
  61. * return the median value of an array of doubles
  62. */
  63. double
  64. double_median(double *values, int size)
  65. {
  66. qsort(values, size, sizeof(double), double_compare);
  67. if (size == 0) return 0.;
  68. if (size % 2) {
  69. return values[size/2];
  70. }
  71. return (values[size/2 - 1] + values[size/2]) / 2.0;
  72. }
  73. /*
  74. * return the mean value of an array of int
  75. */
  76. int
  77. int_mean(int *values, int size)
  78. {
  79. int i;
  80. int sum = 0;
  81. for (i = 0; i < size; ++i)
  82. sum += values[i];
  83. return sum / size;
  84. }
  85. /*
  86. * return the mean value of an array of int
  87. */
  88. uint64
  89. uint64_mean(uint64 *values, int size)
  90. {
  91. int i;
  92. uint64 sum = 0;
  93. for (i = 0; i < size; ++i)
  94. sum += values[i];
  95. return sum / size;
  96. }
  97. /*
  98. * return the mean value of an array of doubles
  99. */
  100. double
  101. double_mean(double *values, int size)
  102. {
  103. int i;
  104. double sum = 0.0;
  105. for (i = 0; i < size; ++i)
  106. sum += values[i];
  107. return sum / (double)size;
  108. }
  109. /*
  110. * return the min value of an array of int
  111. */
  112. int
  113. int_min(int *values, int size)
  114. {
  115. int i;
  116. int min = values[0];
  117. for (i = 1; i < size; ++i)
  118. if (values[i] < min) min = values[i];
  119. return min;
  120. }
  121. /*
  122. * return the min value of an array of int
  123. */
  124. uint64
  125. uint64_min(uint64 *values, int size)
  126. {
  127. int i;
  128. uint64 min = values[0];
  129. for (i = 1; i < size; ++i)
  130. if (values[i] < min) min = values[i];
  131. return min;
  132. }
  133. /*
  134. * return the min value of an array of doubles
  135. */
  136. double
  137. double_min(double *values, int size)
  138. {
  139. int i;
  140. double min = values[0];
  141. for (i = 1; i < size; ++i)
  142. if (values[i] < min) min = values[i];
  143. return min;
  144. }
  145. /*
  146. * return the max value of an array of int
  147. */
  148. int
  149. int_max(int *values, int size)
  150. {
  151. int i;
  152. int max = values[0];
  153. for (i = 1; i < size; ++i)
  154. if (values[i] > max) max = values[i];
  155. return max;
  156. }
  157. /*
  158. * return the max value of an array of int
  159. */
  160. uint64
  161. uint64_max(uint64 *values, int size)
  162. {
  163. int i;
  164. uint64 max = values[0];
  165. for (i = 1; i < size; ++i)
  166. if (values[i] > max) max = values[i];
  167. return max;
  168. }
  169. /*
  170. * return the max value of an array of doubles
  171. */
  172. double
  173. double_max(double *values, int size)
  174. {
  175. int i;
  176. double max = values[0];
  177. for (i = 1; i < size; ++i)
  178. if (values[i] > max) max = values[i];
  179. return max;
  180. }
  181. /*
  182. * return the standard error of an array of ints
  183. */
  184. double int_stderr(int *values, int size)
  185. {
  186. int i;
  187. double sum = 0.0;
  188. int mean = int_mean(values, size);
  189. for (i = 0; i < size; ++i)
  190. sum += (double)((values[i] - mean) * (values[i] - mean));
  191. sum /= (double)(size * size);
  192. return sqrt(sum);
  193. }
  194. /*
  195. * return the standard error of an array of uint64s
  196. */
  197. double uint64_stderr(uint64 *values, int size)
  198. {
  199. int i;
  200. double sum = 0.0;
  201. uint64 mean = uint64_mean(values, size);
  202. for (i = 0; i < size; ++i)
  203. sum += (double)((values[i] - mean) * (values[i] - mean));
  204. sum /= (double)(size * size);
  205. return sqrt(sum);
  206. }
  207. /*
  208. * return the standard error of an array of doubles
  209. */
  210. double double_stderr(double *values, int size)
  211. {
  212. int i;
  213. double sum = 0.0;
  214. double mean = double_mean(values, size);
  215. for (i = 0; i < size; ++i)
  216. sum += (double)((values[i] - mean) * (values[i] - mean));
  217. sum /= (double)(size * size);
  218. return sqrt(sum);
  219. }
  220. /*
  221. * BOOTSTRAP:
  222. *
  223. * stderr = sqrt(sum_i(s[i] - sum_j(s[j])/B)**2 / (B - 1))
  224. *
  225. * Reference: "An Introduction to the Bootstrap" by Bradley
  226. * Efron and Robert J. Tibshirani, page 12.
  227. */
  228. /*
  229. * return the bootstrap estimation of the standard error
  230. * of an array of ints
  231. */
  232. double int_bootstrap_stderr(int *values, int size, int_stat f)
  233. {
  234. int i, j;
  235. int *samples = (int*)malloc(size * sizeof(int));
  236. double *s = (double*)malloc(BOOTSTRAP_COUNT * sizeof(double));
  237. double s_sum = 0;
  238. double sum = 0;
  239. /* generate the stderr for each of the bootstrap samples */
  240. for (i = 0; i < BOOTSTRAP_COUNT; ++i) {
  241. for (j = 0; j < size; ++j)
  242. samples[j] = values[rand() % size];
  243. s[i] = (double)(*f)(samples, size);
  244. s_sum += s[i]; /* CHS: worry about overflow */
  245. }
  246. s_sum /= (double)BOOTSTRAP_COUNT;
  247. for (i = 0; i < BOOTSTRAP_COUNT; ++i)
  248. sum += (s[i] - s_sum) * (s[i] - s_sum);
  249. sum /= (double)(BOOTSTRAP_COUNT - 1);
  250. free(samples);
  251. free(s);
  252. return sqrt(sum);
  253. }
  254. /*
  255. * return the bootstrap estimation of the standard error
  256. * of an array of uint64s
  257. */
  258. double uint64_bootstrap_stderr(uint64 *values, int size, uint64_stat f)
  259. {
  260. int i, j;
  261. uint64 *samples = (uint64*)malloc(size * sizeof(uint64));
  262. double *s = (double*)malloc(BOOTSTRAP_COUNT * sizeof(double));
  263. double s_sum;
  264. double sum;
  265. /* generate the stderr for each of the bootstrap samples */
  266. for (i = 0, s_sum = 0.0; i < BOOTSTRAP_COUNT; ++i) {
  267. for (j = 0; j < size; ++j)
  268. samples[j] = values[rand() % size];
  269. s[i] = (double)(*f)(samples, size);
  270. s_sum += s[i]; /* CHS: worry about overflow */
  271. }
  272. s_sum /= (double)BOOTSTRAP_COUNT;
  273. for (i = 0, sum = 0.0; i < BOOTSTRAP_COUNT; ++i)
  274. sum += (s[i] - s_sum) * (s[i] - s_sum);
  275. free(samples);
  276. free(s);
  277. return sqrt(sum/(double)(BOOTSTRAP_COUNT - 1));
  278. }
  279. /*
  280. * return the bootstrap estimation of the standard error
  281. * of an array of doubles
  282. */
  283. double double_bootstrap_stderr(double *values, int size, double_stat f)
  284. {
  285. int i, j;
  286. double *samples = (double*)malloc(size * sizeof(double));
  287. double *s = (double*)malloc(BOOTSTRAP_COUNT * sizeof(double));
  288. double s_sum = 0;
  289. double sum = 0;
  290. /* generate the stderr for each of the bootstrap samples */
  291. for (i = 0; i < BOOTSTRAP_COUNT; ++i) {
  292. for (j = 0; j < size; ++j)
  293. samples[j] = values[rand() % size];
  294. s[i] = (*f)(samples, size);
  295. s_sum += (double)s[i]; /* CHS: worry about overflow */
  296. }
  297. s_sum /= (double)BOOTSTRAP_COUNT;
  298. for (i = 0; i < BOOTSTRAP_COUNT; ++i)
  299. sum += (s[i] - s_sum) * (s[i] - s_sum);
  300. sum /= (double)(BOOTSTRAP_COUNT - 1);
  301. free(samples);
  302. free(s);
  303. return sqrt(sum);
  304. }
  305. /*
  306. * regression(x, y, sig, n, a, b, sig_a, sig_b, chi2)
  307. *
  308. * This routine is derived from equations in "Numerical Recipes in C"
  309. * (second edition) by Press, et. al., pages 661-665.
  310. *
  311. * compute the linear regression y = a + bx for (x,y), where y[i] has
  312. * standard deviation sig[i].
  313. *
  314. * returns the coefficients a and b, along with an estimation of their
  315. * error (standard deviation) in sig_a and sig_b.
  316. *
  317. * returns chi2 for "goodness of fit" information.
  318. */
  319. void
  320. regression(double *x, double *y, double *sig, int n,
  321. double *a, double *b, double *sig_a, double *sig_b,
  322. double *chi2)
  323. {
  324. int i;
  325. double S = 0.0, Sx = 0.0, Sy = 0.0, Stt = 0.0, Sx_S;
  326. /* compute some basic statistics */
  327. for (i = 0; i < n; ++i) {
  328. /* Equations 15.2.4: for S, Sx, Sy */
  329. double weight = 1.0 / (sig ? sig[i] * sig[i] : 1.0);
  330. S += weight;
  331. Sx += weight * x[i];
  332. Sy += weight * y[i];
  333. }
  334. *b = 0.0;
  335. Sx_S = Sx / S;
  336. for (i = 0; i < n; ++i) {
  337. /*
  338. * Equation 15.2.15 for t
  339. * Equation 15.2.16 for Stt
  340. * Equation 15.2.17 for b, do summation portion of equation
  341. * compute Sum i=0,n-1 (t_i * y[i] / sig[i]))
  342. */
  343. double t_i = (x[i] - Sx_S) / (sig ? sig[i] : 1.0);
  344. Stt += t_i * t_i;
  345. *b += t_i * y[i] / (sig ? sig[i] : 1.0);
  346. }
  347. /*
  348. * Equation 15.2.17 for b, do 1/Stt * summation
  349. * Equation 15.2.18 for a
  350. * Equation 15.2.19 for sig_a
  351. * Equation 15.2.20 for sig_b
  352. */
  353. *b /= Stt;
  354. *a = (Sy - *b * Sx) / S;
  355. *sig_a = sqrt((1.0 + (Sx * Sx) / (S * Stt)) / S);
  356. *sig_b = sqrt(1.0 / Stt);
  357. /* Equation 15.2.2 for chi2, the merit function */
  358. *chi2 = 0.0;
  359. for (i = 0; i < n; ++i) {
  360. double merit = (y[i] - ((*a) + (*b) * x[i])) / (sig ? sig[i] : 1.0);
  361. *chi2 += merit * merit;
  362. }
  363. if (sig == NULL) {
  364. *sig_a *= sqrt((*chi2) / (n - 2));
  365. *sig_b *= sqrt((*chi2) / (n - 2));
  366. }
  367. }