123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423 |
- #include <math.h>
- #include "bench.h"
- #define BOOTSTRAP_COUNT 200
- /*
- * a comparison function used by qsort
- */
- int
- int_compare(const void *a, const void *b)
- {
- if (*(int*)a < *(int*)b) return -1;
- if (*(int*)a > *(int*)b) return 1;
- return 0;
- }
- /*
- * a comparison function used by qsort
- */
- int
- uint64_compare(const void *a, const void *b)
- {
- if (*(uint64*)a < *(uint64*)b) return -1;
- if (*(uint64*)a > *(uint64*)b) return 1;
- return 0;
- }
- /*
- * a comparison function used by qsort
- */
- int
- double_compare(const void *a, const void *b)
- {
- if (*(double*)a < *(double*)b) return -1;
- if (*(double*)a > *(double*)b) return 1;
- return 0;
- }
- /*
- * return the median value of an array of int
- */
- int
- int_median(int *values, int size)
- {
- qsort(values, size, sizeof(int), int_compare);
- if (size == 0) return 0;
- if (size % 2) {
- return values[size/2];
- }
- return (values[size/2 - 1] + values[size/2]) / 2;
- }
- /*
- * return the median value of an array of int
- */
- uint64
- uint64_median(uint64 *values, int size)
- {
- qsort(values, size, sizeof(uint64), uint64_compare);
- if (size == 0) return 0;
- if (size % 2) {
- return values[size/2];
- }
- return (values[size/2 - 1] + values[size/2]) / 2;
- }
- /*
- * return the median value of an array of doubles
- */
- double
- double_median(double *values, int size)
- {
- qsort(values, size, sizeof(double), double_compare);
- if (size == 0) return 0.;
- if (size % 2) {
- return values[size/2];
- }
- return (values[size/2 - 1] + values[size/2]) / 2.0;
- }
- /*
- * return the mean value of an array of int
- */
- int
- int_mean(int *values, int size)
- {
- int i;
- int sum = 0;
- for (i = 0; i < size; ++i)
- sum += values[i];
- return sum / size;
- }
- /*
- * return the mean value of an array of int
- */
- uint64
- uint64_mean(uint64 *values, int size)
- {
- int i;
- uint64 sum = 0;
- for (i = 0; i < size; ++i)
- sum += values[i];
- return sum / size;
- }
- /*
- * return the mean value of an array of doubles
- */
- double
- double_mean(double *values, int size)
- {
- int i;
- double sum = 0.0;
- for (i = 0; i < size; ++i)
- sum += values[i];
- return sum / (double)size;
- }
- /*
- * return the min value of an array of int
- */
- int
- int_min(int *values, int size)
- {
- int i;
- int min = values[0];
- for (i = 1; i < size; ++i)
- if (values[i] < min) min = values[i];
- return min;
- }
- /*
- * return the min value of an array of int
- */
- uint64
- uint64_min(uint64 *values, int size)
- {
- int i;
- uint64 min = values[0];
- for (i = 1; i < size; ++i)
- if (values[i] < min) min = values[i];
- return min;
- }
- /*
- * return the min value of an array of doubles
- */
- double
- double_min(double *values, int size)
- {
- int i;
- double min = values[0];
- for (i = 1; i < size; ++i)
- if (values[i] < min) min = values[i];
- return min;
- }
- /*
- * return the max value of an array of int
- */
- int
- int_max(int *values, int size)
- {
- int i;
- int max = values[0];
- for (i = 1; i < size; ++i)
- if (values[i] > max) max = values[i];
- return max;
- }
- /*
- * return the max value of an array of int
- */
- uint64
- uint64_max(uint64 *values, int size)
- {
- int i;
- uint64 max = values[0];
- for (i = 1; i < size; ++i)
- if (values[i] > max) max = values[i];
- return max;
- }
- /*
- * return the max value of an array of doubles
- */
- double
- double_max(double *values, int size)
- {
- int i;
- double max = values[0];
- for (i = 1; i < size; ++i)
- if (values[i] > max) max = values[i];
- return max;
- }
- /*
- * return the standard error of an array of ints
- */
- double int_stderr(int *values, int size)
- {
- int i;
- double sum = 0.0;
- int mean = int_mean(values, size);
- for (i = 0; i < size; ++i)
- sum += (double)((values[i] - mean) * (values[i] - mean));
- sum /= (double)(size * size);
- return sqrt(sum);
- }
- /*
- * return the standard error of an array of uint64s
- */
- double uint64_stderr(uint64 *values, int size)
- {
- int i;
- double sum = 0.0;
- uint64 mean = uint64_mean(values, size);
- for (i = 0; i < size; ++i)
- sum += (double)((values[i] - mean) * (values[i] - mean));
- sum /= (double)(size * size);
- return sqrt(sum);
- }
- /*
- * return the standard error of an array of doubles
- */
- double double_stderr(double *values, int size)
- {
- int i;
- double sum = 0.0;
- double mean = double_mean(values, size);
- for (i = 0; i < size; ++i)
- sum += (double)((values[i] - mean) * (values[i] - mean));
- sum /= (double)(size * size);
- return sqrt(sum);
- }
- /*
- * BOOTSTRAP:
- *
- * stderr = sqrt(sum_i(s[i] - sum_j(s[j])/B)**2 / (B - 1))
- *
- * Reference: "An Introduction to the Bootstrap" by Bradley
- * Efron and Robert J. Tibshirani, page 12.
- */
- /*
- * return the bootstrap estimation of the standard error
- * of an array of ints
- */
- double int_bootstrap_stderr(int *values, int size, int_stat f)
- {
- int i, j;
- int *samples = (int*)malloc(size * sizeof(int));
- double *s = (double*)malloc(BOOTSTRAP_COUNT * sizeof(double));
- double s_sum = 0;
- double sum = 0;
- /* generate the stderr for each of the bootstrap samples */
- for (i = 0; i < BOOTSTRAP_COUNT; ++i) {
- for (j = 0; j < size; ++j)
- samples[j] = values[rand() % size];
- s[i] = (double)(*f)(samples, size);
- s_sum += s[i]; /* CHS: worry about overflow */
- }
- s_sum /= (double)BOOTSTRAP_COUNT;
-
- for (i = 0; i < BOOTSTRAP_COUNT; ++i)
- sum += (s[i] - s_sum) * (s[i] - s_sum);
- sum /= (double)(BOOTSTRAP_COUNT - 1);
- free(samples);
- free(s);
- return sqrt(sum);
- }
- /*
- * return the bootstrap estimation of the standard error
- * of an array of uint64s
- */
- double uint64_bootstrap_stderr(uint64 *values, int size, uint64_stat f)
- {
- int i, j;
- uint64 *samples = (uint64*)malloc(size * sizeof(uint64));
- double *s = (double*)malloc(BOOTSTRAP_COUNT * sizeof(double));
- double s_sum;
- double sum;
- /* generate the stderr for each of the bootstrap samples */
- for (i = 0, s_sum = 0.0; i < BOOTSTRAP_COUNT; ++i) {
- for (j = 0; j < size; ++j)
- samples[j] = values[rand() % size];
- s[i] = (double)(*f)(samples, size);
- s_sum += s[i]; /* CHS: worry about overflow */
- }
- s_sum /= (double)BOOTSTRAP_COUNT;
-
- for (i = 0, sum = 0.0; i < BOOTSTRAP_COUNT; ++i)
- sum += (s[i] - s_sum) * (s[i] - s_sum);
- free(samples);
- free(s);
- return sqrt(sum/(double)(BOOTSTRAP_COUNT - 1));
- }
- /*
- * return the bootstrap estimation of the standard error
- * of an array of doubles
- */
- double double_bootstrap_stderr(double *values, int size, double_stat f)
- {
- int i, j;
- double *samples = (double*)malloc(size * sizeof(double));
- double *s = (double*)malloc(BOOTSTRAP_COUNT * sizeof(double));
- double s_sum = 0;
- double sum = 0;
- /* generate the stderr for each of the bootstrap samples */
- for (i = 0; i < BOOTSTRAP_COUNT; ++i) {
- for (j = 0; j < size; ++j)
- samples[j] = values[rand() % size];
- s[i] = (*f)(samples, size);
- s_sum += (double)s[i]; /* CHS: worry about overflow */
- }
- s_sum /= (double)BOOTSTRAP_COUNT;
-
- for (i = 0; i < BOOTSTRAP_COUNT; ++i)
- sum += (s[i] - s_sum) * (s[i] - s_sum);
- sum /= (double)(BOOTSTRAP_COUNT - 1);
- free(samples);
- free(s);
- return sqrt(sum);
- }
- /*
- * regression(x, y, sig, n, a, b, sig_a, sig_b, chi2)
- *
- * This routine is derived from equations in "Numerical Recipes in C"
- * (second edition) by Press, et. al., pages 661-665.
- *
- * compute the linear regression y = a + bx for (x,y), where y[i] has
- * standard deviation sig[i].
- *
- * returns the coefficients a and b, along with an estimation of their
- * error (standard deviation) in sig_a and sig_b.
- *
- * returns chi2 for "goodness of fit" information.
- */
- void
- regression(double *x, double *y, double *sig, int n,
- double *a, double *b, double *sig_a, double *sig_b,
- double *chi2)
- {
- int i;
- double S = 0.0, Sx = 0.0, Sy = 0.0, Stt = 0.0, Sx_S;
- /* compute some basic statistics */
- for (i = 0; i < n; ++i) {
- /* Equations 15.2.4: for S, Sx, Sy */
- double weight = 1.0 / (sig ? sig[i] * sig[i] : 1.0);
- S += weight;
- Sx += weight * x[i];
- Sy += weight * y[i];
- }
- *b = 0.0;
- Sx_S = Sx / S;
- for (i = 0; i < n; ++i) {
- /*
- * Equation 15.2.15 for t
- * Equation 15.2.16 for Stt
- * Equation 15.2.17 for b, do summation portion of equation
- * compute Sum i=0,n-1 (t_i * y[i] / sig[i]))
- */
- double t_i = (x[i] - Sx_S) / (sig ? sig[i] : 1.0);
- Stt += t_i * t_i;
- *b += t_i * y[i] / (sig ? sig[i] : 1.0);
- }
- /*
- * Equation 15.2.17 for b, do 1/Stt * summation
- * Equation 15.2.18 for a
- * Equation 15.2.19 for sig_a
- * Equation 15.2.20 for sig_b
- */
- *b /= Stt;
- *a = (Sy - *b * Sx) / S;
- *sig_a = sqrt((1.0 + (Sx * Sx) / (S * Stt)) / S);
- *sig_b = sqrt(1.0 / Stt);
- /* Equation 15.2.2 for chi2, the merit function */
- *chi2 = 0.0;
- for (i = 0; i < n; ++i) {
- double merit = (y[i] - ((*a) + (*b) * x[i])) / (sig ? sig[i] : 1.0);
- *chi2 += merit * merit;
- }
- if (sig == NULL) {
- *sig_a *= sqrt((*chi2) / (n - 2));
- *sig_b *= sqrt((*chi2) / (n - 2));
- }
- }
|