prob_distr.c 51 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688
  1. /* Copyright (c) 2018-2019, The Tor Project, Inc. */
  2. /* See LICENSE for licensing information */
  3. /**
  4. * \file prob_distr.c
  5. *
  6. * \brief
  7. * Implements various probability distributions.
  8. * Almost all code is courtesy of Riastradh.
  9. *
  10. * \details
  11. * Here are some details that might help you understand this file:
  12. *
  13. * - Throughout this file, `eps' means the largest relative error of a
  14. * correctly rounded floating-point operation, which in binary64
  15. * floating-point arithmetic is 2^-53. Here the relative error of a
  16. * true value x from a computed value y is |x - y|/|x|. This
  17. * definition of epsilon is conventional for numerical analysts when
  18. * writing error analyses. (If your libm doesn't provide correctly
  19. * rounded exp and log, their relative error is usually below 2*2^-53
  20. * and probably closer to 1.1*2^-53 instead.)
  21. *
  22. * The C constant DBL_EPSILON is actually twice this, and should
  23. * perhaps rather be named ulp(1) -- that is, it is the distance from
  24. * 1 to the next greater floating-point number, which is usually of
  25. * more interest to programmers and hardware engineers.
  26. *
  27. * Since this file is concerned mainly with error bounds rather than
  28. * with low-level bit-hacking of floating-point numbers, we adopt the
  29. * numerical analysts' definition in the comments, though we do use
  30. * DBL_EPSILON in a handful of places where it is convenient to use
  31. * some function of eps = DBL_EPSILON/2 in a case analysis.
  32. *
  33. * - In various functions (e.g. sample_log_logistic()) we jump through hoops so
  34. * that we can use reals closer to 0 than closer to 1, since we achieve much
  35. * greater accuracy for floating point numbers near 0. In particular, we can
  36. * represent differences as small as 10^-300 for numbers near 0, but of no
  37. * less than 10^-16 for numbers near 1.
  38. **/
  39. #define PROB_DISTR_PRIVATE
  40. #include "orconfig.h"
  41. #include "lib/math/prob_distr.h"
  42. #include "lib/crypt_ops/crypto_rand.h"
  43. #include "lib/cc/ctassert.h"
  44. #include "lib/log/util_bug.h"
  45. #include <float.h>
  46. #include <math.h>
  47. #include <stddef.h>
/** Declare a function that downcasts from a generic dist struct to the actual
 * subtype probability distribution it represents.
 *
 * The generated function, dist_to_const_<name>(), asserts that the generic
 * object's ops table is the one for <name> (so a mismatched downcast trips
 * tor_assert rather than silently misinterpreting memory), then recovers the
 * containing struct <name> from its embedded `base` member via SUBTYPE_P. */
#define DECLARE_PROB_DISTR_DOWNCAST_FN(name) \
  static inline \
  const struct name * \
  dist_to_const_##name(const struct dist *obj) { \
    tor_assert(obj->ops == &name##_ops); \
    return SUBTYPE_P(obj, struct name, base); \
  }
/* One checked downcast per distribution type implemented in this file. */
DECLARE_PROB_DISTR_DOWNCAST_FN(uniform)
DECLARE_PROB_DISTR_DOWNCAST_FN(geometric)
DECLARE_PROB_DISTR_DOWNCAST_FN(logistic)
DECLARE_PROB_DISTR_DOWNCAST_FN(log_logistic)
DECLARE_PROB_DISTR_DOWNCAST_FN(genpareto)
DECLARE_PROB_DISTR_DOWNCAST_FN(weibull)
  63. /**
  64. * Count number of one bits in 32-bit word.
  65. */
  66. static unsigned
  67. bitcount32(uint32_t x)
  68. {
  69. /* Count two-bit groups. */
  70. x -= (x >> 1) & UINT32_C(0x55555555);
  71. /* Count four-bit groups. */
  72. x = ((x >> 2) & UINT32_C(0x33333333)) + (x & UINT32_C(0x33333333));
  73. /* Count eight-bit groups. */
  74. x = (x + (x >> 4)) & UINT32_C(0x0f0f0f0f);
  75. /* Sum all eight-bit groups, and extract the sum. */
  76. return (x * UINT32_C(0x01010101)) >> 24;
  77. }
  78. /**
  79. * Count leading zeros in 32-bit word.
  80. */
  81. static unsigned
  82. clz32(uint32_t x)
  83. {
  84. /* Round up to a power of two. */
  85. x |= x >> 1;
  86. x |= x >> 2;
  87. x |= x >> 4;
  88. x |= x >> 8;
  89. x |= x >> 16;
  90. /* Subtract count of one bits from 32. */
  91. return (32 - bitcount32(x));
  92. }
  93. /*
  94. * Some lemmas that will be used throughout this file to prove various error
  95. * bounds:
  96. *
  97. * Lemma 1. If |d| <= 1/2, then 1/(1 + d) <= 2.
  98. *
  99. * Proof. If 0 <= d <= 1/2, then 1 + d >= 1, so that 1/(1 + d) <= 1.
  100. * If -1/2 <= d <= 0, then 1 + d >= 1/2, so that 1/(1 + d) <= 2. QED.
  101. *
  102. * Lemma 2. If b = a*(1 + d)/(1 + d') for |d'| < 1/2 and nonzero a, b,
  103. * then b = a*(1 + e) for |e| <= 2|d' - d|.
  104. *
  105. * Proof. |a - b|/|a|
  106. * = |a - a*(1 + d)/(1 + d')|/|a|
  107. * = |1 - (1 + d)/(1 + d')|
  108. * = |(1 + d' - 1 - d)/(1 + d')|
  109. * = |(d' - d)/(1 + d')|
  110. * <= 2|d' - d|, by Lemma 1,
  111. *
  112. * QED.
  113. *
  114. * Lemma 3. For |d|, |d'| < 1/4,
  115. *
  116. * |log((1 + d)/(1 + d'))| <= 4|d - d'|.
  117. *
  118. * Proof. Write
  119. *
  120. * log((1 + d)/(1 + d'))
  121. * = log(1 + (1 + d)/(1 + d') - 1)
  122. * = log(1 + (1 + d - 1 - d')/(1 + d')
  123. * = log(1 + (d - d')/(1 + d')).
  124. *
  125. * By Lemma 1, |(d - d')/(1 + d')| < 2|d' - d| < 1, so the Taylor
  126. * series of log(1 + x) converges absolutely for (d - d')/(1 + d'),
  127. * and thus we have
  128. *
  129. * |log(1 + (d - d')/(1 + d'))|
  130. * = |\sum_{n=1}^\infty ((d - d')/(1 + d'))^n/n|
  131. * <= \sum_{n=1}^\infty |(d - d')/(1 + d')|^n/n
  132. * <= \sum_{n=1}^\infty |2(d' - d)|^n/n
  133. * <= \sum_{n=1}^\infty |2(d' - d)|^n
  134. * = 1/(1 - |2(d' - d)|)
  135. * <= 4|d' - d|,
  136. *
  137. * QED.
  138. *
  139. * Lemma 4. If 1/e <= 1 + x <= e, then
  140. *
  141. * log(1 + (1 + d) x) = (1 + d') log(1 + x)
  142. *
  143. * for |d'| < 8|d|.
  144. *
  145. * Proof. Write
  146. *
  147. * log(1 + (1 + d) x)
  148. * = log(1 + x + x*d)
  149. * = log((1 + x) (1 + x + x*d)/(1 + x))
  150. * = log(1 + x) + log((1 + x + x*d)/(1 + x))
  151. * = log(1 + x) (1 + log((1 + x + x*d)/(1 + x))/log(1 + x)).
  152. *
  153. * The relative error is bounded by
  154. *
  155. * |log((1 + x + x*d)/(1 + x))/log(1 + x)|
  156. * <= 4|x + x*d - x|/|log(1 + x)|, by Lemma 3,
  157. * = 4|x*d|/|log(1 + x)|
  158. * < 8|d|,
  159. *
  160. * since in this range 0 < 1 - 1/e < x/log(1 + x) <= e - 1 < 2. QED.
  161. */
  162. /**
  163. * Compute the logistic function: f(x) = 1/(1 + e^{-x}) = e^x/(1 + e^x).
  164. * Maps a log-odds-space probability in [-\infty, +\infty] into a direct-space
  165. * probability in [0,1]. Inverse of logit.
  166. *
  167. * Ill-conditioned for large x; the identity logistic(-x) = 1 -
  168. * logistic(x) and the function logistichalf(x) = logistic(x) - 1/2 may
  169. * help to rearrange a computation.
  170. *
  171. * This implementation gives relative error bounded by 7 eps.
  172. */
  173. STATIC double
  174. logistic(double x)
  175. {
  176. if (x <= log(DBL_EPSILON/2)) {
  177. /*
  178. * If x <= log(DBL_EPSILON/2) = log(eps), then e^x <= eps. In this case
  179. * we will approximate the logistic() function with e^x because the
  180. * relative error is less than eps. Here is a calculation of the
  181. * relative error between the logistic() function and e^x and a proof
  182. * that it's less than eps:
  183. *
  184. * |e^x - e^x/(1 + e^x)|/|e^x/(1 + e^x)|
  185. * <= |1 - 1/(1 + e^x)|*|1 + e^x|
  186. * = |e^x/(1 + e^x)|*|1 + e^x|
  187. * = |e^x|
  188. * <= eps.
  189. */
  190. return exp(x); /* return e^x */
  191. } else if (x <= -log(DBL_EPSILON/2)) {
  192. /*
  193. * e^{-x} > 0, so 1 + e^{-x} > 1, and 0 < 1/(1 +
  194. * e^{-x}) < 1; further, since e^{-x} < 1 + e^{-x}, we
  195. * also have 0 < 1/(1 + e^{-x}) < 1. Thus, if exp has
  196. * relative error d0, + has relative error d1, and /
  197. * has relative error d2, then we get
  198. *
  199. * (1 + d2)/[(1 + (1 + d0) e^{-x})(1 + d1)]
  200. * = (1 + d0)/[1 + e^{-x} + d0 e^{-x}
  201. * + d1 + d1 e^{-x} + d0 d1 e^{-x}]
  202. * = (1 + d0)/[(1 + e^{-x})
  203. * * (1 + d0 e^{-x}/(1 + e^{-x})
  204. * + d1/(1 + e^{-x})
  205. * + d0 d1 e^{-x}/(1 + e^{-x}))].
  206. * = (1 + d0)/[(1 + e^{-x})(1 + d')]
  207. * = [1/(1 + e^{-x})] (1 + d0)/(1 + d')
  208. *
  209. * where
  210. *
  211. * d' = d0 e^{-x}/(1 + e^{-x})
  212. * + d1/(1 + e^{-x})
  213. * + d0 d1 e^{-x}/(1 + e^{-x}).
  214. *
  215. * By Lemma 2 this relative error is bounded by
  216. *
  217. * 2|d0 - d'|
  218. * = 2|d0 - d0 e^{-x}/(1 + e^{-x})
  219. * - d1/(1 + e^{-x})
  220. * - d0 d1 e^{-x}/(1 + e^{-x})|
  221. * <= 2|d0| + 2|d0 e^{-x}/(1 + e^{-x})|
  222. * + 2|d1/(1 + e^{-x})|
  223. * + 2|d0 d1 e^{-x}/(1 + e^{-x})|
  224. * <= 2|d0| + 2|d0| + 2|d1| + 2|d0 d1|
  225. * <= 4|d0| + 2|d1| + 2|d0 d1|
  226. * <= 6 eps + 2 eps^2.
  227. */
  228. return 1/(1 + exp(-x));
  229. } else {
  230. /*
  231. * e^{-x} <= eps, so the relative error of 1 from 1/(1
  232. * + e^{-x}) is
  233. *
  234. * |1/(1 + e^{-x}) - 1|/|1/(1 + e^{-x})|
  235. * = |e^{-x}/(1 + e^{-x})|/|1/(1 + e^{-x})|
  236. * = |e^{-x}|
  237. * <= eps.
  238. *
  239. * This computation avoids an intermediate overflow
  240. * exception, although the effect on the result is
  241. * harmless.
  242. *
  243. * XXX Should maybe raise inexact here.
  244. */
  245. return 1;
  246. }
  247. }
  248. /**
  249. * Compute the logit function: log p/(1 - p). Defined on [0,1]. Maps
  250. * a direct-space probability in [0,1] to a log-odds-space probability
  251. * in [-\infty, +\infty]. Inverse of logistic.
  252. *
  253. * Ill-conditioned near 1/2 and 1; the identity logit(1 - p) =
  254. * -logit(p) and the function logithalf(p0) = logit(1/2 + p0) may help
  255. * to rearrange a computation for p in [1/(1 + e), 1 - 1/(1 + e)].
  256. *
  257. * This implementation gives relative error bounded by 10 eps.
  258. */
  259. STATIC double
  260. logit(double p)
  261. {
  262. /* logistic(-1) <= p <= logistic(+1) */
  263. if (1/(1 + exp(1)) <= p && p <= 1/(1 + exp(-1))) {
  264. /*
  265. * For inputs near 1/2, we want to compute log1p(near
  266. * 0) rather than log(near 1), so write this as:
  267. *
  268. * log(p/(1 - p)) = -log((1 - p)/p)
  269. * = -log(1 + (1 - p)/p - 1)
  270. * = -log(1 + (1 - p - p)/p)
  271. * = -log(1 + (1 - 2p)/p).
  272. *
  273. * Since p = 2p/2 <= 1 <= 2*2p = 4p, the floating-point
  274. * evaluation of 1 - 2p is exact; the only error arises
  275. * from division and log1p. First, note that if
  276. * logistic(-1) <= p <= logistic(+1), (1 - 2p)/p lies
  277. * in the bounds of Lemma 4.
  278. *
  279. * If division has relative error d0 and log1p has
  280. * relative error d1, the outcome is
  281. *
  282. * -(1 + d1) log(1 + (1 - 2p) (1 + d0)/p)
  283. * = -(1 + d1) (1 + d') log(1 + (1 - 2p)/p)
  284. * = -(1 + d1 + d' + d1 d') log(1 + (1 - 2p)/p).
  285. *
  286. * where |d'| < 8|d0| by Lemma 4. The relative error
  287. * is then bounded by
  288. *
  289. * |d1 + d' + d1 d'|
  290. * <= |d1| + 8|d0| + 8|d1 d0|
  291. * <= 9 eps + 8 eps^2.
  292. */
  293. return -log1p((1 - 2*p)/p);
  294. } else {
  295. /*
  296. * For inputs near 0, although 1 - p may be rounded to
  297. * 1, it doesn't matter much because the magnitude of
  298. * the result is so much larger. For inputs near 1, we
  299. * can compute 1 - p exactly, although the precision on
  300. * the input is limited so we won't ever get more than
  301. * about 700 for the output.
  302. *
  303. * If - has relative error d0, / has relative error d1,
  304. * and log has relative error d2, then
  305. *
  306. * (1 + d2) log((1 + d0) p/[(1 - p)(1 + d1)])
  307. * = (1 + d2) [log(p/(1 - p)) + log((1 + d0)/(1 + d1))]
  308. * = log(p/(1 - p)) + d2 log(p/(1 - p))
  309. * + (1 + d2) log((1 + d0)/(1 + d1))
  310. * = log(p/(1 - p))*[1 + d2 +
  311. * + (1 + d2) log((1 + d0)/(1 + d1))/log(p/(1 - p))]
  312. *
  313. * Since 0 <= p < logistic(-1) or logistic(+1) < p <=
  314. * 1, we have |log(p/(1 - p))| > 1. Hence this error
  315. * is bounded by
  316. *
  317. * |d2 + (1 + d2) log((1 + d0)/(1 + d1))/log(p/(1 - p))|
  318. * <= |d2| + |(1 + d2) log((1 + d0)/(1 + d1))
  319. * / log(p/(1 - p))|
  320. * <= |d2| + |(1 + d2) log((1 + d0)/(1 + d1))|
  321. * <= |d2| + 4|(1 + d2) (d0 - d1)|, by Lemma 3,
  322. * <= |d2| + 4|d0 - d1 + d2 d0 - d1 d0|
  323. * <= |d2| + 4|d0| + 4|d1| + 4|d2 d0| + 4|d1 d0|
  324. * <= 9 eps + 8 eps^2.
  325. */
  326. return log(p/(1 - p));
  327. }
  328. }
  329. /**
  330. * Compute the logit function, translated in input by 1/2: logithalf(p)
  331. * = logit(1/2 + p). Defined on [-1/2, 1/2]. Inverse of logistichalf.
  332. *
  333. * Ill-conditioned near +/-1/2. If |p0| > 1/2 - 1/(1 + e), it may be
  334. * better to compute 1/2 + p0 or -1/2 - p0 and to use logit instead.
  335. * This implementation gives relative error bounded by 34 eps.
  336. */
  337. STATIC double
  338. logithalf(double p0)
  339. {
  340. if (fabs(p0) <= 0.5 - 1/(1 + exp(1))) {
  341. /*
  342. * logit(1/2 + p0)
  343. * = log((1/2 + p0)/(1 - (1/2 + p0)))
  344. * = log((1/2 + p0)/(1/2 - p0))
  345. * = log(1 + (1/2 + p0)/(1/2 - p0) - 1)
  346. * = log(1 + (1/2 + p0 - (1/2 - p0))/(1/2 - p0))
  347. * = log(1 + (1/2 + p0 - 1/2 + p0)/(1/2 - p0))
  348. * = log(1 + 2 p0/(1/2 - p0))
  349. *
  350. * If the error of subtraction is d0, the error of
  351. * division is d1, and the error of log1p is d2, then
  352. * what we compute is
  353. *
  354. * (1 + d2) log(1 + (1 + d1) 2 p0/[(1 + d0) (1/2 - p0)])
  355. * = (1 + d2) log(1 + (1 + d') 2 p0/(1/2 - p0))
  356. * = (1 + d2) (1 + d'') log(1 + 2 p0/(1/2 - p0))
  357. * = (1 + d2 + d'' + d2 d'') log(1 + 2 p0/(1/2 - p0)),
  358. *
  359. * where |d'| < 2|d0 - d1| <= 4 eps by Lemma 2, and
  360. * |d''| < 8|d'| < 32 eps by Lemma 4 since
  361. *
  362. * 1/e <= 1 + 2*p0/(1/2 - p0) <= e
  363. *
  364. * when |p0| <= 1/2 - 1/(1 + e). Hence the relative
  365. * error is bounded by
  366. *
  367. * |d2 + d'' + d2 d''|
  368. * <= |d2| + |d''| + |d2 d''|
  369. * <= |d1| + 32 |d0| + 32 |d1 d0|
  370. * <= 33 eps + 32 eps^2.
  371. */
  372. return log1p(2*p0/(0.5 - p0));
  373. } else {
  374. /*
  375. * We have a choice of computing logit(1/2 + p0) or
  376. * -logit(1 - (1/2 + p0)) = -logit(1/2 - p0). It
  377. * doesn't matter which way we do this: either way,
  378. * since 1/2 p0 <= 1/2 <= 2 p0, the sum and difference
  379. * are computed exactly. So let's do the one that
  380. * skips the final negation.
  381. *
  382. * The result is
  383. *
  384. * (1 + d1) log((1 + d0) (1/2 + p0)/[(1 + d2) (1/2 - p0)])
  385. * = (1 + d1) (1 + log((1 + d0)/(1 + d2))
  386. * / log((1/2 + p0)/(1/2 - p0)))
  387. * * log((1/2 + p0)/(1/2 - p0))
  388. * = (1 + d') log((1/2 + p0)/(1/2 - p0))
  389. * = (1 + d') logit(1/2 + p0)
  390. *
  391. * where
  392. *
  393. * d' = d1 + log((1 + d0)/(1 + d2))/logit(1/2 + p0)
  394. * + d1 log((1 + d0)/(1 + d2))/logit(1/2 + p0).
  395. *
  396. * For |p| > 1/2 - 1/(1 + e), logit(1/2 + p0) > 1.
  397. * Provided |d0|, |d2| < 1/4, by Lemma 3 we have
  398. *
  399. * |log((1 + d0)/(1 + d2))| <= 4|d0 - d2|.
  400. *
  401. * Hence the relative error is bounded by
  402. *
  403. * |d'| <= |d1| + 4|d0 - d2| + 4|d1| |d0 - d2|
  404. * <= |d1| + 4|d0| + 4|d2| + 4|d1 d0| + 4|d1 d2|
  405. * <= 9 eps + 8 eps^2.
  406. */
  407. return log((0.5 + p0)/(0.5 - p0));
  408. }
  409. }
/*
 * The following random_uniform_01 is tailored for IEEE 754 binary64
 * floating-point or smaller.  It can be adapted to larger
 * floating-point formats like i387 80-bit or IEEE 754 binary128, but
 * it may require sampling more bits.
 */
/* Compile-time checks that the double format matches those assumptions. */
CTASSERT(FLT_RADIX == 2);          /* binary floating-point radix */
CTASSERT(-DBL_MIN_EXP <= 1021);    /* exponent range covers the sampler's */
CTASSERT(DBL_MANT_DIG <= 53);      /* significand fits in the 64 bits drawn */
/**
 * Draw a floating-point number in [0, 1] with uniform distribution.
 *
 * Note that the probability of returning 0 is less than 2^-1074, so
 * callers need not check for it.  However, callers that cannot handle
 * rounding to 1 must deal with that, because it occurs with
 * probability 2^-54, which is small but nonnegligible.
 */
STATIC double
random_uniform_01(void)
{
  uint32_t z, x, hi, lo;
  double s;

  /*
   * Draw an exponent, geometrically distributed, but give up if
   * we get a run of more than 1088 zeros, which really means the
   * system is broken.  (1088 = 34 consecutive all-zero 32-bit
   * draws from the RNG.)
   */
  z = 0;
  while ((x = crypto_fast_rng_get_u32(get_thread_fast_rng())) == 0) {
    if (z >= 1088)
      /* Your bit sampler is broken.  Go home. */
      return 0;
    z += 32;
  }
  /* Add the leading zeros of the first nonzero word to the exponent. */
  z += clz32(x);

  /*
   * Pick 32-bit halves of an odd normalized significand.
   * Picking it odd breaks ties in the subsequent rounding, which
   * occur only with measure zero in the uniform distribution on
   * [0, 1].  (hi's top bit forces normalization; lo's bottom bit
   * forces oddness.)
   */
  hi = crypto_fast_rng_get_u32(get_thread_fast_rng()) | UINT32_C(0x80000000);
  lo = crypto_fast_rng_get_u32(get_thread_fast_rng()) | UINT32_C(0x00000001);

  /* Round to nearest scaled significand in [2^63, 2^64].
   * 4294967296 = 2^32 shifts hi above lo; the addition rounds. */
  s = hi*(double)4294967296 + lo;

  /* Rescale into [1/2, 1] and apply exponent in one swell foop. */
  return s * ldexp(1, -(64 + z));
}
  458. /*******************************************************************/
  459. /* Functions for specific probability distributions start here: */
  460. /*
  461. * Logistic(mu, sigma) distribution, supported on (-\infty,+\infty)
  462. *
  463. * This is the uniform distribution on [0,1] mapped into log-odds
  464. * space, scaled by sigma and translated by mu.
  465. *
  466. * pdf(x) = e^{-(x - mu)/sigma} sigma (1 + e^{-(x - mu)/sigma})^2
  467. * cdf(x) = 1/(1 + e^{-(x - mu)/sigma}) = logistic((x - mu)/sigma)
  468. * sf(x) = 1 - cdf(x) = 1 - logistic((x - mu)/sigma = logistic(-(x - mu)/sigma)
  469. * icdf(p) = mu + sigma log p/(1 - p) = mu + sigma logit(p)
  470. * isf(p) = mu + sigma log (1 - p)/p = mu - sigma logit(p)
  471. */
  472. /**
  473. * Compute the CDF of the Logistic(mu, sigma) distribution: the
  474. * logistic function. Well-conditioned for negative inputs and small
  475. * positive inputs; ill-conditioned for large positive inputs.
  476. */
  477. STATIC double
  478. cdf_logistic(double x, double mu, double sigma)
  479. {
  480. return logistic((x - mu)/sigma);
  481. }
  482. /**
  483. * Compute the SF of the Logistic(mu, sigma) distribution: the logistic
  484. * function reflected over the y axis. Well-conditioned for positive
  485. * inputs and small negative inputs; ill-conditioned for large negative
  486. * inputs.
  487. */
  488. STATIC double
  489. sf_logistic(double x, double mu, double sigma)
  490. {
  491. return logistic(-(x - mu)/sigma);
  492. }
  493. /**
  494. * Compute the inverse of the CDF of the Logistic(mu, sigma)
  495. * distribution: the logit function. Well-conditioned near 0;
  496. * ill-conditioned near 1/2 and 1.
  497. */
  498. STATIC double
  499. icdf_logistic(double p, double mu, double sigma)
  500. {
  501. return mu + sigma*logit(p);
  502. }
  503. /**
  504. * Compute the inverse of the SF of the Logistic(mu, sigma)
  505. * distribution: the -logit function. Well-conditioned near 0;
  506. * ill-conditioned near 1/2 and 1.
  507. */
  508. STATIC double
  509. isf_logistic(double p, double mu, double sigma)
  510. {
  511. return mu - sigma*logit(p);
  512. }
  513. /*
  514. * LogLogistic(alpha, beta) distribution, supported on (0, +\infty).
  515. *
  516. * This is the uniform distribution on [0,1] mapped into odds space,
  517. * scaled by positive alpha and shaped by positive beta.
  518. *
  519. * Equivalent to computing exp of a Logistic(log alpha, 1/beta) sample.
  520. * (Name arises because the pdf has LogLogistic(x; alpha, beta) =
  521. * Logistic(log x; log alpha, 1/beta) and mathematicians got their
  522. * covariance contravariant.)
  523. *
  524. * pdf(x) = (beta/alpha) (x/alpha)^{beta - 1}/(1 + (x/alpha)^beta)^2
  525. * = (1/e^mu sigma) (x/e^mu)^{1/sigma - 1} /
  526. * (1 + (x/e^mu)^{1/sigma})^2
  527. * cdf(x) = 1/(1 + (x/alpha)^-beta) = 1/(1 + (x/e^mu)^{-1/sigma})
  528. * = 1/(1 + (e^{log x}/e^mu)^{-1/sigma})
  529. * = 1/(1 + (e^{log x - mu})^{-1/sigma})
  530. * = 1/(1 + e^{-(log x - mu)/sigma})
  531. * = logistic((log x - mu)/sigma)
  532. * = logistic((log x - log alpha)/(1/beta))
  533. * sf(x) = 1 - 1/(1 + (x/alpha)^-beta)
  534. * = (x/alpha)^-beta/(1 + (x/alpha)^-beta)
  535. * = 1/((x/alpha)^beta + 1)
  536. * = 1/(1 + (x/alpha)^beta)
  537. * icdf(p) = alpha (p/(1 - p))^{1/beta}
  538. * = alpha e^{logit(p)/beta}
  539. * = e^{mu + sigma logit(p)}
  540. * isf(p) = alpha ((1 - p)/p)^{1/beta}
  541. * = alpha e^{-logit(p)/beta}
  542. * = e^{mu - sigma logit(p)}
  543. */
  544. /**
  545. * Compute the CDF of the LogLogistic(alpha, beta) distribution.
  546. * Well-conditioned for all x and alpha, and the condition number
  547. *
  548. * -beta/[1 + (x/alpha)^{-beta}]
  549. *
  550. * grows linearly with beta.
  551. *
  552. * Loosely, the relative error of this implementation is bounded by
  553. *
  554. * 4 eps + 2 eps^2 + O(beta eps),
  555. *
  556. * so don't bother trying this for beta anywhere near as large as
  557. * 1/eps, around which point it levels off at 1.
  558. */
  559. STATIC double
  560. cdf_log_logistic(double x, double alpha, double beta)
  561. {
  562. /*
  563. * Let d0 be the error of x/alpha; d1, of pow; d2, of +; and
  564. * d3, of the final quotient. The exponentiation gives
  565. *
  566. * ((1 + d0) x/alpha)^{-beta}
  567. * = (x/alpha)^{-beta} (1 + d0)^{-beta}
  568. * = (x/alpha)^{-beta} (1 + (1 + d0)^{-beta} - 1)
  569. * = (x/alpha)^{-beta} (1 + d')
  570. *
  571. * where d' = (1 + d0)^{-beta} - 1. If y = (x/alpha)^{-beta},
  572. * the denominator is
  573. *
  574. * (1 + d2) (1 + (1 + d1) (1 + d') y)
  575. * = (1 + d2) (1 + y + (d1 + d' + d1 d') y)
  576. * = 1 + y + (1 + d2) (d1 + d' + d1 d') y
  577. * = (1 + y) (1 + (1 + d2) (d1 + d' + d1 d') y/(1 + y))
  578. * = (1 + y) (1 + d''),
  579. *
  580. * where d'' = (1 + d2) (d1 + d' + d1 d') y/(1 + y). The
  581. * final result is
  582. *
  583. * (1 + d3) / [(1 + d2) (1 + d'') (1 + y)]
  584. * = (1 + d''') / (1 + y)
  585. *
  586. * for |d'''| <= 2|d3 - d''| by Lemma 2 as long as |d''| < 1/2
  587. * (which may not be the case for very large beta). This
  588. * relative error is therefore bounded by
  589. *
  590. * |d'''|
  591. * <= 2|d3 - d''|
  592. * <= 2|d3| + 2|(1 + d2) (d1 + d' + d1 d') y/(1 + y)|
  593. * <= 2|d3| + 2|(1 + d2) (d1 + d' + d1 d')|
  594. * = 2|d3| + 2|d1 + d' + d1 d' + d2 d1 + d2 d' + d2 d1 d'|
  595. * <= 2|d3| + 2|d1| + 2|d'| + 2|d1 d'| + 2|d2 d1| + 2|d2 d'|
  596. * + 2|d2 d1 d'|
  597. * <= 4 eps + 2 eps^2 + (2 + 2 eps + 2 eps^2) |d'|.
  598. *
  599. * Roughly, |d'| = |(1 + d0)^{-beta} - 1| grows like beta eps,
  600. * until it levels off at 1.
  601. */
  602. return 1/(1 + pow(x/alpha, -beta));
  603. }
  604. /**
  605. * Compute the SF of the LogLogistic(alpha, beta) distribution.
  606. * Well-conditioned for all x and alpha, and the condition number
  607. *
  608. * beta/[1 + (x/alpha)^beta]
  609. *
  610. * grows linearly with beta.
  611. *
  612. * Loosely, the relative error of this implementation is bounded by
  613. *
  614. * 4 eps + 2 eps^2 + O(beta eps)
  615. *
  616. * so don't bother trying this for beta anywhere near as large as
  617. * 1/eps, beyond which point it grows unbounded.
  618. */
  619. STATIC double
  620. sf_log_logistic(double x, double alpha, double beta)
  621. {
  622. /*
  623. * The error analysis here is essentially the same as in
  624. * cdf_log_logistic, except that rather than levelling off at
  625. * 1, |(1 + d0)^beta - 1| grows unbounded.
  626. */
  627. return 1/(1 + pow(x/alpha, beta));
  628. }
  629. /**
  630. * Compute the inverse of the CDF of the LogLogistic(alpha, beta)
  631. * distribution. Ill-conditioned for p near 1 and beta near 0 with
  632. * condition number 1/[beta (1 - p)].
  633. */
  634. STATIC double
  635. icdf_log_logistic(double p, double alpha, double beta)
  636. {
  637. return alpha*pow(p/(1 - p), 1/beta);
  638. }
  639. /**
  640. * Compute the inverse of the SF of the LogLogistic(alpha, beta)
  641. * distribution. Ill-conditioned for p near 1 and for large beta, with
  642. * condition number -1/[beta (1 - p)].
  643. */
  644. STATIC double
  645. isf_log_logistic(double p, double alpha, double beta)
  646. {
  647. return alpha*pow((1 - p)/p, 1/beta);
  648. }
  649. /*
  650. * Weibull(lambda, k) distribution, supported on (0, +\infty).
  651. *
  652. * pdf(x) = (k/lambda) (x/lambda)^{k - 1} e^{-(x/lambda)^k}
  653. * cdf(x) = 1 - e^{-(x/lambda)^k}
  654. * icdf(p) = lambda * (-log (1 - p))^{1/k}
  655. * sf(x) = e^{-(x/lambda)^k}
  656. * isf(p) = lambda * (-log p)^{1/k}
  657. */
  658. /**
  659. * Compute the CDF of the Weibull(lambda, k) distribution.
  660. * Well-conditioned for small x and k, and for large lambda --
  661. * condition number
  662. *
  663. * -k (x/lambda)^k exp(-(x/lambda)^k)/[exp(-(x/lambda)^k) - 1]
  664. *
  665. * grows linearly with k, x^k, and lambda^{-k}.
  666. */
  667. STATIC double
  668. cdf_weibull(double x, double lambda, double k)
  669. {
  670. return -expm1(-pow(x/lambda, k));
  671. }
  672. /**
  673. * Compute the SF of the Weibull(lambda, k) distribution.
  674. * Well-conditioned for small x and k, and for large lambda --
  675. * condition number
  676. *
  677. * -k (x/lambda)^k
  678. *
  679. * grows linearly with k, x^k, and lambda^{-k}.
  680. */
  681. STATIC double
  682. sf_weibull(double x, double lambda, double k)
  683. {
  684. return exp(-pow(x/lambda, k));
  685. }
  686. /**
  687. * Compute the inverse of the CDF of the Weibull(lambda, k)
  688. * distribution. Ill-conditioned for p near 1, and for k near 0;
  689. * condition number is
  690. *
  691. * (p/(1 - p))/(k log(1 - p)).
  692. */
  693. STATIC double
  694. icdf_weibull(double p, double lambda, double k)
  695. {
  696. return lambda*pow(-log1p(-p), 1/k);
  697. }
  698. /**
  699. * Compute the inverse of the SF of the Weibull(lambda, k)
  700. * distribution. Ill-conditioned for p near 0, and for k near 0;
  701. * condition number is
  702. *
  703. * 1/(k log(p)).
  704. */
  705. STATIC double
  706. isf_weibull(double p, double lambda, double k)
  707. {
  708. return lambda*pow(-log(p), 1/k);
  709. }
  710. /*
  711. * GeneralizedPareto(mu, sigma, xi), supported on (mu, +\infty) for
  712. * nonnegative xi, or (mu, mu - sigma/xi) for negative xi.
  713. *
  714. * Samples:
  715. * = mu - sigma log U, if xi = 0;
  716. * = mu + sigma (U^{-xi} - 1)/xi = mu + sigma*expm1(-xi log U)/xi, if xi =/= 0,
  717. * where U is uniform on (0,1].
  718. * = mu + sigma (e^{xi X} - 1)/xi,
  719. * where X has standard exponential distribution.
  720. *
  721. * pdf(x) = sigma^{-1} (1 + xi (x - mu)/sigma)^{-(1 + 1/xi)}
  722. * cdf(x) = 1 - (1 + xi (x - mu)/sigma)^{-1/xi}
  723. * = 1 - e^{-log(1 + xi (x - mu)/sigma)/xi}
  724. * --> 1 - e^{-(x - mu)/sigma} as xi --> 0
  725. * sf(x) = (1 + xi (x - mu)/sigma)^{-1/xi}
  726. * --> e^{-(x - mu)/sigma} as xi --> 0
  727. * icdf(p) = mu + sigma*(p^{-xi} - 1)/xi
  728. * = mu + sigma*expm1(-xi log p)/xi
  729. * --> mu + sigma*log p as xi --> 0
  730. * isf(p) = mu + sigma*((1 - p)^{xi} - 1)/xi
  731. * = mu + sigma*expm1(-xi log1p(-p))/xi
  732. * --> mu + sigma*log1p(-p) as xi --> 0
  733. */
  734. /**
  735. * Compute the CDF of the GeneralizedPareto(mu, sigma, xi)
  736. * distribution. Well-conditioned everywhere. For standard
  737. * distribution (mu=0, sigma=1), condition number
  738. *
  739. * (x/(1 + x xi)) / ((1 + x xi)^{1/xi} - 1)
  740. *
  741. * is bounded by 1, attained only at x = 0.
  742. */
  743. STATIC double
  744. cdf_genpareto(double x, double mu, double sigma, double xi)
  745. {
  746. double x_0 = (x - mu)/sigma;
  747. /*
  748. * log(1 + xi x_0)/xi
  749. * = (-1/xi) \sum_{n=1}^\infty (-xi x_0)^n/n
  750. * = (-1/xi) (-xi x_0 + \sum_{n=2}^\infty (-xi x_0)^n/n)
  751. * = x_0 - (1/xi) \sum_{n=2}^\infty (-xi x_0)^n/n
  752. * = x_0 - x_0 \sum_{n=2}^\infty (-xi x_0)^{n-1}/n
  753. * = x_0 (1 - d),
  754. *
  755. * where d = \sum_{n=2}^\infty (-xi x_0)^{n-1}/n. If |xi| <
  756. * eps/4|x_0|, then
  757. *
  758. * |d| <= \sum_{n=2}^\infty (eps/4)^{n-1}/n
  759. * <= \sum_{n=2}^\infty (eps/4)^{n-1}
  760. * = \sum_{n=1}^\infty (eps/4)^n
  761. * = (eps/4) \sum_{n=0}^\infty (eps/4)^n
  762. * = (eps/4)/(1 - eps/4)
  763. * < eps/2
  764. *
  765. * for any 0 < eps < 2. Thus, the relative error of x_0 from
  766. * log(1 + xi x_0)/xi is bounded by eps.
  767. */
  768. if (fabs(xi) < 1e-17/x_0)
  769. return -expm1(-x_0);
  770. else
  771. return -expm1(-log1p(xi*x_0)/xi);
  772. }
  773. /**
  774. * Compute the SF of the GeneralizedPareto(mu, sigma, xi) distribution.
  775. * For standard distribution (mu=0, sigma=1), ill-conditioned for xi
  776. * near 0; condition number
  777. *
  778. * -x (1 + x xi)^{(-1 - xi)/xi}/(1 + x xi)^{-1/xi}
  779. * = -x (1 + x xi)^{-1/xi - 1}/(1 + x xi)^{-1/xi}
  780. * = -(x/(1 + x xi)) (1 + x xi)^{-1/xi}/(1 + x xi)^{-1/xi}
  781. * = -x/(1 + x xi)
  782. *
  783. * is bounded by 1/xi.
  784. */
  785. STATIC double
  786. sf_genpareto(double x, double mu, double sigma, double xi)
  787. {
  788. double x_0 = (x - mu)/sigma;
  789. if (fabs(xi) < 1e-17/x_0)
  790. return exp(-x_0);
  791. else
  792. return exp(-log1p(xi*x_0)/xi);
  793. }
  794. /**
  795. * Compute the inverse of the CDF of the GeneralizedPareto(mu, sigma,
  796. * xi) distribution. Ill-conditioned for p near 1; condition number is
  797. *
  798. * xi (p/(1 - p))/(1 - (1 - p)^xi)
  799. */
  800. STATIC double
  801. icdf_genpareto(double p, double mu, double sigma, double xi)
  802. {
  803. /*
  804. * To compute f(xi) = (U^{-xi} - 1)/xi = (e^{-xi log U} - 1)/xi
  805. * for xi near zero (note f(xi) --> -log U as xi --> 0), write
  806. * the absolutely convergent Taylor expansion
  807. *
  808. * f(xi) = (1/xi)*(-xi log U + \sum_{n=2}^\infty (-xi log U)^n/n!
  809. * = -log U + (1/xi)*\sum_{n=2}^\infty (-xi log U)^n/n!
  810. * = -log U + \sum_{n=2}^\infty xi^{n-1} (-log U)^n/n!
  811. * = -log U - log U \sum_{n=2}^\infty (-xi log U)^{n-1}/n!
  812. * = -log U (1 + \sum_{n=2}^\infty (-xi log U)^{n-1}/n!).
  813. *
  814. * Let d = \sum_{n=2}^\infty (-xi log U)^{n-1}/n!. What do we
  815. * lose if we discard it and use -log U as an approximation to
  816. * f(xi)? If |xi| < eps/-4log U, then
  817. *
  818. * |d| <= \sum_{n=2}^\infty |xi log U|^{n-1}/n!
  819. * <= \sum_{n=2}^\infty (eps/4)^{n-1}/n!
  820. * <= \sum_{n=1}^\infty (eps/4)^n
  821. * = (eps/4) \sum_{n=0}^\infty (eps/4)^n
  822. * = (eps/4)/(1 - eps/4)
  823. * < eps/2,
  824. *
  825. * for any 0 < eps < 2. Hence, as long as |xi| < eps/-2log U,
  826. * f(xi) = -log U (1 + d) for |d| <= eps/2. |d| is the
  827. * relative error of f(xi) from -log U; from this bound, the
  828. * relative error of -log U from f(xi) is at most (eps/2)/(1 -
  829. * eps/2) = eps/2 + (eps/2)^2 + (eps/2)^3 + ... < eps for 0 <
  830. * eps < 1. Since -log U < 1000 for all U in (0, 1] in
  831. * binary64 floating-point, we can safely cut xi off at 1e-20 <
  832. * eps/4000 and attain <1ulp error from series truncation.
  833. */
  834. if (fabs(xi) <= 1e-20)
  835. return mu - sigma*log1p(-p);
  836. else
  837. return mu + sigma*expm1(-xi*log1p(-p))/xi;
  838. }
  839. /**
  840. * Compute the inverse of the SF of the GeneralizedPareto(mu, sigma,
 * xi) distribution.  Ill-conditioned for p near 1; condition number is
  842. *
  843. * -xi/(1 - p^{-xi})
  844. */
  845. STATIC double
  846. isf_genpareto(double p, double mu, double sigma, double xi)
  847. {
  848. if (fabs(xi) <= 1e-20)
  849. return mu - sigma*log(p);
  850. else
  851. return mu + sigma*expm1(-xi*log(p))/xi;
  852. }
  853. /*******************************************************************/
  854. /**
  855. * Deterministic samplers, parametrized by uniform integer and (0,1]
  856. * samples. No guarantees are made about _which_ mapping from the
  857. * integer and (0,1] samples these use; all that is guaranteed is the
  858. * distribution of the outputs conditioned on a uniform distribution on
  859. * the inputs. The automatic tests in test_prob_distr.c double-check
  860. * the particular mappings we use.
  861. *
  862. * Beware: Unlike random_uniform_01(), these are not guaranteed to be
  863. * supported on all possible outputs. See Ilya Mironov, `On the
  864. * Significance of the Least Significant Bits for Differential
  865. * Privacy', for an example of what can go wrong if you try to use
  866. * these to conceal information from an adversary but you expose the
  867. * specific full-precision floating-point values.
  868. *
  869. * Note: None of these samplers use rejection sampling; they are all
  870. * essentially inverse-CDF transforms with tweaks. If you were to add,
  871. * say, a Gamma sampler with the Marsaglia-Tsang method, you would have
  872. * to parametrize it by a potentially infinite stream of uniform (and
  873. * perhaps normal) samples rather than a fixed number, which doesn't
  874. * make for quite as nice automatic testing as for these.
  875. */
  876. /**
  877. * Deterministically sample from the interval [a, b], indexed by a
  878. * uniform random floating-point number p0 in (0, 1].
  879. *
  880. * Note that even if p0 is nonzero, the result may be equal to a, if
  881. * ulp(a)/2 is nonnegligible, e.g. if a = 1. For maximum resolution,
  882. * arrange |a| <= |b|.
  883. */
STATIC double
sample_uniform_interval(double p0, double a, double b)
{
  /*
   * XXX Prove that the distribution is, in fact, uniform on
   * [a,b], particularly around p0 = 1, or at least has very
   * small deviation from uniform, quantified appropriately
   * (e.g., like in Monahan 1984, or by KL divergence).  It
   * almost certainly does but it would be nice to quantify the
   * error.
   */
  if ((a <= 0 && 0 <= b) || (b <= 0 && 0 <= a)) {
    /*
     * When ab < 0, (1 - t) a + t b is monotonic, since for
     * a <= b it is a sum of nondecreasing functions of t,
     * and for b <= a, of nonincreasing functions of t.
     * Further, clearly at 0 and 1 it attains a and b,
     * respectively.  Hence it is bounded within [a, b].
     */
    return (1 - p0)*a + p0*b;
  } else {
    /*
     * a + (b - a) t is monotonic -- it is obviously a
     * nondecreasing function of t for a <= b.  Further, it
     * attains a at 0, and while it may overshoot b at 1,
     * we have a
     *
     * Theorem.  If 0 <= t < 1, then the floating-point
     * evaluation of a + (b - a) t is bounded in [a, b].
     *
     * Lemma 1.  If 0 <= t < 1 is a floating-point number,
     * then for any normal floating-point number x except
     * the smallest in magnitude, |round(x*t)| < |x|.
     *
     * Proof.  WLOG, assume x >= 0.  Since the rounding
     * function and t |---> x*t are nondecreasing, their
     * composition t |---> round(x*t) is also
     * nondecreasing, so it suffices to consider the
     * largest floating-point number below 1, in particular
     * t = 1 - ulp(1)/2.
     *
     * Case I: If x is a power of two, then the next
     * floating-point number below x is x - ulp(x)/2 = x -
     * x*ulp(1)/2 = x*(1 - ulp(1)/2) = x*t, so, since x*t
     * is a floating-point number, multiplication is exact,
     * and thus round(x*t) = x*t < x.
     *
     * Case II: If x is not a power of two, then the
     * greatest lower bound of real numbers rounded to x is
     * x - ulp(x)/2 = x - ulp(T(x))/2 = x - T(x)*ulp(1)/2,
     * where T(X) is the largest power of two below x.
     * Anything below this bound is rounded to a
     * floating-point number smaller than x, and x*t = x*(1
     * - ulp(1)/2) = x - x*ulp(1)/2 < x - T(x)*ulp(1)/2
     * since T(x) < x, so round(x*t) < x*t < x.  QED.
     *
     * Lemma 2.  If x and y are subnormal, then round(x +
     * y) = x + y.
     *
     * Proof.  It is a matter of adding the significands,
     * since if we treat subnormals as having an implicit
     * zero bit before the `binary' point, their exponents
     * are all the same.  There is at most one carry/borrow
     * bit, which can always be accommodated either in a
     * subnormal, or, at largest, in the implicit one bit
     * of a normal.
     *
     * Lemma 3.  Let x and y be floating-point numbers.  If
     * round(x - y) is subnormal or zero, then it is equal
     * to x - y.
     *
     * Proof.  Case I (equal): round(x - y) = 0 iff x = y;
     * hence if round(x - y) = 0, then round(x - y) = 0 = x
     * - y.
     *
     * Case II (subnormal/subnormal): If x and y are both
     * subnormal, this follows directly from Lemma 2.
     *
     * Case IIIa (normal/subnormal): If x is normal and y
     * is subnormal, then x and y must share sign, or else
     * x - y would be larger than x and thus rounded to
     * normal.  If s is the smallest normal positive
     * floating-point number, |x| < 2s since by
     * construction 2s - |y| is normal for all subnormal y.
     * This means that x and y must have the same exponent,
     * so the difference is the difference of significands,
     * which is exact.
     *
     * Case IIIb (subnormal/normal): Same as case IIIa for
     * -(y - x).
     *
     * Case IV (normal/normal): If x and y are both normal,
     * then they must share sign, or else x - y would be
     * larger than x and thus rounded to normal.  Note that
     * |y| < 2|x|, for if |y| >= 2|x|, then |x| - |y| <=
     * -|x| but -|x| is normal like x.  Also, |x|/2 < |y|:
     * if |x|/2 is subnormal, it must hold because y is
     * normal; if |x|/2 is normal, then |x|/2 >= s, so
     * since |x| - |y| < s,
     *
     *     |x|/2 = |x| - |x|/2 <= |x| - s <= |y|;
     *
     * that is, |x|/2 < |y| < 2|x|, so by the Sterbenz
     * lemma, round(x - y) = x - y.  QED.
     *
     * Proof of theorem.  WLOG, assume 0 <= a <= b.  Since
     * round(a + round(round(b - a)*t)) is nondecreasing in
     * t and attains a at 0, the lower end of the bound is
     * trivial; we must show the upper end of the bound
     * strictly.  It suffices to show this for the largest
     * floating-point number below 1, namely 1 - ulp(1)/2.
     *
     * Case I: round(b - a) is normal.  Then it is at most
     * the smallest floating-point number above b - a.  By
     * Lemma 1, round(round(b - a)*t) < round(b - a).
     * Since the inequality is strict, and since
     * round(round(b - a)*t) is a floating-point number
     * below round(b - a), and since there are no
     * floating-point numbers between b - a and round(b -
     * a), we must have round(round(b - a)*t) < b - a.
     * Then since y |---> round(a + y) is nondecreasing, we
     * must have
     *
     *     round(a + round(round(b - a)*t))
     *     <= round(a + (b - a))
     *     = round(b) = b.
     *
     * Case II: round(b - a) is subnormal.  In this case,
     * Lemma 1 falls apart -- we are not guaranteed the
     * strict inequality.  However, by Lemma 3, the
     * difference is exact: round(b - a) = b - a.  Thus,
     *
     *     round(a + round(round(b - a)*t))
     *     <= round(a + round((b - a)*t))
     *     <= round(a + (b - a))
     *     = round(b)
     *     = b,
     *
     * QED.
     */
    /* p0 is restricted to [0,1], but we use >= to silence -Wfloat-equal. */
    if (p0 >= 1)
      return b;
    return a + (b - a)*p0;
  }
}
  1030. /**
  1031. * Deterministically sample from the standard logistic distribution,
  1032. * indexed by a uniform random 32-bit integer s and uniform random
  1033. * floating-point numbers t and p0 in (0, 1].
  1034. */
  1035. STATIC double
  1036. sample_logistic(uint32_t s, double t, double p0)
  1037. {
  1038. double sign = (s & 1) ? -1 : +1;
  1039. double r;
  1040. /*
  1041. * We carve up the interval (0, 1) into subregions to compute
  1042. * the inverse CDF precisely:
  1043. *
  1044. * A = (0, 1/(1 + e)] ---> (-\infty, -1]
  1045. * B = [1/(1 + e), 1/2] ---> [-1, 0]
  1046. * C = [1/2, 1 - 1/(1 + e)] ---> [0, 1]
  1047. * D = [1 - 1/(1 + e), 1) ---> [1, +\infty)
  1048. *
  1049. * Cases D and C are mirror images of cases A and B,
  1050. * respectively, so we choose between them by the sign chosen
  1051. * by a fair coin toss. We choose between cases A and B by a
  1052. * coin toss weighted by
  1053. *
  1054. * 2/(1 + e) = 1 - [1/2 - 1/(1 + e)]/(1/2):
  1055. *
  1056. * if it comes up heads, scale p0 into a uniform (0, 1/(1 + e)]
  1057. * sample p; if it comes up tails, scale p0 into a uniform (0,
  1058. * 1/2 - 1/(1 + e)] sample and compute the inverse CDF of p =
  1059. * 1/2 - p0.
  1060. */
  1061. if (t <= 2/(1 + exp(1))) {
  1062. /* p uniform in (0, 1/(1 + e)], represented by p. */
  1063. p0 /= 1 + exp(1);
  1064. r = logit(p0);
  1065. } else {
  1066. /*
  1067. * p uniform in [1/(1 + e), 1/2), actually represented
  1068. * by p0 = 1/2 - p uniform in (0, 1/2 - 1/(1 + e)], so
  1069. * that p = 1/2 - p.
  1070. */
  1071. p0 *= 0.5 - 1/(1 + exp(1));
  1072. r = logithalf(p0);
  1073. }
  1074. /*
  1075. * We have chosen from the negative half of the standard
  1076. * logistic distribution, which is symmetric with the positive
  1077. * half. Now use the sign to choose uniformly between them.
  1078. */
  1079. return sign*r;
  1080. }
  1081. /**
  1082. * Deterministically sample from the logistic distribution scaled by
  1083. * sigma and translated by mu.
  1084. */
  1085. static double
  1086. sample_logistic_locscale(uint32_t s, double t, double p0, double mu,
  1087. double sigma)
  1088. {
  1089. return mu + sigma*sample_logistic(s, t, p0);
  1090. }
  1091. /**
  1092. * Deterministically sample from the standard log-logistic
  1093. * distribution, indexed by a uniform random 32-bit integer s and a
  1094. * uniform random floating-point number p0 in (0, 1].
  1095. */
  1096. STATIC double
  1097. sample_log_logistic(uint32_t s, double p0)
  1098. {
  1099. /*
  1100. * Carve up the interval (0, 1) into (0, 1/2] and [1/2, 1); the
  1101. * condition numbers of the icdf and the isf coincide at 1/2.
  1102. */
  1103. p0 *= 0.5;
  1104. if ((s & 1) == 0) {
  1105. /* p = p0 in (0, 1/2] */
  1106. return p0/(1 - p0);
  1107. } else {
  1108. /* p = 1 - p0 in [1/2, 1) */
  1109. return (1 - p0)/p0;
  1110. }
  1111. }
  1112. /**
  1113. * Deterministically sample from the log-logistic distribution with
  1114. * scale alpha and shape beta.
  1115. */
  1116. static double
  1117. sample_log_logistic_scaleshape(uint32_t s, double p0, double alpha,
  1118. double beta)
  1119. {
  1120. double x = sample_log_logistic(s, p0);
  1121. return alpha*pow(x, 1/beta);
  1122. }
  1123. /**
  1124. * Deterministically sample from the standard exponential distribution,
  1125. * indexed by a uniform random 32-bit integer s and a uniform random
  1126. * floating-point number p0 in (0, 1].
  1127. */
  1128. static double
  1129. sample_exponential(uint32_t s, double p0)
  1130. {
  1131. /*
  1132. * We would like to evaluate log(p) for p near 0, and log1p(-p)
  1133. * for p near 1. Simply carve the interval into (0, 1/2] and
  1134. * [1/2, 1) by a fair coin toss.
  1135. */
  1136. p0 *= 0.5;
  1137. if ((s & 1) == 0)
  1138. /* p = p0 in (0, 1/2] */
  1139. return -log(p0);
  1140. else
  1141. /* p = 1 - p0 in [1/2, 1) */
  1142. return -log1p(-p0);
  1143. }
  1144. /**
  1145. * Deterministically sample from a Weibull distribution with scale
  1146. * lambda and shape k -- just an exponential with a shape parameter in
  1147. * addition to a scale parameter. (Yes, lambda really is the scale,
  1148. * _not_ the rate.)
  1149. */
  1150. STATIC double
  1151. sample_weibull(uint32_t s, double p0, double lambda, double k)
  1152. {
  1153. return lambda*pow(sample_exponential(s, p0), 1/k);
  1154. }
  1155. /**
  1156. * Deterministically sample from the generalized Pareto distribution
  1157. * with shape xi, indexed by a uniform random 32-bit integer s and a
  1158. * uniform random floating-point number p0 in (0, 1].
  1159. */
STATIC double
sample_genpareto(uint32_t s, double p0, double xi)
{
  double x = sample_exponential(s, p0);

  /*
   * Write f(xi) = (e^{xi x} - 1)/xi for xi near zero as the
   * absolutely convergent Taylor series
   *
   *     f(x) = (1/xi) (xi x + \sum_{n=2}^\infty (xi x)^n/n!)
   *          = x + (1/xi) \sum_{n=2}^\infty (xi x)^n/n!
   *          = x + \sum_{n=2}^\infty xi^{n-1} x^n/n!
   *          = x + x \sum_{n=2}^\infty (xi x)^{n-1}/n!
   *          = x (1 + \sum_{n=2}^\infty (xi x)^{n-1}/n!).
   *
   * d = \sum_{n=2}^\infty (xi x)^{n-1}/n! is the relative error
   * of f(x) from x.  If |xi| < eps/4x, then
   *
   *     |d| <= \sum_{n=2}^\infty |xi x|^{n-1}/n!
   *         <= \sum_{n=2}^\infty (eps/4)^{n-1}
   *          = \sum_{n=1}^\infty (eps/4)^n
   *          = (eps/4) \sum_{n=0}^\infty (eps/4)^n
   *          = (eps/4)/(1 - eps/4)
   *          < eps/2,
   *
   * for any 0 < eps < 2.  Hence, as long as |xi| < eps/4x, f(xi)
   * = x (1 + d) for |d| <= eps/2, so x = f(xi) (1 + d') for |d'|
   * <= eps.  What bound should we use for x?
   *
   * - If x is exponentially distributed, x > 200 with
   *   probability below e^{-200} << 2^{-256}, i.e. never.
   *
   * - If x is computed by -log(U) for U in (0, 1], x is
   *   guaranteed to be below 1000 in IEEE 754 binary64
   *   floating-point.
   *
   * We can safely cut xi off at 1e-20 < eps/4000 and attain an
   * error bounded by 0.5 ulp for this expression.
   */
  return (fabs(xi) < 1e-20 ? x : expm1(xi*x)/xi);
}
  1200. /**
  1201. * Deterministically sample from a generalized Pareto distribution with
  1202. * shape xi, scaled by sigma and translated by mu.
  1203. */
  1204. static double
  1205. sample_genpareto_locscale(uint32_t s, double p0, double mu, double sigma,
  1206. double xi)
  1207. {
  1208. return mu + sigma*sample_genpareto(s, p0, xi);
  1209. }
  1210. /**
  1211. * Deterministically sample from the geometric distribution with
  1212. * per-trial success probability p.
  1213. *
  1214. * XXX Quantify the error (KL divergence?) of this
  1215. * ceiling-of-exponential sampler from a true geometric distribution,
  1216. * which we could get by rejection sampling. Relevant papers:
  1217. *
  1218. * John F. Monahan, `Accuracy in Random Number Generation',
  1219. * Mathematics of Computation 45(172), October 1984, pp. 559--568.
  1220. *https://pdfs.semanticscholar.org/aca6/74b96da1df77b2224e8cfc5dd6d61a471632.pdf
  1221. *
  1222. * Karl Bringmann and Tobias Friedrich, `Exact and Efficient
  1223. * Generation of Geometric Random Variates and Random Graphs', in
 * Proceedings of the 40th International Colloquium on Automata,
  1225. * Languages, and Programming -- ICALP 2013, Springer LNCS 7965,
  1226. * pp.267--278.
  1227. * https://doi.org/10.1007/978-3-642-39206-1_23
  1228. * https://people.mpi-inf.mpg.de/~kbringma/paper/2013ICALP-1.pdf
  1229. */
  1230. static double
  1231. sample_geometric(uint32_t s, double p0, double p)
  1232. {
  1233. double x = sample_exponential(s, p0);
  1234. /* This is actually a check against 1, but we do >= so that the compiler
  1235. does not raise a -Wfloat-equal */
  1236. if (p >= 1)
  1237. return 1;
  1238. return ceil(-x/log1p(-p));
  1239. }
  1240. /*******************************************************************/
  1241. /** Public API for probability distributions:
  1242. *
  1243. * These are wrapper functions on top of the various probability distribution
  1244. * operations using the generic <b>dist</b> structure.
  1245. * These are the functions that should be used by consumers of this API.
  1246. */
  1247. /** Returns the name of the distribution in <b>dist</b>. */
  1248. const char *
  1249. dist_name(const struct dist *dist)
  1250. {
  1251. return dist->ops->name;
  1252. }
  1253. /* Sample a value from <b>dist</b> and return it. */
  1254. double
  1255. dist_sample(const struct dist *dist)
  1256. {
  1257. return dist->ops->sample(dist);
  1258. }
  1259. /** Compute the CDF of <b>dist</b> at <b>x</b>. */
  1260. double
  1261. dist_cdf(const struct dist *dist, double x)
  1262. {
  1263. return dist->ops->cdf(dist, x);
  1264. }
  1265. /** Compute the SF (Survival function) of <b>dist</b> at <b>x</b>. */
  1266. double
  1267. dist_sf(const struct dist *dist, double x)
  1268. {
  1269. return dist->ops->sf(dist, x);
  1270. }
  1271. /** Compute the iCDF (Inverse CDF) of <b>dist</b> at <b>x</b>. */
  1272. double
  1273. dist_icdf(const struct dist *dist, double p)
  1274. {
  1275. return dist->ops->icdf(dist, p);
  1276. }
  1277. /** Compute the iSF (Inverse Survival function) of <b>dist</b> at <b>x</b>. */
  1278. double
  1279. dist_isf(const struct dist *dist, double p)
  1280. {
  1281. return dist->ops->isf(dist, p);
  1282. }
  1283. /** Functions for uniform distribution */
  1284. static double
  1285. uniform_sample(const struct dist *dist)
  1286. {
  1287. const struct uniform *U = dist_to_const_uniform(dist);
  1288. double p0 = random_uniform_01();
  1289. return sample_uniform_interval(p0, U->a, U->b);
  1290. }
  1291. static double
  1292. uniform_cdf(const struct dist *dist, double x)
  1293. {
  1294. const struct uniform *U = dist_to_const_uniform(dist);
  1295. if (x < U->a)
  1296. return 0;
  1297. else if (x < U->b)
  1298. return (x - U->a)/(U->b - U->a);
  1299. else
  1300. return 1;
  1301. }
  1302. static double
  1303. uniform_sf(const struct dist *dist, double x)
  1304. {
  1305. const struct uniform *U = dist_to_const_uniform(dist);
  1306. if (x > U->b)
  1307. return 0;
  1308. else if (x > U->a)
  1309. return (U->b - x)/(U->b - U->a);
  1310. else
  1311. return 1;
  1312. }
  1313. static double
  1314. uniform_icdf(const struct dist *dist, double p)
  1315. {
  1316. const struct uniform *U = dist_to_const_uniform(dist);
  1317. double w = U->b - U->a;
  1318. return (p < 0.5 ? (U->a + w*p) : (U->b - w*(1 - p)));
  1319. }
  1320. static double
  1321. uniform_isf(const struct dist *dist, double p)
  1322. {
  1323. const struct uniform *U = dist_to_const_uniform(dist);
  1324. double w = U->b - U->a;
  1325. return (p < 0.5 ? (U->b - w*p) : (U->a + w*(1 - p)));
  1326. }
  1327. const struct dist_ops uniform_ops = {
  1328. .name = "uniform",
  1329. .sample = uniform_sample,
  1330. .cdf = uniform_cdf,
  1331. .sf = uniform_sf,
  1332. .icdf = uniform_icdf,
  1333. .isf = uniform_isf,
  1334. };
  1335. /*******************************************************************/
  1336. /** Private functions for each probability distribution. */
  1337. /** Functions for logistic distribution: */
  1338. static double
  1339. logistic_sample(const struct dist *dist)
  1340. {
  1341. const struct logistic *L = dist_to_const_logistic(dist);
  1342. uint32_t s = crypto_fast_rng_get_u32(get_thread_fast_rng());
  1343. double t = random_uniform_01();
  1344. double p0 = random_uniform_01();
  1345. return sample_logistic_locscale(s, t, p0, L->mu, L->sigma);
  1346. }
  1347. static double
  1348. logistic_cdf(const struct dist *dist, double x)
  1349. {
  1350. const struct logistic *L = dist_to_const_logistic(dist);
  1351. return cdf_logistic(x, L->mu, L->sigma);
  1352. }
  1353. static double
  1354. logistic_sf(const struct dist *dist, double x)
  1355. {
  1356. const struct logistic *L = dist_to_const_logistic(dist);
  1357. return sf_logistic(x, L->mu, L->sigma);
  1358. }
  1359. static double
  1360. logistic_icdf(const struct dist *dist, double p)
  1361. {
  1362. const struct logistic *L = dist_to_const_logistic(dist);
  1363. return icdf_logistic(p, L->mu, L->sigma);
  1364. }
  1365. static double
  1366. logistic_isf(const struct dist *dist, double p)
  1367. {
  1368. const struct logistic *L = dist_to_const_logistic(dist);
  1369. return isf_logistic(p, L->mu, L->sigma);
  1370. }
  1371. const struct dist_ops logistic_ops = {
  1372. .name = "logistic",
  1373. .sample = logistic_sample,
  1374. .cdf = logistic_cdf,
  1375. .sf = logistic_sf,
  1376. .icdf = logistic_icdf,
  1377. .isf = logistic_isf,
  1378. };
  1379. /** Functions for log-logistic distribution: */
  1380. static double
  1381. log_logistic_sample(const struct dist *dist)
  1382. {
  1383. const struct log_logistic *LL = dist_to_const_log_logistic(dist);
  1384. uint32_t s = crypto_fast_rng_get_u32(get_thread_fast_rng());
  1385. double p0 = random_uniform_01();
  1386. return sample_log_logistic_scaleshape(s, p0, LL->alpha, LL->beta);
  1387. }
  1388. static double
  1389. log_logistic_cdf(const struct dist *dist, double x)
  1390. {
  1391. const struct log_logistic *LL = dist_to_const_log_logistic(dist);
  1392. return cdf_log_logistic(x, LL->alpha, LL->beta);
  1393. }
  1394. static double
  1395. log_logistic_sf(const struct dist *dist, double x)
  1396. {
  1397. const struct log_logistic *LL = dist_to_const_log_logistic(dist);
  1398. return sf_log_logistic(x, LL->alpha, LL->beta);
  1399. }
  1400. static double
  1401. log_logistic_icdf(const struct dist *dist, double p)
  1402. {
  1403. const struct log_logistic *LL = dist_to_const_log_logistic(dist);
  1404. return icdf_log_logistic(p, LL->alpha, LL->beta);
  1405. }
  1406. static double
  1407. log_logistic_isf(const struct dist *dist, double p)
  1408. {
  1409. const struct log_logistic *LL = dist_to_const_log_logistic(dist);
  1410. return isf_log_logistic(p, LL->alpha, LL->beta);
  1411. }
  1412. const struct dist_ops log_logistic_ops = {
  1413. .name = "log logistic",
  1414. .sample = log_logistic_sample,
  1415. .cdf = log_logistic_cdf,
  1416. .sf = log_logistic_sf,
  1417. .icdf = log_logistic_icdf,
  1418. .isf = log_logistic_isf,
  1419. };
  1420. /** Functions for Weibull distribution */
  1421. static double
  1422. weibull_sample(const struct dist *dist)
  1423. {
  1424. const struct weibull *W = dist_to_const_weibull(dist);
  1425. uint32_t s = crypto_fast_rng_get_u32(get_thread_fast_rng());
  1426. double p0 = random_uniform_01();
  1427. return sample_weibull(s, p0, W->lambda, W->k);
  1428. }
  1429. static double
  1430. weibull_cdf(const struct dist *dist, double x)
  1431. {
  1432. const struct weibull *W = dist_to_const_weibull(dist);
  1433. return cdf_weibull(x, W->lambda, W->k);
  1434. }
  1435. static double
  1436. weibull_sf(const struct dist *dist, double x)
  1437. {
  1438. const struct weibull *W = dist_to_const_weibull(dist);
  1439. return sf_weibull(x, W->lambda, W->k);
  1440. }
  1441. static double
  1442. weibull_icdf(const struct dist *dist, double p)
  1443. {
  1444. const struct weibull *W = dist_to_const_weibull(dist);
  1445. return icdf_weibull(p, W->lambda, W->k);
  1446. }
  1447. static double
  1448. weibull_isf(const struct dist *dist, double p)
  1449. {
  1450. const struct weibull *W = dist_to_const_weibull(dist);
  1451. return isf_weibull(p, W->lambda, W->k);
  1452. }
  1453. const struct dist_ops weibull_ops = {
  1454. .name = "Weibull",
  1455. .sample = weibull_sample,
  1456. .cdf = weibull_cdf,
  1457. .sf = weibull_sf,
  1458. .icdf = weibull_icdf,
  1459. .isf = weibull_isf,
  1460. };
  1461. /** Functions for generalized Pareto distributions */
  1462. static double
  1463. genpareto_sample(const struct dist *dist)
  1464. {
  1465. const struct genpareto *GP = dist_to_const_genpareto(dist);
  1466. uint32_t s = crypto_fast_rng_get_u32(get_thread_fast_rng());
  1467. double p0 = random_uniform_01();
  1468. return sample_genpareto_locscale(s, p0, GP->mu, GP->sigma, GP->xi);
  1469. }
  1470. static double
  1471. genpareto_cdf(const struct dist *dist, double x)
  1472. {
  1473. const struct genpareto *GP = dist_to_const_genpareto(dist);
  1474. return cdf_genpareto(x, GP->mu, GP->sigma, GP->xi);
  1475. }
  1476. static double
  1477. genpareto_sf(const struct dist *dist, double x)
  1478. {
  1479. const struct genpareto *GP = dist_to_const_genpareto(dist);
  1480. return sf_genpareto(x, GP->mu, GP->sigma, GP->xi);
  1481. }
  1482. static double
  1483. genpareto_icdf(const struct dist *dist, double p)
  1484. {
  1485. const struct genpareto *GP = dist_to_const_genpareto(dist);
  1486. return icdf_genpareto(p, GP->mu, GP->sigma, GP->xi);
  1487. }
  1488. static double
  1489. genpareto_isf(const struct dist *dist, double p)
  1490. {
  1491. const struct genpareto *GP = dist_to_const_genpareto(dist);
  1492. return isf_genpareto(p, GP->mu, GP->sigma, GP->xi);
  1493. }
  1494. const struct dist_ops genpareto_ops = {
  1495. .name = "generalized Pareto",
  1496. .sample = genpareto_sample,
  1497. .cdf = genpareto_cdf,
  1498. .sf = genpareto_sf,
  1499. .icdf = genpareto_icdf,
  1500. .isf = genpareto_isf,
  1501. };
  1502. /** Functions for geometric distribution on number of trials before success */
  1503. static double
  1504. geometric_sample(const struct dist *dist)
  1505. {
  1506. const struct geometric *G = dist_to_const_geometric(dist);
  1507. uint32_t s = crypto_fast_rng_get_u32(get_thread_fast_rng());
  1508. double p0 = random_uniform_01();
  1509. return sample_geometric(s, p0, G->p);
  1510. }
  1511. static double
  1512. geometric_cdf(const struct dist *dist, double x)
  1513. {
  1514. const struct geometric *G = dist_to_const_geometric(dist);
  1515. if (x < 1)
  1516. return 0;
  1517. /* 1 - (1 - p)^floor(x) = 1 - e^{floor(x) log(1 - p)} */
  1518. return -expm1(floor(x)*log1p(-G->p));
  1519. }
  1520. static double
  1521. geometric_sf(const struct dist *dist, double x)
  1522. {
  1523. const struct geometric *G = dist_to_const_geometric(dist);
  1524. if (x < 1)
  1525. return 0;
  1526. /* (1 - p)^floor(x) = e^{ceil(x) log(1 - p)} */
  1527. return exp(floor(x)*log1p(-G->p));
  1528. }
  1529. static double
  1530. geometric_icdf(const struct dist *dist, double p)
  1531. {
  1532. const struct geometric *G = dist_to_const_geometric(dist);
  1533. return log1p(-p)/log1p(-G->p);
  1534. }
  1535. static double
  1536. geometric_isf(const struct dist *dist, double p)
  1537. {
  1538. const struct geometric *G = dist_to_const_geometric(dist);
  1539. return log(p)/log1p(-G->p);
  1540. }
  1541. const struct dist_ops geometric_ops = {
  1542. .name = "geometric (1-based)",
  1543. .sample = geometric_sample,
  1544. .cdf = geometric_cdf,
  1545. .sf = geometric_sf,
  1546. .icdf = geometric_icdf,
  1547. .isf = geometric_isf,
  1548. };