NFLlib.cpp 33 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923
  1. /* Copyright (C) 2014 Carlos Aguilar Melchor, Joris Barrier, Marc-Olivier Killijian
  2. * This file is part of XPIR.
  3. *
  4. * XPIR is free software: you can redistribute it and/or modify
  5. * it under the terms of the GNU General Public License as published by
  6. * the Free Software Foundation, either version 3 of the License, or
  7. * (at your option) any later version.
  8. *
  9. * XPIR is distributed in the hope that it will be useful,
  10. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. * GNU General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU General Public License
  15. * along with XPIR. If not, see <http://www.gnu.org/licenses/>.
  16. */
  17. #include "NFLlib.hpp"
  18. #include <x86intrin.h>
  19. void DEBUG_MESSAGE(const char *s, poly64 p, unsigned int n){
  20. #ifdef DEBUG
  21. std::cout<<s;
  22. NFLlib::print_poly64hex(p,n);
  23. #endif
  24. }
  25. // *********************************************************
  26. // Constructors and init functions
  27. // *********************************************************
  28. // The constructors are not able to set all the parameters and setNewParameters has to be called
  29. // after them. The attribute alreadyInit reflects this uninitialized status.
  30. NFLlib::NFLlib():
  31. alreadyInit(0),
  32. nbModuli(0),
  33. polyDegree(0)
  34. {
  35. }
  36. // Compute all data needed to do NTT operations
  37. // Depends on the moduli used and polyDegree
  38. void NFLlib::configureNTT()
  39. {
  40. uint64_t phi, invphi, omega, invomega, temp;
  41. // If this is a reconfiguration free memory before reallocating it
  42. freeNTTMemory();
  43. // Allocate space for the first dimension of the arrays needed for the NTT and CRT
  44. phis = (uint64_t **) malloc(nbModuli * sizeof(uint64_t *));
  45. shoupphis = (uint64_t **) malloc(nbModuli * sizeof(uint64_t *));
  46. invpoly_times_invphis = (uint64_t **) malloc(nbModuli * sizeof(uint64_t *));
  47. shoupinvpoly_times_invphis = (uint64_t **) malloc(nbModuli * sizeof(uint64_t *));
  48. omegas = (uint64_t **) malloc(nbModuli * sizeof(uint64_t *));
  49. shoupomegas = (uint64_t **) malloc(nbModuli * sizeof(uint64_t *));
  50. invomegas = (uint64_t **) malloc(nbModuli * sizeof(uint64_t *));
  51. shoupinvomegas = (uint64_t **) malloc(nbModuli * sizeof(uint64_t *));
  52. invpolyDegree = (uint64_t *) malloc(nbModuli * sizeof(uint64_t));
  53. liftingIntegers = new mpz_t[nbModuli];
  54. moduli=new uint64_t[nbModuli]();
  55. // From now on, we have to do everything nbModuli times
  56. for(unsigned short currentModulus=0;currentModulus<nbModuli;currentModulus++)
  57. {
  58. // Define the moduli we use (useful ?)
  59. moduli[currentModulus] = P64[currentModulus];
  60. // Allocation of the second dimension of the NTT parameters
  61. phis[currentModulus] = (uint64_t *) malloc(polyDegree * sizeof(uint64_t));
  62. shoupphis[currentModulus] = (uint64_t *) malloc(polyDegree * sizeof(uint64_t));
  63. invpoly_times_invphis[currentModulus] = (uint64_t *) malloc(polyDegree * sizeof(uint64_t));
  64. shoupinvpoly_times_invphis[currentModulus] = (uint64_t *) malloc(polyDegree * sizeof(uint64_t));
  65. omegas[currentModulus] = (uint64_t *) malloc((2 * polyDegree) * sizeof(uint64_t));
  66. shoupomegas[currentModulus] = omegas[currentModulus] + polyDegree;
  67. invomegas[currentModulus] = (uint64_t *) malloc((2 * polyDegree) * sizeof(uint64_t));
  68. shoupinvomegas[currentModulus] = invomegas[currentModulus] + polyDegree;
  69. // We start by computing phi
  70. // The roots in the array are primitve 2**14-th roots
  71. // Squared 14-log2(polyDegree) times they become
  72. // polyDegree-th roots as required by the NTT
  73. // But first we get phi = sqrt(omega) squaring them 13-log2(polyDegree) times
  74. phi = primitive_roots[currentModulus];
  75. for (unsigned int i = 0 ; i < 13 - log2(polyDegree) ; i++)
  76. {
  77. phi = mulmod(phi, phi, moduli[currentModulus]);
  78. }
  79. // Now that temp = phi we initialize the array of phi**i values
  80. // Initialized to phi**0
  81. temp = 1;
  82. for (unsigned int i = 0 ; i < polyDegree ; i++)
  83. {
  84. phis[currentModulus][i] = temp;
  85. shoupphis[currentModulus][i] = ((uint128_t) temp << 64) / moduli[currentModulus];
  86. // phi**(i+1)
  87. temp = mulmod(temp, phi, moduli[currentModulus]);
  88. }
  89. // At the end of the loop temp = phi**polyDegree
  90. // Computation of invphi
  91. // phi**(2*polydegree)=1 -> temp*phi**(polyDegree-1) = phi**(-1)
  92. invphi = mulmod(temp, phis[currentModulus][polyDegree-1], moduli[currentModulus]);
  93. // Computation of the inverse of polyDegree using the inverse of kMaxPolyDegree
  94. invpolyDegree[currentModulus] = mulmod(invkMaxPolyDegree[currentModulus],
  95. kMaxPolyDegree/polyDegree, moduli[currentModulus]);
  96. // Now we can compute the table invpoly_times_invphis
  97. temp = invpolyDegree[currentModulus];
  98. for (unsigned int i = 0 ; i < polyDegree ; i++)
  99. {
  100. invpoly_times_invphis[currentModulus][i] = temp;
  101. shoupinvpoly_times_invphis[currentModulus][i] = ((uint128_t) temp << 64)
  102. / moduli[currentModulus];
  103. // This is invpolyDegree*invphi**(i+1)
  104. temp = mulmod(temp, invphi, moduli[currentModulus]);
  105. }
  106. // For the omegas it is easy, we just use the function of David Harvey modified for our needs
  107. omega = mulmod(phi, phi, moduli[currentModulus]);
  108. prep_wtab(omegas[currentModulus], shoupomegas[currentModulus], omega, polyDegree,
  109. moduli[currentModulus]);
  110. // And again for the invomegas
  111. invomega = mulmod(invphi, invphi, moduli[currentModulus]);
  112. prep_wtab(invomegas[currentModulus], shoupinvomegas[currentModulus], invomega, polyDegree,
  113. moduli[currentModulus]);
  114. // Inverse-CRT constants
  115. mpz_t tmpz, mpz_inverse;
  116. mpz_init(tmpz);
  117. mpz_init(mpz_inverse);
  118. // Compute first the product of all moduli but the current
  119. mpz_init_set_ui(liftingIntegers[currentModulus],1UL);
  120. for (unsigned int i = 0 ; i < nbModuli ; i++)
  121. {
  122. if (i == currentModulus) continue;
  123. mpz_import(tmpz, 1, 1, sizeof(uint64_t), 0, 0, &P64[i]);
  124. mpz_mul(liftingIntegers[currentModulus], liftingIntegers[currentModulus], tmpz);
  125. }
  126. // Compute the inverse of the product modulo the current modulus and multiply it with the product
  127. mpz_import(tmpz, 1, 1, sizeof(uint64_t), 0, 0, &moduli[currentModulus]);
  128. mpz_invert(mpz_inverse, liftingIntegers[currentModulus], tmpz);
  129. mpz_mul(liftingIntegers[currentModulus], liftingIntegers[currentModulus], mpz_inverse);
  130. mpz_clear(tmpz);
  131. mpz_clear(mpz_inverse);
  132. }
  133. // Compute the product of all moduli
  134. mpz_t tmpz;
  135. mpz_init(tmpz);
  136. mpz_init_set_ui(moduliProduct,1UL);
  137. for (unsigned int i = 0 ; i < nbModuli ; i++)
  138. {
  139. mpz_import(tmpz, 1, 1, sizeof(uint64_t), 0, 0, &moduli[i]);
  140. mpz_mul(moduliProduct, moduliProduct, tmpz);
  141. }
  142. mpz_clear(tmpz);
  143. uint64_t* inv_indexes_tmp = malloc_align<32, uint64_t>(polyDegree);
  144. inv_indexes = malloc_align<16>(polyDegree, inv_indexes);
  145. // Compute permutation indexes for inv_ntt
  146. for (size_t i = 0; i < polyDegree; i++)
  147. {
  148. size_t ii = i, r = 0;
  149. for (unsigned h = 1; h < polyDegree; h=h<<1)
  150. {
  151. r = (r << 1) | (ii & 1);
  152. ii >>= 1;
  153. }
  154. inv_indexes_tmp[i] = r;
  155. }
  156. // Invert the previous permutation. I think we can do better than that :)
  157. for (size_t i = 0; i < polyDegree; i++) {
  158. for (size_t j = 0; j < polyDegree; j++) {
  159. if (inv_indexes_tmp[j] == i) {
  160. inv_indexes[i] = j;
  161. break;
  162. }
  163. }
  164. }
  165. free(inv_indexes_tmp);
  166. alreadyInit = nbModuli;
  167. }
  168. // *********************************************************************
  169. // Fundamental setters (resulting on a call to the init function above)
  170. // *********************************************************************
  171. // Set or reset the degrees of the polynomials used and the modulus size (which fixes the modulus)
  172. void NFLlib::setNewParameters(unsigned int polyDegree_, unsigned int aggregatedModulusBitsize_)
  173. {
  174. // We don't use setpolyDegree to avoid configureNTT being called twice
  175. polyDegree = polyDegree_;
  176. // We use a special function for setting the modulus as it can be called independently
  177. // and requires some processing. This function also ensures that the NTT params are configured.
  178. setmodulus(aggregatedModulusBitsize_);
  179. }
  180. // Sets the modulus size AND configures NTT parmeters (which fixes a given modulus)
  181. void NFLlib::setmodulus(uint64_t aggregatedModulusBitsize_)
  182. {
  183. // For the CRT, from the aggregated modulus bitsize, we compute the number of necessary moduli
  184. if (aggregatedModulusBitsize_ % kModulusBitsize != 0)
  185. {
  186. std::cout << "NFLlib: CRITICAL. Modulus of " << aggregatedModulusBitsize_
  187. << " requested but only integer multiples of " << kModulusBitsize << " bits implemented. Exiting ..." << std::endl;
  188. exit(-1);
  189. }
  190. nbModuli=aggregatedModulusBitsize_/kModulusBitsize;
  191. configureNTT();
  192. }
  193. // Sets polyDegree AND configures NTT
  194. void NFLlib::setpolyDegree(unsigned int polyDegree_)
  195. {
  196. polyDegree = polyDegree_;
  197. configureNTT();
  198. }
  199. // *********************************************************
  200. // Getters
  201. // *********************************************************
  202. uint64_t* NFLlib::getmoduli() { return moduli; }
  203. unsigned short NFLlib::getnbModuli() { return nbModuli; }
  204. unsigned int NFLlib::getpolyDegree() { return polyDegree; }
  205. void NFLlib::copymoduliProduct(mpz_t dest) { mpz_init_set(dest, moduliProduct); }
  206. // **************************************
  207. // Random polynomial generation functions
  208. // **************************************
  209. // Two modes uniform or bounded (if uniform is false)
  210. // WARNING : The bounded mode only works for a bound
  211. // below the smaller of the moduli -> we use or for bounded noise
  212. // Allocates and sets a bounded random polynomial in FFT form calling setBoundedRandomPoly
  213. poly64 NFLlib::allocBoundedRandomPoly(uint64_t upperBound_, bool uniform_)
  214. {
  215. poly64 res = (poly64)calloc(polyDegree * nbModuli, sizeof(uint64_t));
  216. setBoundedRandomPoly(res, upperBound_, uniform_);
  217. return res;
  218. }
  219. // Sets a pre-allocated random polynomial in FFT form
  220. // If uniform = true upperBound is ignored and the coefficients are uniformly random
  221. // ASSUMPTION: if uniform = false upperBound is below all of the moduli used
  222. void NFLlib::setBoundedRandomPoly(poly64 res, uint64_t upperBound_, bool uniform_)
  223. {
  224. poly64 rnd, rnd_orig;
  225. uint64_t mask;
  226. if (uniform_ == false){
  227. // In bounded mode upperBound must be below the smaller of the moduli
  228. for (unsigned int cm = 0 ; cm < nbModuli ; cm++)
  229. {
  230. if (upperBound_ >= moduli[cm])
  231. {
  232. std::cout << "NFLlib: upperBound is larger than the moduli in setBoundedRandomPoly.";
  233. std::cout << " Unpredictable results ..." << std::endl;
  234. break;
  235. }
  236. }
  237. // We play with the rnd pointer (in the uniform case), and thus
  238. // we need to remember the allocated pointer to free it at the end
  239. rnd_orig = (poly64) malloc(polyDegree * sizeof(uint64_t));
  240. rnd = rnd_orig;
  241. // Get some randomness from the PRNG
  242. fastrandombytes((unsigned char *)rnd, polyDegree * sizeof(uint64_t));
  243. // upperBound is below the moduli so we create the same mask for all the moduli
  244. mask=(1ULL << (unsigned int)ceil(log2(upperBound_))) -1;
  245. for(unsigned int i=0;i<polyDegree;i++) {
  246. // First remove the heavy weight bits we dont need
  247. rnd[i]=(rnd[i]&mask);
  248. // When the random is still too large, reduce it
  249. // In order to follow strictly a uniform distribution we should
  250. // get another rnd but in order to follow the proofs of security
  251. // strictly we should also take noise from a gaussian ...
  252. if (rnd[i]>=upperBound_)
  253. {
  254. rnd[i]-=upperBound_;
  255. }
  256. for (unsigned int cm = 0 ; cm < nbModuli ; cm++)
  257. {
  258. res[polyDegree*cm+i] = rnd[i];
  259. }
  260. }
  261. }
  262. else // uniform == true
  263. {
  264. // In uniform mode we need randomness for all the polynomials in the CRT
  265. rnd_orig = (poly64) malloc(polyDegree * nbModuli * sizeof(uint64_t));
  266. // We play with the rnd pointer (in the uniform case), and thus
  267. // we need to remember the allocated pointer to free it at the end
  268. rnd = rnd_orig;
  269. fastrandombytes((unsigned char *)rnd, polyDegree * nbModuli * sizeof(uint64_t));
  270. for (unsigned int cm = 0 ; cm < nbModuli ; cm++)
  271. {
  272. // In the uniform case, instead of getting a big random (within the general moduli),
  273. // We rather prefer, for performance issues, to get smaller randoms for each module
  274. // The mask should be the same for all moduli (because they are the same size)
  275. // But for generality we prefer to compute it for each moduli so that we could have
  276. // moduli of different bitsize
  277. mask=(1ULL << (int)ceil(log2(moduli[cm]))) -1;
  278. for(unsigned int i=0;i<polyDegree;i++)
  279. {
  280. // First remove the heavy weight bits we dont need
  281. rnd[i]=(rnd[i]&mask);
  282. // When the random is still too large, reduce it
  283. if (rnd[i]>=moduli[cm])
  284. {
  285. rnd[i]-=moduli[cm];
  286. }
  287. res[i] = rnd[i];
  288. }
  289. rnd+=polyDegree;
  290. res+=polyDegree;
  291. }
  292. }
  293. free(rnd_orig);
  294. }
  295. // *********************************************************
  296. // Data import and export main functions
  297. // *********************************************************
  298. // Takes an array of buffers and:
  299. // 1) Converts them into a set of polynomials with arbitrary large coefficients
  300. // 2) Reduces the polys through CRT to have nbModuli contiguous polys with uint64_t coefficients
  301. // 3) Does the NTT transform
  302. // - inArrayOfBuffers array of buffers to take the bits from
  303. // - nbrOfBuffers nbr of buffers in the array
  304. // - dataBitsizePerBuffer bits that can be taken from each buffer
  305. // - bitsPerCoordinate bits used to create each coefficient (can be > 64 !)
  306. // - polyNumber set by the function to say how many polynomials are in the returned pointer
  307. poly64 *NFLlib::deserializeDataNFL(unsigned char **inArrayOfBuffers, uint64_t nbrOfBuffers, uint64_t dataBitsizePerBuffer, unsigned bitsPerCoordinate, uint64_t &polyNumber) {
  308. // We need to handle dataBitsize bits of data per buffer
  309. // each poly can take publicParams.getAbsorptionBitsize() bits so
  310. polyNumber = ceil((double)dataBitsizePerBuffer*(double)nbrOfBuffers/(double)(bitsPerCoordinate*polyDegree));
  311. // The uint64_t arrays are allocated and filled with zeros
  312. // So that we do not have to pad with zeros beyond the limit
  313. poly64* deserData = (poly64 *) calloc(polyNumber, sizeof(poly64));
  314. // bitsplitter does all the hard work WITHOUT using large numbers !
  315. deserData[0] = bitsplitter(inArrayOfBuffers, nbrOfBuffers, dataBitsizePerBuffer, bitsPerCoordinate);
  316. // We finish the work by applying the NTT transform
  317. #ifdef MULTI_THREAD
  318. #pragma omp parallel for
  319. #endif
  320. for (unsigned int i = 0 ; i < polyNumber ; i++)
  321. {
  322. deserData[i] = deserData[0]+i*nbModuli*polyDegree;
  323. #ifndef SIMULATE_PRE_NTT_DATA
  324. nttAndPowPhi(deserData[i]);
  325. #endif
  326. }
  327. return deserData;
  328. }
  329. // Serialize an array of poly64 elements into a compact byte buffer
  330. // Takes a set of polynomial coefficients and outputs their concatenation
  331. // - indata points to the polynomial coefficients
  332. // - outdata points to the concatenation obtained
  333. // - bitsPerChunk defines how many bits has each coefficient
  334. // - nb_of_uint64 defines how many coefficients must be concatenated
  335. // ASSUMPTION: all the polynomials are contiguously allocated
  336. // ASSUMPTION: outdata has allocated one more uint64_t than needed
  337. // ASSUMPTION: all the coefficients have the same size which is below 56 bits
  338. void NFLlib::serializeData64 (uint64_t* indata, unsigned char* outdata, unsigned int bitsPerChunk, uint64_t nb_of_uint64)
  339. {
  340. unsigned char *tmppointer;
  341. uint64_t *pointer64;
  342. pointer64 = (uint64_t *) outdata;
  343. uint32_t bitswritten=0;
  344. // Tricky approach playing with pointers to be able to infinitely add
  345. // up to 56 bits to any bit string present
  346. for (uint64_t i = 0 ; i < nb_of_uint64 ;)
  347. {
  348. while(bitswritten + bitsPerChunk <= 64)
  349. {
  350. *pointer64 |= (*indata++)<<bitswritten; i++;
  351. bitswritten += bitsPerChunk;
  352. if(i==nb_of_uint64) break;
  353. }
  354. tmppointer = (unsigned char*) pointer64;
  355. tmppointer+=bitswritten>>3;
  356. pointer64 = (uint64_t *) (tmppointer);
  357. bitswritten -=8*(bitswritten>>3);
  358. }
  359. }
  360. // Serialize an array of poly64 elements into a compact byte buffer
  361. // Takes a set of polynomial coefficients and outputs their concatenation
  362. // - indata points to the polynomial coefficients
  363. // - outdata points to the concatenation obtained
  364. // - bitsPerChunk defines how many bits has each coefficient
  365. // - nb_of_uint64 defines how many coefficients must be concatenated
  366. // ASSUMPTION: all the polynomials are contiguously allocated
  367. // ASSUMPTION: outdata has allocated one more uint64_t than needed
  368. // IMPORTANT NOTE: Unlike in serializeData64 bitsPerChunk can be arbitrarily large. This function considers that large coefficients are retrieved by blocks of varying size up to 32 bit. For example 100-bit coefficients will be retrieved by three blocks of 32 bits and a block of 4 looping that way for each 100-bit coefficient.
  369. void NFLlib::serializeData32 (uint32_t* indata, unsigned char* outdata, unsigned int bitsPerChunk, uint64_t nb_of_uint32){
  370. unsigned char *tmppointer;
  371. uint64_t *pointer64;
  372. pointer64 = (uint64_t *) outdata;
  373. uint32_t bitswritten=0;
  374. // See through how many block (=sub-chunk) we will have to loop to get each coefficient (=chunk)
  375. const double uint32PerChunk = (double)bitsPerChunk/32;
  376. const uint64_t int_uint32PerChunk = ceil(uint32PerChunk);
  377. const bool isint_uint32PerChunk = (uint32PerChunk==(double)int_uint32PerChunk);
  378. // Build masks for each sub-chunk
  379. uint64_t subchunkMasks[int_uint32PerChunk];
  380. unsigned int subchunkSizes[int_uint32PerChunk];
  381. // Increment with subchunkIndex=((subchunkIndex+1)%int_uint64PerChunk)
  382. // and use with subchunkSizes[subchunkIndex];
  383. unsigned int subchunkIndex = 0;
  384. for (int i = 0 ; i < int_uint32PerChunk - 1 ; i++)
  385. {
  386. subchunkSizes[i]=32;
  387. subchunkMasks[i] = (1ULL<<32)-1; // const mask for extracting 32 bits
  388. }
  389. subchunkSizes[int_uint32PerChunk-1] = bitsPerChunk - 32 * (int_uint32PerChunk - 1);
  390. subchunkMasks[int_uint32PerChunk-1] = (1ULL<<(subchunkSizes[int_uint32PerChunk-1]))-1;
  391. // Apply the same approach than in serializeData64 but with varying sizes
  392. for (uint64_t i = 0 ; i < nb_of_uint32 ;)
  393. {
  394. while(bitswritten + subchunkSizes[subchunkIndex] <= 64)
  395. {
  396. *pointer64 |= ((uint64_t)(*indata++))<<bitswritten; i++;
  397. bitswritten += subchunkSizes[subchunkIndex];
  398. subchunkIndex=((subchunkIndex+1)%int_uint32PerChunk);
  399. if(i==nb_of_uint32) break;
  400. }
  401. tmppointer = (unsigned char*) pointer64;
  402. tmppointer+=bitswritten>>3;
  403. pointer64 = (uint64_t *) (tmppointer);
  404. bitswritten -=8*(bitswritten>>3);
  405. }
  406. }
  407. // *********************************************************
  408. // Helper functions
  409. // *********************************************************
  410. // Allocate a polynomial potentially with all coefficients set to zero if nullpoly = true
  411. poly64 NFLlib::allocpoly(bool nullpoly)
  412. {
  413. if (nullpoly == true) return (poly64) calloc(polyDegree*nbModuli,sizeof(uint64_t));
  414. else return (poly64) malloc(polyDegree*nbModuli*sizeof(uint64_t));
  415. }
  416. // Lift a polynomial in CRT representation, into a polynomial with large integer coefficients
  417. mpz_t* NFLlib::poly2mpz(poly64 p)
  418. {
  419. mpz_t* resultmpz;
  420. mpz_t* tmpzbuffer;
  421. resultmpz=new mpz_t[polyDegree];
  422. tmpzbuffer=new mpz_t[nbModuli];
  423. for(unsigned i=0;i<polyDegree;i++) {
  424. mpz_init2(resultmpz[i],192);
  425. }
  426. for(int cm = 0; cm < nbModuli;cm++) {
  427. mpz_init2(tmpzbuffer[cm],192);
  428. }
  429. for(unsigned i=0;i<polyDegree;i++) {
  430. mpz_set_ui(resultmpz[i],0UL);
  431. for(int cm = 0; cm < nbModuli;cm++) {
  432. mpz_import(tmpzbuffer[cm], 1, 1, sizeof(uint64_t), 0, 0, p+i+polyDegree*cm);
  433. mpz_mul(tmpzbuffer[cm], liftingIntegers[cm], tmpzbuffer[cm]);
  434. mpz_add(resultmpz[i], resultmpz[i], tmpzbuffer[cm]);
  435. }
  436. mpz_mod(resultmpz[i], resultmpz[i], moduliProduct);
  437. }
  438. for(int cm = 0; cm < nbModuli;cm++) {
  439. mpz_clear(tmpzbuffer[cm]);
  440. }
  441. delete[] tmpzbuffer;
  442. return resultmpz;
  443. }
  444. // Debug printing function
  445. void NFLlib::print_poly64hex(poly64 p, unsigned int coeff_nbr)
  446. {
  447. std::cout << "[";
  448. for (unsigned int i = 0 ; i < coeff_nbr ; i++)
  449. {
  450. std::cout << std::hex << (unsigned int)(p[i]>>32)<<(unsigned int) p[i] << " ";
  451. }
  452. std::cout << "]" << std::dec << std::endl;
  453. }
  454. // Debug printing function
  455. void NFLlib::print_poly64(poly64 p, unsigned int coeff_nbr)
  456. {
  457. std::cout << "[";
  458. for (unsigned int i = 0 ; i < coeff_nbr ; i++)
  459. {
  460. std::cout << p[i] << " ";
  461. }
  462. std::cout << "]" << std::endl;
  463. }
  464. // *********************************************************
  465. // Destructors and closing or freeing functions
  466. // *********************************************************
  467. // Destructor
  468. NFLlib::~NFLlib()
  469. {
  470. freeNTTMemory();
  471. }
  472. // Everything is commented out because of hard to predict issues with MacOsX
  473. // Uncomment on a computer with that OS before releasing
  474. void NFLlib::freeNTTMemory(){
  475. // alreadyInit says how many arrays we have allocated for the NTT
  476. for (unsigned i = 0 ; i < alreadyInit; i++)
  477. {
  478. free(phis[i]);
  479. free(shoupphis[i]);
  480. free(invpoly_times_invphis[i]);
  481. free(shoupinvpoly_times_invphis[i]);
  482. free(omegas[i]);
  483. free(invomegas[i]);
  484. mpz_clear(liftingIntegers[i]);
  485. if (i == alreadyInit - 1)
  486. {
  487. free(phis);
  488. free(shoupphis);
  489. free(invpoly_times_invphis);
  490. free(shoupinvpoly_times_invphis);
  491. free(omegas);
  492. free(shoupomegas);
  493. free(invomegas);
  494. free(shoupinvomegas);
  495. free(invpolyDegree);
  496. delete[] liftingIntegers;
  497. free(inv_indexes);
  498. mpz_clear(moduliProduct);
  499. delete[] moduli;
  500. }
  501. }
  502. alreadyInit = 0;
  503. }
  504. // ****************************************************************************************
  505. // THE DEN: Uncommented howling functions and pointer blood magic. Enter at your own risk.
  506. // ****************************************************************************************
  507. // We define first a back to back funtion to test our bitsplitter function
  508. // If DEBUG_BITSPLIT_B2B the function is used, else it is ignored
  509. //#define DEBUG_BITSPLIT_B2B
  510. #ifdef DEBUG_BITSPLIT_B2B
  511. #define DEBUG_BITSPLIT
  512. #define B2BTEST(inDataBuffers, nbrOfBuffers, bitsPerBuffer, bitsPerChunk, totalbitsread, bitsread, pointer64, bitsToRead) bitsplitter_backtoback_internal_test (inDataBuffers, nbrOfBuffers, bitsPerBuffer, bitsPerChunk, totalbitsread, bitsread, pointer64, bitsToRead)
  513. #else
  514. #define B2BTEST(inDataBuffers, nbrOfBuffers, bitsPerBuffer, bitsPerChunk, totalbitsread, bitsread, pointer64, bitsToRead) if(0);
  515. #endif
  516. uint64_t* NFLlib::bitsplitter_backtoback_internal_test (unsigned char** inDataBuffers, uint64_t nbrOfBuffers, uint64_t bitsPerBuffer, unsigned int bitsPerChunk, uint64_t totalbitsread,uint64_t bitsread, uint64_t* pointer64, unsigned int bitsToRead)
  517. {
  518. unsigned int bufferIndex = totalbitsread/bitsPerBuffer;
  519. uint64_t bitPositionInBuffer = totalbitsread - bufferIndex*bitsPerBuffer;
  520. uint64_t bytePositionInBuffer = bitPositionInBuffer/8;
  521. unsigned int bitPositionInByte = bitPositionInBuffer%8;
  522. if (bitPositionInBuffer+sizeof(uint64_t)*8 > bitsPerBuffer)
  523. {
  524. std::cerr << "WARNING: Bitsplit goes beyond buffer size (let's hope it is allocated)" << std::endl;
  525. }
  526. if (bitPositionInBuffer + bitsToRead > bitsPerBuffer)
  527. {
  528. std::cerr << "CRITICAL: Bitsplit reads AND USES data beyond buffer space" << std::endl;
  529. std::cerr << "totalbitsread " << totalbitsread << std::endl;
  530. std::cerr << "bufferIndex " << bufferIndex << std::endl;
  531. std::cerr << "bitPositionInBuffer " << bitPositionInBuffer << std::endl;
  532. std::cerr << "bytePositionInBuffer " << bytePositionInBuffer << std::endl;
  533. std::cerr << "CRITICAL: On B2B test called for " << bitsToRead << " bits" << std::endl;
  534. exit(-1);
  535. }
  536. uint64_t b2bresult = ((*((uint64_t *)(inDataBuffers[bufferIndex]+bytePositionInBuffer)))>>bitPositionInByte) & ((1ULL<<bitsToRead)-1) ;
  537. uint64_t bitsplitresult = ((*pointer64)>>bitsread) & ((1ULL<<bitsToRead)-1);
  538. if (b2bresult != bitsplitresult)
  539. {
  540. std::cerr << "CRITICAL: Bitsplit different from back to back function" << std::endl;
  541. std::cerr << "CRITICAL: Left " << b2bresult << std::endl;
  542. std::cerr << "CRITICAL: Right " << bitsplitresult << std::endl;
  543. exit(-1);
  544. }
  545. return 0;
  546. }
  547. inline void NFLlib::bs_loop (unsigned char** inDataBuffers, uint64_t nbrOfBuffers, uint64_t bitsPerBuffer, unsigned int bitsPerChunk, uint64_t *&tmpdata, uint64_t bufferIndex, uint64_t &bitsread, size_t &subchunkIndex)
  548. {
  549. // We redefine the amount of uint64 that can be produced given that we may have already read
  550. uint64_t bitstoread = bitsPerBuffer - bitsread;
  551. double nbChunks = (double)(bitsPerBuffer-bitsread)/bitsPerChunk;
  552. uint64_t int_nbChunks = floor(nbChunks);
  553. // How many uint64_t are needed to encode a chunk
  554. const double uint64PerChunk = (double)bitsPerChunk/56;
  555. const uint64_t int_uint64PerChunk = ceil(uint64PerChunk);
  556. const bool isint_uint64PerChunk = (uint64PerChunk==(double)int_uint64PerChunk);
  557. // Compute subchunk sizes and masks
  558. uint64_t subchunkMasks[int_uint64PerChunk];
  559. // Increment with subchunkIndex=((subchunkIndex+1)%int_uint64PerChunk) and
  560. // use subchunkSizes[subchunkIndex];
  561. unsigned int subchunkSizes[int_uint64PerChunk];
  562. for (unsigned i = 0 ; i < int_uint64PerChunk - 1 ; i++)
  563. {
  564. subchunkSizes[i]=56;
  565. subchunkMasks[i] = (1ULL<<56)-1; // const mask for extracting 56 bits
  566. }
  567. subchunkSizes[int_uint64PerChunk-1] = bitsPerChunk - 56 * (int_uint64PerChunk - 1);
  568. subchunkMasks[int_uint64PerChunk-1] = (1ULL<<(subchunkSizes[int_uint64PerChunk-1]))-1;
  569. #ifdef DEBUG_BITSPLIT
  570. for (int i = 0 ; i < int_uint64PerChunk ; i++)
  571. {
  572. std::cerr<<"bitsplit0 i="<<i<<std::endl;
  573. std::cerr<<"bitsplit0 subchunkSizes[i]="<<subchunkSizes[i]<<std::endl;
  574. std::cerr<<"bitsplit0 subchunkMasks[i]="<<subchunkMasks[i]<<std::endl;
  575. }
  576. #endif
  577. // We compute how many extra subchunks are available taking into account that we may
  578. // start this new buffer on the middle of a chunk
  579. uint64_t supplementalSubchunks = 0;
  580. uint64_t cumulatedsize = 0;
  581. for (unsigned i = 0 ; i < int_uint64PerChunk ; i++)
  582. {
  583. if (cumulatedsize + subchunkSizes[(subchunkIndex + i) % int_uint64PerChunk] <= (bitsPerBuffer-bitsread)-int_nbChunks*bitsPerChunk)
  584. {
  585. supplementalSubchunks++;
  586. cumulatedsize += subchunkSizes[(subchunkIndex + i) % int_uint64PerChunk];
  587. }
  588. else
  589. {
  590. break;
  591. }
  592. }
  593. uint64_t totalSubChunks=int_nbChunks*int_uint64PerChunk + supplementalSubchunks;
  594. #ifdef DEBUG_BITSPLIT
  595. std::cerr<<"bitsplit1 nbrOfBuffers="<<nbrOfBuffers<<std::endl;
  596. std::cerr<<"bitsplit1 nbChunks="<<nbChunks<<std::endl;
  597. std::cerr<<"bitsplit1 int_nbChunks="<<int_nbChunks<<std::endl;
  598. std::cerr<<"bitsplit1 uint64PerChunk="<<uint64PerChunk<<std::endl;
  599. std::cerr<<"bitsplit1 int_uint64PerChunk="<<int_uint64PerChunk<<std::endl;
  600. std::cerr<<"bitsplit1 isint_uint64PerChunk="<<isint_uint64PerChunk<<std::endl;
  601. std::cerr<<"bitsplit1 totalSubChunks="<<totalSubChunks<<std::endl;
  602. std::cerr<<"bitsplit1 cumulatedsize " << cumulatedsize << std::endl;
  603. std::cerr<<"bitsplit1 bitsPerBuffer " << bitsPerBuffer << std::endl;
  604. std::cerr<<"bitsplit1 bitsread " << bitsread << std::endl;
  605. std::cerr<<"bitsplit1 nextsubchunksize " << subchunkSizes[(subchunkIndex)]<< std::endl;
  606. std::cerr<<"bitsplit1 nextsubchunk " << subchunkIndex<< std::endl;
  607. #endif
  608. unsigned char *tmppointer;
  609. uint64_t *pointer64;
  610. pointer64 = (uint64_t *) inDataBuffers[bufferIndex];
  611. uint64_t bitsremaining=0;
  612. // Loop over the subchunks in the current buffer
  613. for (uint64_t i = 0 ; i < totalSubChunks ; )
  614. {
  615. // Get up to 64bits
  616. while (bitsread + subchunkSizes[subchunkIndex] <= 64)
  617. {
  618. *tmpdata = ((*pointer64)>>bitsread) & subchunkMasks[subchunkIndex];
  619. tmpdata++;i++;
  620. bitsread += subchunkSizes[subchunkIndex];
  621. subchunkIndex= (subchunkIndex+1 == int_uint64PerChunk ? 0 : subchunkIndex + 1);
  622. if(i==totalSubChunks) break;
  623. }
  624. if (bitstoread > 128)
  625. {
  626. unsigned shift = bitsread >>3;
  627. tmppointer = (unsigned char*) pointer64;
  628. tmppointer += shift;
  629. pointer64 = (uint64_t *) (tmppointer);
  630. bitstoread-= shift<<3;
  631. bitsread -= shift<<3;
  632. }
  633. else
  634. {
  635. tmppointer = (unsigned char*) pointer64;
  636. while ((64 - bitsread < subchunkSizes[subchunkIndex]) && bitstoread > 0)
  637. {
  638. tmppointer++;
  639. bitsread -= 8;
  640. bitstoread -= 8;
  641. }
  642. pointer64 = (uint64_t *) (tmppointer);
  643. }
  644. }
  645. // If there is a last partial subchunk in this buffer, read it part from this buffer and part
  646. // from next buffer if available
  647. bitsremaining = (uint64_t) round((nbChunks-int_nbChunks)*bitsPerChunk - cumulatedsize );
  648. #ifdef DEBUG_BITSPLIT
  649. std::cout<<"bitsplit2 bitsremaining (should be <56)="<<bitsremaining<<std::endl;
  650. #endif
  651. if (bitsremaining !=0)
  652. {
  653. size_t shift=(64-(bitsread+bitsremaining))/8;
  654. bitsread+=(shift<<3);
  655. tmppointer = (unsigned char*) pointer64;
  656. tmppointer-=shift;
  657. pointer64 = (uint64_t *) (tmppointer);
  658. *tmpdata = ((*pointer64)>>bitsread) & ((1ULL<<bitsremaining)-1);
  659. // If there is another buffer to deal with, finish the current tmpdata uint64_t
  660. if (bufferIndex < nbrOfBuffers - 1)
  661. {
  662. pointer64 = (uint64_t *) inDataBuffers[bufferIndex+1];
  663. *tmpdata |= ((*pointer64)<<bitsremaining) & subchunkMasks[subchunkIndex];
  664. // We restart bitsread to the bits read in the new buffer
  665. bitsread = subchunkSizes[subchunkIndex] - bitsremaining;
  666. subchunkIndex= (subchunkIndex+1 == int_uint64PerChunk ? 0 : subchunkIndex + 1);
  667. tmpdata++;
  668. }
  669. }
  670. else
  671. {
  672. bitsread = 0;
  673. }
  674. }
  675. inline void NFLlib::bs_finish(poly64 &outdata, uint64_t int_uint64PerChunk, uint64_t polyNumber, uint64_t* splitData, uint64_t nbrOfBuffers, uint64_t bitsPerBuffer, unsigned int bitsPerChunk)
  676. {
  677. if(int_uint64PerChunk>1) {
  678. outdata=(poly64) calloc(polyNumber*nbModuli*polyDegree + 1,sizeof(uint64_t));
  679. internalLongIntegersToCRT( splitData, outdata, int_uint64PerChunk, ceil(((double)bitsPerBuffer*nbrOfBuffers)/bitsPerChunk));
  680. free(splitData);
  681. }
  682. else
  683. {
  684. if (nbModuli > 1)
  685. {
  686. outdata=(poly64) calloc(polyNumber*nbModuli*polyDegree + 1,sizeof(uint64_t));
  687. for (unsigned i = 0 ; i < polyNumber ; i++)
  688. {
  689. for (int cm = 0 ; cm < nbModuli ; cm++)
  690. {
  691. memcpy(outdata + i*polyDegree*nbModuli + cm*polyDegree,
  692. splitData + i*polyDegree, polyDegree*sizeof(uint64_t));
  693. }
  694. }
  695. free(splitData);
  696. }
  697. else
  698. {
  699. outdata=splitData;
  700. }
  701. }
  702. }
  703. // This function does all the hard work of deserializeDataNFL
  704. // 1) Converts input into a set of polynomials with arbitrary large coefficients
  705. // 2) Reduces the polys through CRT to have nbModuli contiguous polys with uint64_t coefficients
  706. // Do not try to understand it, it is a nightmare, we won't try to explain it :)
  707. uint64_t* NFLlib::bitsplitter (unsigned char** inDataBuffers, uint64_t nbrOfBuffers, uint64_t bitsPerBuffer, unsigned int bitsPerChunk)
  708. {
  709. // If you don't need to change me don't try to understand me
  710. // If you need to change me, build me again from scratch :)
  711. // How many uint64_t are needed to encode a chunk
  712. const double uint64PerChunk = (double)bitsPerChunk/56;
  713. const uint64_t int_uint64PerChunk = ceil(uint64PerChunk);
  714. // How many polynomials are needed to encode the data
  715. uint64_t polyNumber =
  716. ceil((double)bitsPerBuffer*(double)nbrOfBuffers/(double)(bitsPerChunk*polyDegree));
  717. uint64_t* splitData =
  718. (uint64_t*)(calloc(polyNumber*polyDegree*int_uint64PerChunk+1,sizeof(uint64_t)));
  719. uint64_t* tmpdata=splitData;
  720. uint64_t bitsread=0;
  721. size_t subchunkIndex=0;
  722. // Loop over the buffers
  723. for (uint64_t h = 0 ; h < nbrOfBuffers ; h++)
  724. {
  725. bs_loop (inDataBuffers, nbrOfBuffers, bitsPerBuffer, bitsPerChunk,
  726. tmpdata, h, bitsread, subchunkIndex);
  727. }
  728. poly64 outdata;
  729. bs_finish(outdata, int_uint64PerChunk, polyNumber, splitData, nbrOfBuffers, bitsPerBuffer, bitsPerChunk);
  730. return outdata;
  731. }
  732. // Subroutine for bitsplitter, the daemonic function. This is the function that allows
  733. // us to circumvect GMP
  734. void NFLlib::internalLongIntegersToCRT(uint64_t* tmpdata, poly64 outdata, uint64_t int_uint64PerChunk, uint64_t totalNbChunks)
  735. {
  736. uint64_t* outdataPtr=outdata;
  737. uint64_t* indataPtr=tmpdata;
  738. uint64_t multiplier[nbModuli][int_uint64PerChunk];
  739. uint64_t* chunkParts[int_uint64PerChunk];
  740. for(int cm=0;cm<nbModuli;cm++)
  741. {
  742. multiplier[cm][0]=1;
  743. for(unsigned j=1;j<int_uint64PerChunk;j++)
  744. {
  745. multiplier[cm][j] = mulmod(multiplier[cm][j-1],1ULL<<56,moduli[cm]);
  746. }
  747. }
  748. for(unsigned i=0;i<totalNbChunks;i++)
  749. {
  750. for(unsigned j=0;j<int_uint64PerChunk;j++)
  751. {
  752. for(int cm=0;cm<nbModuli;cm++)
  753. {
  754. // set to zero before computation if not calloc'd
  755. *(outdataPtr+cm*polyDegree) += mulmod(*(indataPtr+j), multiplier[cm][j],moduli[cm]);
  756. }
  757. }
  758. indataPtr+=int_uint64PerChunk;
  759. outdataPtr++;
  760. if((i+1)%polyDegree==0)
  761. {
  762. outdataPtr += polyDegree*(nbModuli-1);
  763. }
  764. }
  765. }
  766. // ****************************************************************************************
  767. // YOU EXITED THE DEN ALIVE! Either you jumped a lot of lines,
  768. // or you became completely insane ... in the latter case ...
  769. // Welcome to the group!
  770. // ****************************************************************************************