NFLlib.cpp 33 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922
  1. /* Copyright (C) 2014 Carlos Aguilar Melchor, Joris Barrier, Marc-Olivier Killijian
  2. * This file is part of XPIR.
  3. *
  4. * XPIR is free software: you can redistribute it and/or modify
  5. * it under the terms of the GNU General Public License as published by
  6. * the Free Software Foundation, either version 3 of the License, or
  7. * (at your option) any later version.
  8. *
  9. * XPIR is distributed in the hope that it will be useful,
  10. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. * GNU General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU General Public License
  15. * along with XPIR. If not, see <http://www.gnu.org/licenses/>.
  16. */
  17. #include "NFLlib.hpp"
  18. #include <x86intrin.h>
  19. void DEBUG_MESSAGE(const char *s, poly64 p, unsigned int n){
  20. #ifdef DEBUG
  21. std::cout<<s;
  22. NFLlib::print_poly64hex(p,n);
  23. #endif
  24. }
  25. // *********************************************************
  26. // Constructors and init functions
  27. // *********************************************************
  28. // The constructors are not able to set all the parameters and setNewParameters has to be called
  29. // after them. The attribute alreadyInit reflects this uninitialized status.
  30. NFLlib::NFLlib():
  31. alreadyInit(0),
  32. nbModuli(0),
  33. polyDegree(0)
  34. {
  35. }
  36. // Compute all data needed to do NTT operations
  37. // Depends on the moduli used and polyDegree
  38. void NFLlib::configureNTT()
  39. {
  40. uint64_t phi, invphi, omega, invomega, temp;
  41. // If this is a reconfiguration free memory before reallocating it
  42. freeNTTMemory();
  43. // Allocate space for the first dimension of the arrays needed for the NTT and CRT
  44. phis = (uint64_t **) malloc(nbModuli * sizeof(uint64_t *));
  45. shoupphis = (uint64_t **) malloc(nbModuli * sizeof(uint64_t *));
  46. invpoly_times_invphis = (uint64_t **) malloc(nbModuli * sizeof(uint64_t *));
  47. shoupinvpoly_times_invphis = (uint64_t **) malloc(nbModuli * sizeof(uint64_t *));
  48. omegas = (uint64_t **) malloc(nbModuli * sizeof(uint64_t *));
  49. shoupomegas = (uint64_t **) malloc(nbModuli * sizeof(uint64_t *));
  50. invomegas = (uint64_t **) malloc(nbModuli * sizeof(uint64_t *));
  51. shoupinvomegas = (uint64_t **) malloc(nbModuli * sizeof(uint64_t *));
  52. invpolyDegree = (uint64_t *) malloc(nbModuli * sizeof(uint64_t));
  53. liftingIntegers = new mpz_t[nbModuli];
  54. // From now on, we have to do everything nbModuli times
  55. for(unsigned short currentModulus=0;currentModulus<nbModuli;currentModulus++)
  56. {
  57. // Define the moduli we use (useful ?)
  58. moduli[currentModulus] = P64[currentModulus];
  59. // Allocation of the second dimension of the NTT parameters
  60. phis[currentModulus] = (uint64_t *) malloc(polyDegree * sizeof(uint64_t));
  61. shoupphis[currentModulus] = (uint64_t *) malloc(polyDegree * sizeof(uint64_t));
  62. invpoly_times_invphis[currentModulus] = (uint64_t *) malloc(polyDegree * sizeof(uint64_t));
  63. shoupinvpoly_times_invphis[currentModulus] = (uint64_t *) malloc(polyDegree * sizeof(uint64_t));
  64. omegas[currentModulus] = (uint64_t *) malloc((2 * polyDegree) * sizeof(uint64_t));
  65. shoupomegas[currentModulus] = omegas[currentModulus] + polyDegree;
  66. invomegas[currentModulus] = (uint64_t *) malloc((2 * polyDegree) * sizeof(uint64_t));
  67. shoupinvomegas[currentModulus] = invomegas[currentModulus] + polyDegree;
  68. // We start by computing phi
  69. // The roots in the array are primitve 2**14-th roots
  70. // Squared 14-log2(polyDegree) times they become
  71. // polyDegree-th roots as required by the NTT
  72. // But first we get phi = sqrt(omega) squaring them 13-log2(polyDegree) times
  73. phi = primitive_roots[currentModulus];
  74. for (unsigned int i = 0 ; i < 13 - log2(polyDegree) ; i++)
  75. {
  76. phi = mulmod(phi, phi, moduli[currentModulus]);
  77. }
  78. // Now that temp = phi we initialize the array of phi**i values
  79. // Initialized to phi**0
  80. temp = 1;
  81. for (unsigned int i = 0 ; i < polyDegree ; i++)
  82. {
  83. phis[currentModulus][i] = temp;
  84. shoupphis[currentModulus][i] = ((uint128_t) temp << 64) / moduli[currentModulus];
  85. // phi**(i+1)
  86. temp = mulmod(temp, phi, moduli[currentModulus]);
  87. }
  88. // At the end of the loop temp = phi**polyDegree
  89. // Computation of invphi
  90. // phi**(2*polydegree)=1 -> temp*phi**(polyDegree-1) = phi**(-1)
  91. invphi = mulmod(temp, phis[currentModulus][polyDegree-1], moduli[currentModulus]);
  92. // Computation of the inverse of polyDegree using the inverse of kMaxPolyDegree
  93. invpolyDegree[currentModulus] = mulmod(invkMaxPolyDegree[currentModulus],
  94. kMaxPolyDegree/polyDegree, moduli[currentModulus]);
  95. // Now we can compute the table invpoly_times_invphis
  96. temp = invpolyDegree[currentModulus];
  97. for (unsigned int i = 0 ; i < polyDegree ; i++)
  98. {
  99. invpoly_times_invphis[currentModulus][i] = temp;
  100. shoupinvpoly_times_invphis[currentModulus][i] = ((uint128_t) temp << 64)
  101. / moduli[currentModulus];
  102. // This is invpolyDegree*invphi**(i+1)
  103. temp = mulmod(temp, invphi, moduli[currentModulus]);
  104. }
  105. // For the omegas it is easy, we just use the function of David Harvey modified for our needs
  106. omega = mulmod(phi, phi, moduli[currentModulus]);
  107. prep_wtab(omegas[currentModulus], shoupomegas[currentModulus], omega, polyDegree,
  108. moduli[currentModulus]);
  109. // And again for the invomegas
  110. invomega = mulmod(invphi, invphi, moduli[currentModulus]);
  111. prep_wtab(invomegas[currentModulus], shoupinvomegas[currentModulus], invomega, polyDegree,
  112. moduli[currentModulus]);
  113. // Inverse-CRT constants
  114. mpz_t tmpz, mpz_inverse;
  115. mpz_init(tmpz);
  116. mpz_init(mpz_inverse);
  117. // Compute first the product of all moduli but the current
  118. mpz_init_set_ui(liftingIntegers[currentModulus],1UL);
  119. for (unsigned int i = 0 ; i < nbModuli ; i++)
  120. {
  121. if (i == currentModulus) continue;
  122. mpz_import(tmpz, 1, 1, sizeof(uint64_t), 0, 0, &P64[i]);
  123. mpz_mul(liftingIntegers[currentModulus], liftingIntegers[currentModulus], tmpz);
  124. }
  125. // Compute the inverse of the product modulo the current modulus and multiply it with the product
  126. mpz_import(tmpz, 1, 1, sizeof(uint64_t), 0, 0, &moduli[currentModulus]);
  127. mpz_invert(mpz_inverse, liftingIntegers[currentModulus], tmpz);
  128. mpz_mul(liftingIntegers[currentModulus], liftingIntegers[currentModulus], mpz_inverse);
  129. mpz_clear(tmpz);
  130. mpz_clear(mpz_inverse);
  131. }
  132. // Compute the product of all moduli
  133. mpz_t tmpz;
  134. mpz_init(tmpz);
  135. mpz_init_set_ui(moduliProduct,1UL);
  136. for (unsigned int i = 0 ; i < nbModuli ; i++)
  137. {
  138. mpz_import(tmpz, 1, 1, sizeof(uint64_t), 0, 0, &moduli[i]);
  139. mpz_mul(moduliProduct, moduliProduct, tmpz);
  140. }
  141. mpz_clear(tmpz);
  142. uint64_t* inv_indexes_tmp = malloc_align<32, uint64_t>(polyDegree);
  143. inv_indexes = malloc_align<16>(polyDegree, inv_indexes);
  144. // Compute permutation indexes for inv_ntt
  145. for (size_t i = 0; i < polyDegree; i++)
  146. {
  147. size_t ii = i, r = 0;
  148. for (unsigned h = 1; h < polyDegree; h=h<<1)
  149. {
  150. r = (r << 1) | (ii & 1);
  151. ii >>= 1;
  152. }
  153. inv_indexes_tmp[i] = r;
  154. }
  155. // Invert the previous permutation. I think we can do better than that :)
  156. for (size_t i = 0; i < polyDegree; i++) {
  157. for (size_t j = 0; j < polyDegree; j++) {
  158. if (inv_indexes_tmp[j] == i) {
  159. inv_indexes[i] = j;
  160. break;
  161. }
  162. }
  163. }
  164. free(inv_indexes_tmp);
  165. alreadyInit = nbModuli;
  166. }
  167. // *********************************************************************
  168. // Fundamental setters (resulting on a call to the init function above)
  169. // *********************************************************************
  170. // Set or reset the degrees of the polynomials used and the modulus size (which fixes the modulus)
  171. void NFLlib::setNewParameters(unsigned int polyDegree_, unsigned int aggregatedModulusBitsize_)
  172. {
  173. // We don't use setpolyDegree to avoid configureNTT being called twice
  174. polyDegree = polyDegree_;
  175. // We use a special function for setting the modulus as it can be called independently
  176. // and requires some processing. This function also ensures that the NTT params are configured.
  177. setmodulus(aggregatedModulusBitsize_);
  178. }
  179. // Sets the modulus size AND configures NTT parmeters (which fixes a given modulus)
  180. void NFLlib::setmodulus(uint64_t aggregatedModulusBitsize_)
  181. {
  182. // For the CRT, from the aggregated modulus bitsize, we compute the number of necessary moduli
  183. if (aggregatedModulusBitsize_ % kModulusBitsize != 0)
  184. {
  185. std::cout << "NFLlib: CRITICAL. Modulus of " << aggregatedModulusBitsize_
  186. << " requested but only integer multiples of " << kModulusBitsize << " bits implemented. Exiting ..." << std::endl;
  187. exit(-1);
  188. }
  189. nbModuli=aggregatedModulusBitsize_/kModulusBitsize;
  190. moduli=new uint64_t[nbModuli]();
  191. configureNTT();
  192. }
  193. // Sets polyDegree AND configures NTT
  194. void NFLlib::setpolyDegree(unsigned int polyDegree_)
  195. {
  196. polyDegree = polyDegree_;
  197. configureNTT();
  198. }
  199. // *********************************************************
  200. // Getters
  201. // *********************************************************
  202. uint64_t* NFLlib::getmoduli() { return moduli; }
  203. unsigned short NFLlib::getnbModuli() { return nbModuli; }
  204. unsigned int NFLlib::getpolyDegree() { return polyDegree; }
  205. void NFLlib::copymoduliProduct(mpz_t dest) { mpz_init_set(dest, moduliProduct); }
  206. // **************************************
  207. // Random polynomial generation functions
  208. // **************************************
  209. // Two modes uniform or bounded (if uniform is false)
  210. // WARNING : The bounded mode only works for a bound
  211. // below the smaller of the moduli -> we use or for bounded noise
  212. // Allocates and sets a bounded random polynomial in FFT form calling setBoundedRandomPoly
  213. poly64 NFLlib::allocBoundedRandomPoly(uint64_t upperBound_, bool uniform_)
  214. {
  215. poly64 res = (poly64)calloc(polyDegree * nbModuli, sizeof(uint64_t));
  216. setBoundedRandomPoly(res, upperBound_, uniform_);
  217. return res;
  218. }
  219. // Sets a pre-allocated random polynomial in FFT form
  220. // If uniform = true upperBound is ignored and the coefficients are uniformly random
  221. // ASSUMPTION: if uniform = false upperBound is below all of the moduli used
  222. void NFLlib::setBoundedRandomPoly(poly64 res, uint64_t upperBound_, bool uniform_)
  223. {
  224. poly64 rnd, rnd_orig;
  225. uint64_t mask;
  226. if (uniform_ == false){
  227. // In bounded mode upperBound must be below the smaller of the moduli
  228. for (unsigned int cm = 0 ; cm < nbModuli ; cm++)
  229. {
  230. if (upperBound_ >= moduli[cm])
  231. {
  232. std::cout << "NFLlib: upperBound is larger than the moduli in setBoundedRandomPoly.";
  233. std::cout << " Unpredictable results ..." << std::endl;
  234. break;
  235. }
  236. }
  237. // We play with the rnd pointer (in the uniform case), and thus
  238. // we need to remember the allocated pointer to free it at the end
  239. rnd_orig = (poly64) malloc(polyDegree * sizeof(uint64_t));
  240. rnd = rnd_orig;
  241. // Get some randomness from the PRNG
  242. fastrandombytes((unsigned char *)rnd, polyDegree * sizeof(uint64_t));
  243. // upperBound is below the moduli so we create the same mask for all the moduli
  244. mask=(1ULL << (unsigned int)ceil(log2(upperBound_))) -1;
  245. for(unsigned int i=0;i<polyDegree;i++) {
  246. // First remove the heavy weight bits we dont need
  247. rnd[i]=(rnd[i]&mask);
  248. // When the random is still too large, reduce it
  249. // In order to follow strictly a uniform distribution we should
  250. // get another rnd but in order to follow the proofs of security
  251. // strictly we should also take noise from a gaussian ...
  252. if (rnd[i]>=upperBound_)
  253. {
  254. rnd[i]-=upperBound_;
  255. }
  256. for (unsigned int cm = 0 ; cm < nbModuli ; cm++)
  257. {
  258. res[polyDegree*cm+i] = rnd[i];
  259. }
  260. }
  261. }
  262. else // uniform == true
  263. {
  264. // In uniform mode we need randomness for all the polynomials in the CRT
  265. rnd_orig = (poly64) malloc(polyDegree * nbModuli * sizeof(uint64_t));
  266. // We play with the rnd pointer (in the uniform case), and thus
  267. // we need to remember the allocated pointer to free it at the end
  268. rnd = rnd_orig;
  269. fastrandombytes((unsigned char *)rnd, polyDegree * nbModuli * sizeof(uint64_t));
  270. for (unsigned int cm = 0 ; cm < nbModuli ; cm++)
  271. {
  272. // In the uniform case, instead of getting a big random (within the general moduli),
  273. // We rather prefer, for performance issues, to get smaller randoms for each module
  274. // The mask should be the same for all moduli (because they are the same size)
  275. // But for generality we prefer to compute it for each moduli so that we could have
  276. // moduli of different bitsize
  277. mask=(1ULL << (int)ceil(log2(moduli[cm]))) -1;
  278. for(unsigned int i=0;i<polyDegree;i++)
  279. {
  280. // First remove the heavy weight bits we dont need
  281. rnd[i]=(rnd[i]&mask);
  282. // When the random is still too large, reduce it
  283. if (rnd[i]>=moduli[cm])
  284. {
  285. rnd[i]-=moduli[cm];
  286. }
  287. res[i] = rnd[i];
  288. }
  289. rnd+=polyDegree;
  290. res+=polyDegree;
  291. }
  292. }
  293. free(rnd_orig);
  294. }
  295. // *********************************************************
  296. // Data import and export main functions
  297. // *********************************************************
  298. // Takes an array of buffers and:
  299. // 1) Converts them into a set of polynomials with arbitrary large coefficients
  300. // 2) Reduces the polys through CRT to have nbModuli contiguous polys with uint64_t coefficients
  301. // 3) Does the NTT transform
  302. // - inArrayOfBuffers array of buffers to take the bits from
  303. // - nbrOfBuffers nbr of buffers in the array
  304. // - dataBitsizePerBuffer bits that can be taken from each buffer
  305. // - bitsPerCoordinate bits used to create each coefficient (can be > 64 !)
  306. // - polyNumber set by the function to say how many polynomials are in the returned pointer
  307. poly64 *NFLlib::deserializeDataNFL(unsigned char **inArrayOfBuffers, uint64_t nbrOfBuffers, uint64_t dataBitsizePerBuffer, unsigned bitsPerCoordinate, uint64_t &polyNumber) {
  308. // We need to handle dataBitsize bits of data per buffer
  309. // each poly can take publicParams.getAbsorptionBitsize() bits so
  310. polyNumber = ceil((double)dataBitsizePerBuffer*(double)nbrOfBuffers/(double)(bitsPerCoordinate*polyDegree));
  311. // The uint64_t arrays are allocated and filled with zeros
  312. // So that we do not have to pad with zeros beyond the limit
  313. poly64* deserData = (poly64 *) calloc(polyNumber, sizeof(poly64));
  314. // bitsplitter does all the hard work WITHOUT using large numbers !
  315. deserData[0] = bitsplitter(inArrayOfBuffers, nbrOfBuffers, dataBitsizePerBuffer, bitsPerCoordinate);
  316. // We finish the work by applying the NTT transform
  317. #ifdef MULTI_THREAD
  318. #pragma omp parallel for
  319. #endif
  320. for (unsigned int i = 0 ; i < polyNumber ; i++)
  321. {
  322. deserData[i] = deserData[0]+i*nbModuli*polyDegree;
  323. #ifndef SIMULATE_PRE_NTT_DATA
  324. nttAndPowPhi(deserData[i]);
  325. #endif
  326. }
  327. return deserData;
  328. }
  329. // Serialize an array of poly64 elements into a compact byte buffer
  330. // Takes a set of polynomial coefficients and outputs their concatenation
  331. // - indata points to the polynomial coefficients
  332. // - outdata points to the concatenation obtained
  333. // - bitsPerChunk defines how many bits has each coefficient
  334. // - nb_of_uint64 defines how many coefficients must be concatenated
  335. // ASSUMPTION: all the polynomials are contiguously allocated
  336. // ASSUMPTION: outdata has allocated one more uint64_t than needed
  337. // ASSUMPTION: all the coefficients have the same size which is below 56 bits
  338. void NFLlib::serializeData64 (uint64_t* indata, unsigned char* outdata, unsigned int bitsPerChunk, uint64_t nb_of_uint64)
  339. {
  340. unsigned char *tmppointer;
  341. uint64_t *pointer64;
  342. pointer64 = (uint64_t *) outdata;
  343. uint32_t bitswritten=0;
  344. // Tricky approach playing with pointers to be able to infinitely add
  345. // up to 56 bits to any bit string present
  346. for (uint64_t i = 0 ; i < nb_of_uint64 ;)
  347. {
  348. while(bitswritten + bitsPerChunk <= 64)
  349. {
  350. *pointer64 |= (*indata++)<<bitswritten; i++;
  351. bitswritten += bitsPerChunk;
  352. if(i==nb_of_uint64) break;
  353. }
  354. tmppointer = (unsigned char*) pointer64;
  355. tmppointer+=bitswritten>>3;
  356. pointer64 = (uint64_t *) (tmppointer);
  357. bitswritten -=8*(bitswritten>>3);
  358. }
  359. }
  360. // Serialize an array of poly64 elements into a compact byte buffer
  361. // Takes a set of polynomial coefficients and outputs their concatenation
  362. // - indata points to the polynomial coefficients
  363. // - outdata points to the concatenation obtained
  364. // - bitsPerChunk defines how many bits has each coefficient
  365. // - nb_of_uint64 defines how many coefficients must be concatenated
  366. // ASSUMPTION: all the polynomials are contiguously allocated
  367. // ASSUMPTION: outdata has allocated one more uint64_t than needed
  368. // IMPORTANT NOTE: Unlike in serializeData64 bitsPerChunk can be arbitrarily large. This function considers that large coefficients are retrieved by blocks of varying size up to 32 bit. For example 100-bit coefficients will be retrieved by three blocks of 32 bits and a block of 4 looping that way for each 100-bit coefficient.
  369. void NFLlib::serializeData32 (uint32_t* indata, unsigned char* outdata, unsigned int bitsPerChunk, uint64_t nb_of_uint32){
  370. unsigned char *tmppointer;
  371. uint64_t *pointer64;
  372. pointer64 = (uint64_t *) outdata;
  373. uint32_t bitswritten=0;
  374. // See through how many block (=sub-chunk) we will have to loop to get each coefficient (=chunk)
  375. const double uint32PerChunk = (double)bitsPerChunk/32;
  376. const uint64_t int_uint32PerChunk = ceil(uint32PerChunk);
  377. const bool isint_uint32PerChunk = (uint32PerChunk==(double)int_uint32PerChunk);
  378. // Build masks for each sub-chunk
  379. uint64_t subchunkMasks[int_uint32PerChunk];
  380. unsigned int subchunkSizes[int_uint32PerChunk];
  381. // Increment with subchunkIndex=((subchunkIndex+1)%int_uint64PerChunk)
  382. // and use with subchunkSizes[subchunkIndex];
  383. unsigned int subchunkIndex = 0;
  384. for (int i = 0 ; i < int_uint32PerChunk - 1 ; i++)
  385. {
  386. subchunkSizes[i]=32;
  387. subchunkMasks[i] = (1ULL<<32)-1; // const mask for extracting 32 bits
  388. }
  389. subchunkSizes[int_uint32PerChunk-1] = bitsPerChunk - 32 * (int_uint32PerChunk - 1);
  390. subchunkMasks[int_uint32PerChunk-1] = (1ULL<<(subchunkSizes[int_uint32PerChunk-1]))-1;
  391. // Apply the same approach than in serializeData64 but with varying sizes
  392. for (uint64_t i = 0 ; i < nb_of_uint32 ;)
  393. {
  394. while(bitswritten + subchunkSizes[subchunkIndex] <= 64)
  395. {
  396. *pointer64 |= ((uint64_t)(*indata++))<<bitswritten; i++;
  397. bitswritten += subchunkSizes[subchunkIndex];
  398. subchunkIndex=((subchunkIndex+1)%int_uint32PerChunk);
  399. if(i==nb_of_uint32) break;
  400. }
  401. tmppointer = (unsigned char*) pointer64;
  402. tmppointer+=bitswritten>>3;
  403. pointer64 = (uint64_t *) (tmppointer);
  404. bitswritten -=8*(bitswritten>>3);
  405. }
  406. }
  407. // *********************************************************
  408. // Helper functions
  409. // *********************************************************
  410. // Allocate a polynomial potentially with all coefficients set to zero if nullpoly = true
  411. poly64 NFLlib::allocpoly(bool nullpoly)
  412. {
  413. if (nullpoly == true) return (poly64) calloc(polyDegree*nbModuli,sizeof(uint64_t));
  414. else return (poly64) malloc(polyDegree*nbModuli*sizeof(uint64_t));
  415. }
  416. // Lift a polynomial in CRT representation, into a polynomial with large integer coefficients
  417. mpz_t* NFLlib::poly2mpz(poly64 p)
  418. {
  419. mpz_t* resultmpz;
  420. mpz_t* tmpzbuffer;
  421. resultmpz=new mpz_t[polyDegree];
  422. tmpzbuffer=new mpz_t[nbModuli];
  423. for(unsigned i=0;i<polyDegree;i++) {
  424. mpz_init2(resultmpz[i],192);
  425. }
  426. for(int cm = 0; cm < nbModuli;cm++) {
  427. mpz_init2(tmpzbuffer[cm],192);
  428. }
  429. for(unsigned i=0;i<polyDegree;i++) {
  430. mpz_set_ui(resultmpz[i],0UL);
  431. for(int cm = 0; cm < nbModuli;cm++) {
  432. mpz_import(tmpzbuffer[cm], 1, 1, sizeof(uint64_t), 0, 0, p+i+polyDegree*cm);
  433. mpz_mul(tmpzbuffer[cm], liftingIntegers[cm], tmpzbuffer[cm]);
  434. mpz_add(resultmpz[i], resultmpz[i], tmpzbuffer[cm]);
  435. }
  436. mpz_mod(resultmpz[i], resultmpz[i], moduliProduct);
  437. }
  438. for(int cm = 0; cm < nbModuli;cm++) {
  439. mpz_clear(tmpzbuffer[cm]);
  440. }
  441. delete[] tmpzbuffer;
  442. return resultmpz;
  443. }
  444. // Debug printing function
  445. void NFLlib::print_poly64hex(poly64 p, unsigned int coeff_nbr)
  446. {
  447. std::cout << "[";
  448. for (unsigned int i = 0 ; i < coeff_nbr ; i++)
  449. {
  450. std::cout << std::hex << (unsigned int)(p[i]>>32)<<(unsigned int) p[i] << " ";
  451. }
  452. std::cout << "]" << std::dec << std::endl;
  453. }
  454. // Debug printing function
  455. void NFLlib::print_poly64(poly64 p, unsigned int coeff_nbr)
  456. {
  457. std::cout << "[";
  458. for (unsigned int i = 0 ; i < coeff_nbr ; i++)
  459. {
  460. std::cout << p[i] << " ";
  461. }
  462. std::cout << "]" << std::endl;
  463. }
  464. // *********************************************************
  465. // Destructors and closing or freeing functions
  466. // *********************************************************
  467. // Destructor
  468. NFLlib::~NFLlib()
  469. {
  470. freeNTTMemory();
  471. }
  472. // Everything is commented out because of hard to predict issues with MacOsX
  473. // Uncomment on a computer with that OS before releasing
  474. void NFLlib::freeNTTMemory(){
  475. // alreadyInit says how many arrays we have allocated for the NTT
  476. for (unsigned i = 0 ; i < alreadyInit; i++)
  477. {
  478. free(phis[i]);
  479. free(shoupphis[i]);
  480. free(invpoly_times_invphis[i]);
  481. free(shoupinvpoly_times_invphis[i]);
  482. free(omegas[i]);
  483. free(invomegas[i]);
  484. mpz_clear(liftingIntegers[i]);
  485. if (i == alreadyInit - 1)
  486. {
  487. free(shoupphis);
  488. free(invpoly_times_invphis);
  489. free(shoupinvpoly_times_invphis);
  490. free(omegas);
  491. free(shoupomegas);
  492. free(invomegas);
  493. free(shoupinvomegas);
  494. free(invpolyDegree);
  495. delete[] liftingIntegers;
  496. free(inv_indexes);
  497. mpz_clear(moduliProduct);
  498. }
  499. }
  500. alreadyInit = 0;
  501. }
  502. // ****************************************************************************************
  503. // THE DEN: Uncommented howling functions and pointer blood magic. Enter at your own risk.
  504. // ****************************************************************************************
  505. // We define first a back to back funtion to test our bitsplitter function
  506. // If DEBUG_BITSPLIT_B2B the function is used, else it is ignored
  507. //#define DEBUG_BITSPLIT_B2B
  508. #ifdef DEBUG_BITSPLIT_B2B
  509. #define DEBUG_BITSPLIT
  510. #define B2BTEST(inDataBuffers, nbrOfBuffers, bitsPerBuffer, bitsPerChunk, totalbitsread, bitsread, pointer64, bitsToRead) bitsplitter_backtoback_internal_test (inDataBuffers, nbrOfBuffers, bitsPerBuffer, bitsPerChunk, totalbitsread, bitsread, pointer64, bitsToRead)
  511. #else
  512. #define B2BTEST(inDataBuffers, nbrOfBuffers, bitsPerBuffer, bitsPerChunk, totalbitsread, bitsread, pointer64, bitsToRead) if(0);
  513. #endif
  514. uint64_t* NFLlib::bitsplitter_backtoback_internal_test (unsigned char** inDataBuffers, uint64_t nbrOfBuffers, uint64_t bitsPerBuffer, unsigned int bitsPerChunk, uint64_t totalbitsread,uint64_t bitsread, uint64_t* pointer64, unsigned int bitsToRead)
  515. {
  516. unsigned int bufferIndex = totalbitsread/bitsPerBuffer;
  517. uint64_t bitPositionInBuffer = totalbitsread - bufferIndex*bitsPerBuffer;
  518. uint64_t bytePositionInBuffer = bitPositionInBuffer/8;
  519. unsigned int bitPositionInByte = bitPositionInBuffer%8;
  520. if (bitPositionInBuffer+sizeof(uint64_t)*8 > bitsPerBuffer)
  521. {
  522. std::cerr << "WARNING: Bitsplit goes beyond buffer size (let's hope it is allocated)" << std::endl;
  523. }
  524. if (bitPositionInBuffer + bitsToRead > bitsPerBuffer)
  525. {
  526. std::cerr << "CRITICAL: Bitsplit reads AND USES data beyond buffer space" << std::endl;
  527. std::cerr << "totalbitsread " << totalbitsread << std::endl;
  528. std::cerr << "bufferIndex " << bufferIndex << std::endl;
  529. std::cerr << "bitPositionInBuffer " << bitPositionInBuffer << std::endl;
  530. std::cerr << "bytePositionInBuffer " << bytePositionInBuffer << std::endl;
  531. std::cerr << "CRITICAL: On B2B test called for " << bitsToRead << " bits" << std::endl;
  532. exit(-1);
  533. }
  534. uint64_t b2bresult = ((*((uint64_t *)(inDataBuffers[bufferIndex]+bytePositionInBuffer)))>>bitPositionInByte) & ((1ULL<<bitsToRead)-1) ;
  535. uint64_t bitsplitresult = ((*pointer64)>>bitsread) & ((1ULL<<bitsToRead)-1);
  536. if (b2bresult != bitsplitresult)
  537. {
  538. std::cerr << "CRITICAL: Bitsplit different from back to back function" << std::endl;
  539. std::cerr << "CRITICAL: Left " << b2bresult << std::endl;
  540. std::cerr << "CRITICAL: Right " << bitsplitresult << std::endl;
  541. exit(-1);
  542. }
  543. return 0;
  544. }
  545. inline void NFLlib::bs_loop (unsigned char** inDataBuffers, uint64_t nbrOfBuffers, uint64_t bitsPerBuffer, unsigned int bitsPerChunk, uint64_t *&tmpdata, uint64_t bufferIndex, uint64_t &bitsread, size_t &subchunkIndex)
  546. {
  547. // We redefine the amount of uint64 that can be produced given that we may have already read
  548. uint64_t bitstoread = bitsPerBuffer - bitsread;
  549. double nbChunks = (double)(bitsPerBuffer-bitsread)/bitsPerChunk;
  550. uint64_t int_nbChunks = floor(nbChunks);
  551. // How many uint64_t are needed to encode a chunk
  552. const double uint64PerChunk = (double)bitsPerChunk/56;
  553. const uint64_t int_uint64PerChunk = ceil(uint64PerChunk);
  554. const bool isint_uint64PerChunk = (uint64PerChunk==(double)int_uint64PerChunk);
  555. // Compute subchunk sizes and masks
  556. uint64_t subchunkMasks[int_uint64PerChunk];
  557. // Increment with subchunkIndex=((subchunkIndex+1)%int_uint64PerChunk) and
  558. // use subchunkSizes[subchunkIndex];
  559. unsigned int subchunkSizes[int_uint64PerChunk];
  560. for (unsigned i = 0 ; i < int_uint64PerChunk - 1 ; i++)
  561. {
  562. subchunkSizes[i]=56;
  563. subchunkMasks[i] = (1ULL<<56)-1; // const mask for extracting 56 bits
  564. }
  565. subchunkSizes[int_uint64PerChunk-1] = bitsPerChunk - 56 * (int_uint64PerChunk - 1);
  566. subchunkMasks[int_uint64PerChunk-1] = (1ULL<<(subchunkSizes[int_uint64PerChunk-1]))-1;
  567. #ifdef DEBUG_BITSPLIT
  568. for (int i = 0 ; i < int_uint64PerChunk ; i++)
  569. {
  570. std::cerr<<"bitsplit0 i="<<i<<std::endl;
  571. std::cerr<<"bitsplit0 subchunkSizes[i]="<<subchunkSizes[i]<<std::endl;
  572. std::cerr<<"bitsplit0 subchunkMasks[i]="<<subchunkMasks[i]<<std::endl;
  573. }
  574. #endif
  575. // We compute how many extra subchunks are available taking into account that we may
  576. // start this new buffer on the middle of a chunk
  577. uint64_t supplementalSubchunks = 0;
  578. uint64_t cumulatedsize = 0;
  579. for (unsigned i = 0 ; i < int_uint64PerChunk ; i++)
  580. {
  581. if (cumulatedsize + subchunkSizes[(subchunkIndex + i) % int_uint64PerChunk] <= (bitsPerBuffer-bitsread)-int_nbChunks*bitsPerChunk)
  582. {
  583. supplementalSubchunks++;
  584. cumulatedsize += subchunkSizes[(subchunkIndex + i) % int_uint64PerChunk];
  585. }
  586. else
  587. {
  588. break;
  589. }
  590. }
  591. uint64_t totalSubChunks=int_nbChunks*int_uint64PerChunk + supplementalSubchunks;
  592. #ifdef DEBUG_BITSPLIT
  593. std::cerr<<"bitsplit1 nbrOfBuffers="<<nbrOfBuffers<<std::endl;
  594. std::cerr<<"bitsplit1 nbChunks="<<nbChunks<<std::endl;
  595. std::cerr<<"bitsplit1 int_nbChunks="<<int_nbChunks<<std::endl;
  596. std::cerr<<"bitsplit1 uint64PerChunk="<<uint64PerChunk<<std::endl;
  597. std::cerr<<"bitsplit1 int_uint64PerChunk="<<int_uint64PerChunk<<std::endl;
  598. std::cerr<<"bitsplit1 isint_uint64PerChunk="<<isint_uint64PerChunk<<std::endl;
  599. std::cerr<<"bitsplit1 totalSubChunks="<<totalSubChunks<<std::endl;
  600. std::cerr<<"bitsplit1 cumulatedsize " << cumulatedsize << std::endl;
  601. std::cerr<<"bitsplit1 bitsPerBuffer " << bitsPerBuffer << std::endl;
  602. std::cerr<<"bitsplit1 bitsread " << bitsread << std::endl;
  603. std::cerr<<"bitsplit1 nextsubchunksize " << subchunkSizes[(subchunkIndex)]<< std::endl;
  604. std::cerr<<"bitsplit1 nextsubchunk " << subchunkIndex<< std::endl;
  605. #endif
  606. unsigned char *tmppointer;
  607. uint64_t *pointer64;
  608. pointer64 = (uint64_t *) inDataBuffers[bufferIndex];
  609. uint64_t bitsremaining=0;
  610. // Loop over the subchunks in the current buffer
  611. for (uint64_t i = 0 ; i < totalSubChunks ; )
  612. {
  613. // Get up to 64bits
  614. while (bitsread + subchunkSizes[subchunkIndex] <= 64)
  615. {
  616. *tmpdata = ((*pointer64)>>bitsread) & subchunkMasks[subchunkIndex];
  617. tmpdata++;i++;
  618. bitsread += subchunkSizes[subchunkIndex];
  619. subchunkIndex= (subchunkIndex+1 == int_uint64PerChunk ? 0 : subchunkIndex + 1);
  620. if(i==totalSubChunks) break;
  621. }
  622. if (bitstoread > 128)
  623. {
  624. unsigned shift = bitsread >>3;
  625. tmppointer = (unsigned char*) pointer64;
  626. tmppointer += shift;
  627. pointer64 = (uint64_t *) (tmppointer);
  628. bitstoread-= shift<<3;
  629. bitsread -= shift<<3;
  630. }
  631. else
  632. {
  633. tmppointer = (unsigned char*) pointer64;
  634. while ((64 - bitsread < subchunkSizes[subchunkIndex]) && bitstoread > 0)
  635. {
  636. tmppointer++;
  637. bitsread -= 8;
  638. bitstoread -= 8;
  639. }
  640. pointer64 = (uint64_t *) (tmppointer);
  641. }
  642. }
  643. // If there is a last partial subchunk in this buffer, read it part from this buffer and part
  644. // from next buffer if available
  645. bitsremaining = (uint64_t) round((nbChunks-int_nbChunks)*bitsPerChunk - cumulatedsize );
  646. #ifdef DEBUG_BITSPLIT
  647. std::cout<<"bitsplit2 bitsremaining (should be <56)="<<bitsremaining<<std::endl;
  648. #endif
  649. if (bitsremaining !=0)
  650. {
  651. size_t shift=(64-(bitsread+bitsremaining))/8;
  652. bitsread+=(shift<<3);
  653. tmppointer = (unsigned char*) pointer64;
  654. tmppointer-=shift;
  655. pointer64 = (uint64_t *) (tmppointer);
  656. *tmpdata = ((*pointer64)>>bitsread) & ((1ULL<<bitsremaining)-1);
  657. // If there is another buffer to deal with, finish the current tmpdata uint64_t
  658. if (bufferIndex < nbrOfBuffers - 1)
  659. {
  660. pointer64 = (uint64_t *) inDataBuffers[bufferIndex+1];
  661. *tmpdata |= ((*pointer64)<<bitsremaining) & subchunkMasks[subchunkIndex];
  662. // We restart bitsread to the bits read in the new buffer
  663. bitsread = subchunkSizes[subchunkIndex] - bitsremaining;
  664. subchunkIndex= (subchunkIndex+1 == int_uint64PerChunk ? 0 : subchunkIndex + 1);
  665. tmpdata++;
  666. }
  667. }
  668. else
  669. {
  670. bitsread = 0;
  671. }
  672. }
  673. inline void NFLlib::bs_finish(poly64 &outdata, uint64_t int_uint64PerChunk, uint64_t polyNumber, uint64_t* splitData, uint64_t nbrOfBuffers, uint64_t bitsPerBuffer, unsigned int bitsPerChunk)
  674. {
  675. if(int_uint64PerChunk>1) {
  676. outdata=(poly64) calloc(polyNumber*nbModuli*polyDegree + 1,sizeof(uint64_t));
  677. internalLongIntegersToCRT( splitData, outdata, int_uint64PerChunk, ceil(((double)bitsPerBuffer*nbrOfBuffers)/bitsPerChunk));
  678. free(splitData);
  679. }
  680. else
  681. {
  682. if (nbModuli > 1)
  683. {
  684. outdata=(poly64) calloc(polyNumber*nbModuli*polyDegree + 1,sizeof(uint64_t));
  685. for (unsigned i = 0 ; i < polyNumber ; i++)
  686. {
  687. for (int cm = 0 ; cm < nbModuli ; cm++)
  688. {
  689. memcpy(outdata + i*polyDegree*nbModuli + cm*polyDegree,
  690. splitData + i*polyDegree, polyDegree*sizeof(uint64_t));
  691. }
  692. }
  693. free(splitData);
  694. }
  695. else
  696. {
  697. outdata=splitData;
  698. }
  699. }
  700. }
  701. // This function does all the hard work of deserializeDataNFL
  702. // 1) Converts input into a set of polynomials with arbitrary large coefficients
  703. // 2) Reduces the polys through CRT to have nbModuli contiguous polys with uint64_t coefficients
  704. // Do not try to understand it, it is a nightmare, we won't try to explain it :)
  705. uint64_t* NFLlib::bitsplitter (unsigned char** inDataBuffers, uint64_t nbrOfBuffers, uint64_t bitsPerBuffer, unsigned int bitsPerChunk)
  706. {
  707. // If you don't need to change me don't try to understand me
  708. // If you need to change me, build me again from scratch :)
  709. // How many uint64_t are needed to encode a chunk
  710. const double uint64PerChunk = (double)bitsPerChunk/56;
  711. const uint64_t int_uint64PerChunk = ceil(uint64PerChunk);
  712. // How many polynomials are needed to encode the data
  713. uint64_t polyNumber =
  714. ceil((double)bitsPerBuffer*(double)nbrOfBuffers/(double)(bitsPerChunk*polyDegree));
  715. uint64_t* splitData =
  716. (uint64_t*)(calloc(polyNumber*polyDegree*int_uint64PerChunk+1,sizeof(uint64_t)));
  717. uint64_t* tmpdata=splitData;
  718. uint64_t bitsread=0;
  719. size_t subchunkIndex=0;
  720. // Loop over the buffers
  721. for (uint64_t h = 0 ; h < nbrOfBuffers ; h++)
  722. {
  723. bs_loop (inDataBuffers, nbrOfBuffers, bitsPerBuffer, bitsPerChunk,
  724. tmpdata, h, bitsread, subchunkIndex);
  725. }
  726. poly64 outdata;
  727. bs_finish(outdata, int_uint64PerChunk, polyNumber, splitData, nbrOfBuffers, bitsPerBuffer, bitsPerChunk);
  728. return outdata;
  729. }
  730. // Subroutine for bitsplitter, the daemonic function. This is the function that allows
  731. // us to circumvect GMP
  732. void NFLlib::internalLongIntegersToCRT(uint64_t* tmpdata, poly64 outdata, uint64_t int_uint64PerChunk, uint64_t totalNbChunks)
  733. {
  734. uint64_t* outdataPtr=outdata;
  735. uint64_t* indataPtr=tmpdata;
  736. uint64_t multiplier[nbModuli][int_uint64PerChunk];
  737. uint64_t* chunkParts[int_uint64PerChunk];
  738. for(int cm=0;cm<nbModuli;cm++)
  739. {
  740. multiplier[cm][0]=1;
  741. for(unsigned j=1;j<int_uint64PerChunk;j++)
  742. {
  743. multiplier[cm][j] = mulmod(multiplier[cm][j-1],1ULL<<56,moduli[cm]);
  744. }
  745. }
  746. for(unsigned i=0;i<totalNbChunks;i++)
  747. {
  748. for(unsigned j=0;j<int_uint64PerChunk;j++)
  749. {
  750. for(int cm=0;cm<nbModuli;cm++)
  751. {
  752. // set to zero before computation if not calloc'd
  753. *(outdataPtr+cm*polyDegree) += mulmod(*(indataPtr+j), multiplier[cm][j],moduli[cm]);
  754. }
  755. }
  756. indataPtr+=int_uint64PerChunk;
  757. outdataPtr++;
  758. if((i+1)%polyDegree==0)
  759. {
  760. outdataPtr += polyDegree*(nbModuli-1);
  761. }
  762. }
  763. }
  764. // ****************************************************************************************
  765. // YOU EXITED THE DEN ALIVE! Either you jumped a lot of lines,
  766. // or you became completely insane ... in the latter case ...
  767. // Welcome to the group!
  768. // ****************************************************************************************