fp2e.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604
  1. /*
  2. * File: dclxvi-20130329/fp2e.c
  3. * Author: Ruben Niederhagen, Peter Schwabe
  4. * Public Domain
  5. */
  6. #include <stdio.h>
  7. #include <assert.h>
  8. #include <math.h>
  9. #include "cpucycles.h"
  10. #ifdef NEW_PARAMETERS
  11. #include "scalar_512.h"
  12. #else
  13. #include "scalar.h"
  14. #endif
  15. #include "mul.h"
  16. extern "C" {
  17. #include "fpe.h"
  18. #include "fp2e.h"
  19. }
  20. extern const double bn_v;
  21. extern const double bn_v6;
  22. extern const scalar_t bn_pminus2;
  23. #ifdef N_OPS
  24. unsigned long long mulfp2ctr;
  25. unsigned long long mulfp2fpctr;
  26. unsigned long long sqfp2ctr;
  27. unsigned long long invfp2ctr;
  28. unsigned long long double2fp2ctr;
  29. unsigned long long doublefp2ctr;
  30. unsigned long long triple2fp2ctr;
  31. unsigned long long triplefp2ctr;
  32. unsigned long long mul_scalarfp2ctr;
  33. unsigned long long add2fp2ctr;
  34. unsigned long long addfp2ctr;
  35. unsigned long long sub2fp2ctr;
  36. unsigned long long subfp2ctr;
  37. unsigned long long neg2fp2ctr;
  38. unsigned long long negfp2ctr;
  39. unsigned long long mulxifp2ctr;
  40. unsigned long long conjugatefp2ctr;
  41. unsigned long long short_coeffredfp2ctr;
  42. #endif
  43. #ifndef QHASM
  44. void fp2e_short_coeffred_c(fp2e_t rop)
  45. {
  46. #ifdef N_OPS
  47. short_coeffredfp2ctr++;
  48. #endif
  49. mydouble carry11 = round(rop->v[22]/bn_v);
  50. mydouble carry11b = round(rop->v[23]/bn_v);
  51. rop->v[22] = remround(rop->v[22],bn_v);
  52. rop->v[23] = remround(rop->v[23],bn_v);
  53. rop->v[0] = rop->v[0] - carry11;
  54. rop->v[1] = rop->v[1] - carry11b;
  55. rop->v[6] = rop->v[6] - carry11;
  56. rop->v[7] = rop->v[7] - carry11b;
  57. rop->v[12] = rop->v[12] - 4*carry11;
  58. rop->v[13] = rop->v[13] - 4*carry11b;
  59. rop->v[18] = rop->v[18] - carry11;
  60. rop->v[19] = rop->v[19] - carry11b;
  61. mydouble carry1 = round(rop->v[2]/bn_v);
  62. mydouble carry1b = round(rop->v[3]/bn_v);
  63. rop->v[2] = remround(rop->v[2],bn_v);
  64. rop->v[3] = remround(rop->v[3],bn_v);
  65. rop->v[4] += carry1;
  66. rop->v[5] += carry1b;
  67. mydouble carry3 = round(rop->v[6]/bn_v);
  68. mydouble carry3b = round(rop->v[7]/bn_v);
  69. rop->v[6] = remround(rop->v[6],bn_v);
  70. rop->v[7] = remround(rop->v[7],bn_v);
  71. rop->v[8] += carry3;
  72. rop->v[9] += carry3b;
  73. mydouble carry5 = round(rop->v[10]/bn_v);
  74. mydouble carry5b = round(rop->v[11]/bn_v);
  75. rop->v[10] = remround(rop->v[10],bn_v);
  76. rop->v[11] = remround(rop->v[11],bn_v);
  77. rop->v[12] += carry5;
  78. rop->v[13] += carry5b;
  79. mydouble carry7 = round(rop->v[14]/bn_v);
  80. mydouble carry7b = round(rop->v[15]/bn_v);
  81. rop->v[14] = remround(rop->v[14],bn_v);
  82. rop->v[15] = remround(rop->v[15],bn_v);
  83. rop->v[16] += carry7;
  84. rop->v[17] += carry7b;
  85. mydouble carry9 = round(rop->v[18]/bn_v);
  86. mydouble carry9b = round(rop->v[19]/bn_v);
  87. rop->v[18] = remround(rop->v[18],bn_v);
  88. rop->v[19] = remround(rop->v[19],bn_v);
  89. rop->v[20] += carry9;
  90. rop->v[21] += carry9b;
  91. mydouble carry0 = round(rop->v[0]/bn_v6);
  92. mydouble carry0b = round(rop->v[1]/bn_v6);
  93. rop->v[0] = remround(rop->v[0],bn_v6);
  94. rop->v[1] = remround(rop->v[1],bn_v6);
  95. rop->v[2] += carry0;
  96. rop->v[3] += carry0b;
  97. mydouble carry2 = round(rop->v[4]/bn_v);
  98. mydouble carry2b = round(rop->v[5]/bn_v);
  99. rop->v[4] = remround(rop->v[4],bn_v);
  100. rop->v[5] = remround(rop->v[5],bn_v);
  101. rop->v[6] += carry2;
  102. rop->v[7] += carry2b;
  103. mydouble carry4 = round(rop->v[8]/bn_v);
  104. mydouble carry4b = round(rop->v[9]/bn_v);
  105. rop->v[8] = remround(rop->v[8],bn_v);
  106. rop->v[9] = remround(rop->v[9],bn_v);
  107. rop->v[10] += carry4;
  108. rop->v[11] += carry4b;
  109. mydouble carry6 = round(rop->v[12]/bn_v6);
  110. mydouble carry6b = round(rop->v[13]/bn_v6);
  111. rop->v[12] = remround(rop->v[12],bn_v6);
  112. rop->v[13] = remround(rop->v[13],bn_v6);
  113. rop->v[14] += carry6;
  114. rop->v[15] += carry6b;
  115. mydouble carry8 = round(rop->v[16]/bn_v);
  116. mydouble carry8b = round(rop->v[17]/bn_v);
  117. rop->v[16] = remround(rop->v[16],bn_v);
  118. rop->v[17] = remround(rop->v[17],bn_v);
  119. rop->v[18] += carry8;
  120. rop->v[19] += carry8b;
  121. mydouble carry10 = round(rop->v[20]/bn_v);
  122. mydouble carry10b = round(rop->v[21]/bn_v);
  123. rop->v[20] = remround(rop->v[20],bn_v);
  124. rop->v[21] = remround(rop->v[21],bn_v);
  125. rop->v[22] += carry10;
  126. rop->v[23] += carry10b;
  127. }
  128. #endif
  129. void fp2e_to_2fpe(fpe_t ropa, fpe_t ropb, const fp2e_t op)//ai+b
  130. {
  131. int i;
  132. for(i=0;i<12;i++)
  133. {
  134. ropb->v[i] = op->v[2*i];
  135. ropa->v[i] = op->v[2*i+1];
  136. }
  137. }
  138. void _2fpe_to_fp2e(fp2e_t rop, const fpe_t opa, const fpe_t opb)//ai+b
  139. {
  140. int i;
  141. for(i=0;i<12;i++)
  142. {
  143. rop->v[2*i] = opb->v[i];
  144. rop->v[2*i+1] = opa->v[i];
  145. }
  146. }
  147. // Set fp2e_t rop to given value:
  148. void fp2e_set(fp2e_t rop, const fp2e_t op)
  149. {
  150. int i;
  151. for(i=0;i<24;i++)
  152. rop->v[i] = op->v[i];
  153. }
  154. /* Communicate the fact that the fp2e is reduced (and that we don't know anything more about it) */
  155. void fp2e_isreduced(fp2e_t rop)
  156. {
  157. setmax(rop->v[0],(long)bn_v6/2);
  158. setmax(rop->v[1],(long)bn_v6/2);
  159. setmax(rop->v[12],(long)bn_v6/2);
  160. setmax(rop->v[13],(long)bn_v6/2);
  161. setmax(rop->v[2],(long)bn_v/2);
  162. setmax(rop->v[3],(long)bn_v/2);
  163. setmax(rop->v[6],(long)bn_v/2);
  164. setmax(rop->v[7],(long)bn_v/2);
  165. setmax(rop->v[8],(long)bn_v/2);
  166. setmax(rop->v[9],(long)bn_v/2);
  167. setmax(rop->v[14],(long)bn_v/2);
  168. setmax(rop->v[15],(long)bn_v/2);
  169. setmax(rop->v[18],(long)bn_v/2);
  170. setmax(rop->v[19],(long)bn_v/2);
  171. setmax(rop->v[20],(long)bn_v/2);
  172. setmax(rop->v[21],(long)bn_v/2);
  173. //XXX: Change additive constant
  174. setmax(rop->v[4],(long)bn_v/2+2331); /* TODO: Think about value */
  175. setmax(rop->v[5],(long)bn_v/2+2331); /* TODO: Think about value */
  176. setmax(rop->v[10],(long)bn_v/2+2331); /* TODO: Think about value */
  177. setmax(rop->v[11],(long)bn_v/2+2331); /* TODO: Think about value */
  178. setmax(rop->v[16],(long)bn_v/2+2331); /* TODO: Think about value */
  179. setmax(rop->v[17],(long)bn_v/2+2331); /* TODO: Think about value */
  180. setmax(rop->v[22],(long)bn_v/2+2331); /* TODO: Think about value */
  181. setmax(rop->v[23],(long)bn_v/2+2331); /* TODO: Think about value */
  182. }
  183. // Set fp2e_t rop to given value:
  184. void fp2e_set_fpe(fp2e_t rop, const fpe_t op)
  185. {
  186. int i;
  187. for(i=0;i<12;i++)
  188. {
  189. rop->v[2*i] = op->v[i];
  190. rop->v[2*i+1] = 0;
  191. }
  192. }
  193. // Set rop to one
  194. void fp2e_setone(fp2e_t rop)
  195. {
  196. int i;
  197. for(i=1;i<24;i++)
  198. rop->v[i] = 0;
  199. rop->v[0] = 1.;
  200. }
  201. // Set rop to zero
  202. void fp2e_setzero(fp2e_t rop)
  203. {
  204. int i;
  205. for(i=0;i<24;i++)
  206. rop->v[i] = 0;
  207. }
  208. // Compare for equality:
  209. int fp2e_iseq(const fp2e_t op1, const fp2e_t op2)
  210. {
  211. fpe_t a1,b1,a2,b2;
  212. fp2e_to_2fpe(a1,b1,op1);
  213. fp2e_to_2fpe(a2,b2,op2);
  214. return fpe_iseq(a1,a2) && fpe_iseq(b1,b2);
  215. }
  216. int fp2e_isone(const fp2e_t op)
  217. {
  218. fpe_t ta, tb;
  219. fp2e_to_2fpe(ta, tb, op);
  220. int ret = fpe_iszero(ta);
  221. ret = ret && fpe_isone(tb);
  222. return ret;
  223. }
  224. int fp2e_iszero(const fp2e_t op)
  225. {
  226. fpe_t ta, tb;
  227. fp2e_to_2fpe(ta, tb, op);
  228. int ret = fpe_iszero(ta);
  229. ret = ret && fpe_iszero(tb);
  230. return ret;
  231. }
  232. void fp2e_cmov(fp2e_t rop, const fp2e_t op, int c)
  233. {
  234. int i;
  235. for(i=0;i<24;i++)
  236. rop->v[i] = (1-c)*rop->v[i] + c*op->v[i];
  237. }
  238. #ifndef QHASM
  239. // Double an fp2e:
  240. void fp2e_double2_c(fp2e_t rop)
  241. {
  242. #ifdef N_OPS
  243. double2fp2ctr++;
  244. #endif
  245. int i;
  246. for(i=0;i<24;i++)
  247. rop->v[i] = 2*rop->v[i];
  248. }
  249. #endif
  250. #ifndef QHASM
  251. // Double an fp2e:
  252. void fp2e_double_c(fp2e_t rop, const fp2e_t op)
  253. {
  254. #ifdef N_OPS
  255. doublefp2ctr++;
  256. #endif
  257. int i;
  258. for(i=0;i<24;i++)
  259. rop->v[i] = 2*op->v[i];
  260. }
  261. #endif
  262. #ifndef QHASM
  263. // Triple an fp2e:
  264. void fp2e_triple2_c(fp2e_t rop)
  265. {
  266. #ifdef N_OPS
  267. triple2fp2ctr++;
  268. #endif
  269. int i;
  270. for(i=0;i<24;i++)
  271. rop->v[i] = 3*rop->v[i];
  272. }
  273. #endif
  274. #ifndef QHASM
  275. // Triple an fp2e:
  276. void fp2e_triple_c(fp2e_t rop, const fp2e_t op)
  277. {
  278. #ifdef N_OPS
  279. triplefp2ctr++;
  280. #endif
  281. int i;
  282. for(i=0;i<24;i++)
  283. rop->v[i] = 3*op->v[i];
  284. }
  285. #endif
  286. void fp2e_mul_scalar(fp2e_t rop, const fp2e_t op, const int s)
  287. {
  288. #ifdef N_OPS
  289. mul_scalarfp2ctr++;
  290. #endif
  291. int i;
  292. for(i=0;i<24;i++)
  293. rop->v[i] = s*op->v[i];
  294. }
  295. // Add two fp2e, store result in op1:
  296. #ifndef QHASM
  297. void fp2e_add2_c(fp2e_t op1, const fp2e_t op2)
  298. {
  299. #ifdef N_OPS
  300. add2fp2ctr++;
  301. #endif
  302. int i;
  303. for(i=0;i<24;i++)
  304. op1->v[i] += op2->v[i];
  305. }
  306. #endif
  307. #ifndef QHASM
  308. // Add two fp2e, store result in rop:
  309. void fp2e_add_c(fp2e_t rop, const fp2e_t op1, const fp2e_t op2)
  310. {
  311. #ifdef N_OPS
  312. addfp2ctr++;
  313. #endif
  314. int i;
  315. for(i=0;i<24;i++)
  316. rop->v[i] = op1->v[i] + op2->v[i];
  317. }
  318. #endif
  319. #ifndef QHASM
  320. // Subtract op2 from op1, store result in op1:
  321. void fp2e_sub2_c(fp2e_t op1, const fp2e_t op2)
  322. {
  323. #ifdef N_OPS
  324. sub2fp2ctr++;
  325. #endif
  326. int i;
  327. for(i=0;i<24;i++)
  328. op1->v[i] -= op2->v[i];
  329. }
  330. #endif
  331. #ifndef QHASM
  332. // Subtract op2 from op1, store result in rop:
  333. void fp2e_sub_c(fp2e_t rop, const fp2e_t op1, const fp2e_t op2)
  334. {
  335. #ifdef N_OPS
  336. subfp2ctr++;
  337. #endif
  338. int i;
  339. for(i=0;i<24;i++)
  340. rop->v[i] = op1->v[i] - op2->v[i];
  341. }
  342. #endif
  343. #ifndef QHASM
  344. // Negate op
  345. void fp2e_neg2_c(fp2e_t op)
  346. {
  347. #ifdef N_OPS
  348. neg2fp2ctr++;
  349. #endif
  350. int i;
  351. for(i=0;i<24;i++)
  352. op->v[i] = -op->v[i];
  353. }
  354. #endif
  355. #ifndef QHASM
  356. // Negate op
  357. void fp2e_neg_c(fp2e_t rop, const fp2e_t op)
  358. {
  359. #ifdef N_OPS
  360. negfp2ctr++;
  361. #endif
  362. int i;
  363. for(i=0;i<24;i++)
  364. rop->v[i] = -op->v[i];
  365. }
  366. #endif
  367. #ifndef QHASM
  368. // Conjugates: aX+b to -aX+b
  369. void fp2e_conjugate_c(fp2e_t rop, const fp2e_t op)
  370. {
  371. #ifdef N_OPS
  372. conjugatefp2ctr++;
  373. #endif
  374. int i;
  375. for(i=0;i<24;i+=2)
  376. {
  377. rop->v[i] = op->v[i];
  378. rop->v[i+1] = op->v[i+1] * (-1);
  379. }
  380. }
  381. #endif
  382. #ifndef QHASM
  383. // Multiply two fp2e, store result in rop:
  384. void fp2e_mul_c(fp2e_t rop, const fp2e_t op1, const fp2e_t op2)
  385. {
  386. #ifdef N_OPS
  387. mulfp2ctr += 1;
  388. #endif
  389. fpe_t a1, b1, a2, b2, r1, r2;
  390. mydouble a3[24], b3[24];
  391. int i;
  392. mydouble t0[24], t1[24], t2[24], t3[24];
  393. fp2e_to_2fpe(a1, b1, op1);
  394. fp2e_to_2fpe(a2, b2, op2);
  395. polymul(t1, a1->v, b2->v); // t1 = a1*b2
  396. polymul(t2, b1->v, a2->v); // t2 = b1*a2
  397. for(i=0; i<12; i++) // t3 = 1*a1
  398. {
  399. t3[i] = 1*a1->v[i];
  400. }
  401. polymul(t3, t3, a2->v); // t3 = 1*a1*a2
  402. polymul(t0, b1->v, b2->v); // t0 = b1*b2
  403. for(i=0; i<23; i++)
  404. {
  405. a3[i] = t1[i] + t2[i]; // a3 = a1*b2 + b1*a2
  406. b3[i] = t0[i] - t3[i]; // b3 = b1*b2 - 1*a1*a2
  407. }
  408. degred(a3);
  409. degred(b3);
  410. coeffred_round_par(a3);
  411. coeffred_round_par(b3);
  412. fpe_set_doublearray(r1, a3);
  413. fpe_set_doublearray(r2, b3);
  414. _2fpe_to_fp2e(rop, r1, r2);
  415. }
  416. #endif
  417. #ifndef QHASM
  418. // Square an fp2e, store result in rop:
  419. void fp2e_square_c(fp2e_t rop, const fp2e_t op)
  420. {
  421. #ifdef N_OPS
  422. sqfp2ctr += 1;
  423. #endif
  424. fpe_t a1, b1, r1, r2;
  425. mydouble ropa[24], ropb[24];
  426. fp2e_to_2fpe(a1, b1, op);
  427. int i;
  428. /* CheckDoubles are not smart enough to recognize
  429. * binomial formula to compute b^2-a^2 */
  430. #ifdef CHECK
  431. mydouble d1[24];
  432. polymul(d1, a1->v, a1->v);
  433. polymul(ropb, b1->v, b1->v);
  434. polymul(ropa, b1->v, a1->v);
  435. for(i=0;i<23;i++)
  436. {
  437. ropb[i] -= d1[i];
  438. ropa[i] *= 2;
  439. }
  440. #else
  441. fpe_t t1, t2, t3;
  442. for(i=0;i<12;i++)
  443. {
  444. t1->v[i] = a1->v[i] + b1->v[i];
  445. t2->v[i] = b1->v[i] - a1->v[i];
  446. t3->v[i] = 2*b1->v[i];
  447. }
  448. polymul(ropa, a1->v, t3->v);
  449. polymul(ropb, t1->v, t2->v);
  450. #endif
  451. degred(ropa);
  452. degred(ropb);
  453. coeffred_round_par(ropa);
  454. coeffred_round_par(ropb);
  455. fpe_set_doublearray(r1, ropa);
  456. fpe_set_doublearray(r2, ropb);
  457. _2fpe_to_fp2e(rop, r1, r2);
  458. }
  459. #endif
  460. #ifndef NEW_PARAMETERS
  461. #ifndef QHASM
  462. // Multiply by xi=i+3 which is used to construct F_p^6
  463. // (a*i + b)*(i + 3) = (3*b - 1*a) + (3*a + b)*i
  464. void fp2e_mulxi_c(fp2e_t rop, const fp2e_t op)
  465. {
  466. #ifdef N_OPS
  467. mulxifp2ctr++;
  468. #endif
  469. fpe_t a, b, t1, t2, t3, t4, t5;
  470. fp2e_to_2fpe(a, b, op);
  471. int i;
  472. for(i=0; i<12; i++)
  473. {
  474. t1->v[i] = 3*a->v[i]; // t1 = 3*a
  475. t2->v[i] = 3*b->v[i]; // t2 = 3*b
  476. t3->v[i] = 1*a->v[i]; // t3 = 1*a
  477. }
  478. fpe_add(t4, t1, b); // t4 = 3*a + b
  479. fpe_sub(t5, t2, t3); // t5 = 3*b - 1*a
  480. _2fpe_to_fp2e(rop, t4, t5);
  481. }
  482. #endif
  483. #endif
  484. // Scalar multiple of an fp2e, store result in rop:
  485. #ifndef QHASM
  486. void fp2e_mul_fpe_c(fp2e_t rop, const fp2e_t op1, const fpe_t op2)
  487. {
  488. #ifdef N_OPS
  489. mulfp2fpctr += 1;
  490. #endif
  491. fpe_t a1,b1;
  492. fp2e_to_2fpe(a1,b1,op1);
  493. fpe_mul(a1,a1,op2);
  494. fpe_mul(b1,b1,op2);
  495. _2fpe_to_fp2e(rop,a1,b1);
  496. }
  497. #endif
  498. #ifndef QHASM
  499. /* computes (op1->m_a*op2->m_a, op1->m_b*op2->m_b) */
  500. void fp2e_parallel_coeffmul_c(fp2e_t rop, const fp2e_t op1, const fp2e_t op2)
  501. {
  502. fpe_t a1, b1, a2, b2; // Needed for intermediary results
  503. fp2e_to_2fpe(a1,b1,op1);
  504. fp2e_to_2fpe(a2,b2,op2);
  505. fpe_mul(a1, a1, a2);
  506. fpe_mul(b1, b1, b2);
  507. _2fpe_to_fp2e(rop, a1, b1);
  508. }
  509. #endif
  510. // Inverse multiple of an fp2e, store result in rop:
  511. void fp2e_invert(fp2e_t rop, const fp2e_t op)
  512. {
  513. #ifdef N_OPS
  514. invfp2ctr += 1;
  515. #endif
  516. /* New version */
  517. fp2e_t d1, d2;
  518. int i;
  519. fp2e_parallel_coeffmul(d1, op, op);
  520. for(i=0;i<24;i+=2)
  521. d1->v[i] = d1->v[i+1] = d1->v[i] + d1->v[i+1];
  522. fp2e_short_coeffred(d1);
  523. for(i=0;i<24;i+=2)
  524. {
  525. d2->v[i] = op->v[i];
  526. d2->v[i+1] = -op->v[i+1];
  527. }
  528. fp2e_set(rop,d1);
  529. for(i = 254; i >= 0; i--)
  530. {
  531. fp2e_parallel_coeffmul(rop,rop,rop);
  532. if(scalar_getbit(bn_pminus2, i))
  533. fp2e_parallel_coeffmul(rop,rop,d1);
  534. }
  535. fp2e_parallel_coeffmul(rop,rop,d2);
  536. }
  537. // Print the fp2e:
  538. void fp2e_print(FILE *outfile, const fp2e_t op)
  539. {
  540. fpe_t a,b;
  541. fp2e_to_2fpe(a,b,op);
  542. fpe_print(outfile, a);
  543. fprintf(outfile," * X + ");
  544. fpe_print(outfile, b);
  545. }