fp2e.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593
  1. /*
  2. * File: dclxvi-20130329/fp2e.c
  3. * Author: Ruben Niederhagen, Peter Schwabe
  4. * Public Domain
  5. */
  6. #include <stdio.h>
  7. #include <assert.h>
  8. #include <math.h>
  9. #include "cpucycles.h"
  10. #include "fp2e.h"
  11. #include "fpe.h"
  12. #include "mul.h"
  13. #include "scalar.h"
  14. extern const double bn_v;
  15. extern const double bn_v6;
  16. extern const scalar_t bn_pminus2;
  17. #ifdef N_OPS
  18. unsigned long long mulfp2ctr;
  19. unsigned long long mulfp2fpctr;
  20. unsigned long long sqfp2ctr;
  21. unsigned long long invfp2ctr;
  22. unsigned long long double2fp2ctr;
  23. unsigned long long doublefp2ctr;
  24. unsigned long long triple2fp2ctr;
  25. unsigned long long triplefp2ctr;
  26. unsigned long long mul_scalarfp2ctr;
  27. unsigned long long add2fp2ctr;
  28. unsigned long long addfp2ctr;
  29. unsigned long long sub2fp2ctr;
  30. unsigned long long subfp2ctr;
  31. unsigned long long neg2fp2ctr;
  32. unsigned long long negfp2ctr;
  33. unsigned long long mulxifp2ctr;
  34. unsigned long long conjugatefp2ctr;
  35. unsigned long long short_coeffredfp2ctr;
  36. #endif
  37. #ifndef QHASM
  38. void fp2e_short_coeffred_c(fp2e_t rop)
  39. {
  40. #ifdef N_OPS
  41. short_coeffredfp2ctr++;
  42. #endif
  43. mydouble carry11 = round(rop->v[22]/bn_v);
  44. mydouble carry11b = round(rop->v[23]/bn_v);
  45. rop->v[22] = remround(rop->v[22],bn_v);
  46. rop->v[23] = remround(rop->v[23],bn_v);
  47. rop->v[0] = rop->v[0] - carry11;
  48. rop->v[1] = rop->v[1] - carry11b;
  49. rop->v[6] = rop->v[6] - carry11;
  50. rop->v[7] = rop->v[7] - carry11b;
  51. rop->v[12] = rop->v[12] - 4*carry11;
  52. rop->v[13] = rop->v[13] - 4*carry11b;
  53. rop->v[18] = rop->v[18] - carry11;
  54. rop->v[19] = rop->v[19] - carry11b;
  55. mydouble carry1 = round(rop->v[2]/bn_v);
  56. mydouble carry1b = round(rop->v[3]/bn_v);
  57. rop->v[2] = remround(rop->v[2],bn_v);
  58. rop->v[3] = remround(rop->v[3],bn_v);
  59. rop->v[4] += carry1;
  60. rop->v[5] += carry1b;
  61. mydouble carry3 = round(rop->v[6]/bn_v);
  62. mydouble carry3b = round(rop->v[7]/bn_v);
  63. rop->v[6] = remround(rop->v[6],bn_v);
  64. rop->v[7] = remround(rop->v[7],bn_v);
  65. rop->v[8] += carry3;
  66. rop->v[9] += carry3b;
  67. mydouble carry5 = round(rop->v[10]/bn_v);
  68. mydouble carry5b = round(rop->v[11]/bn_v);
  69. rop->v[10] = remround(rop->v[10],bn_v);
  70. rop->v[11] = remround(rop->v[11],bn_v);
  71. rop->v[12] += carry5;
  72. rop->v[13] += carry5b;
  73. mydouble carry7 = round(rop->v[14]/bn_v);
  74. mydouble carry7b = round(rop->v[15]/bn_v);
  75. rop->v[14] = remround(rop->v[14],bn_v);
  76. rop->v[15] = remround(rop->v[15],bn_v);
  77. rop->v[16] += carry7;
  78. rop->v[17] += carry7b;
  79. mydouble carry9 = round(rop->v[18]/bn_v);
  80. mydouble carry9b = round(rop->v[19]/bn_v);
  81. rop->v[18] = remround(rop->v[18],bn_v);
  82. rop->v[19] = remround(rop->v[19],bn_v);
  83. rop->v[20] += carry9;
  84. rop->v[21] += carry9b;
  85. mydouble carry0 = round(rop->v[0]/bn_v6);
  86. mydouble carry0b = round(rop->v[1]/bn_v6);
  87. rop->v[0] = remround(rop->v[0],bn_v6);
  88. rop->v[1] = remround(rop->v[1],bn_v6);
  89. rop->v[2] += carry0;
  90. rop->v[3] += carry0b;
  91. mydouble carry2 = round(rop->v[4]/bn_v);
  92. mydouble carry2b = round(rop->v[5]/bn_v);
  93. rop->v[4] = remround(rop->v[4],bn_v);
  94. rop->v[5] = remround(rop->v[5],bn_v);
  95. rop->v[6] += carry2;
  96. rop->v[7] += carry2b;
  97. mydouble carry4 = round(rop->v[8]/bn_v);
  98. mydouble carry4b = round(rop->v[9]/bn_v);
  99. rop->v[8] = remround(rop->v[8],bn_v);
  100. rop->v[9] = remround(rop->v[9],bn_v);
  101. rop->v[10] += carry4;
  102. rop->v[11] += carry4b;
  103. mydouble carry6 = round(rop->v[12]/bn_v6);
  104. mydouble carry6b = round(rop->v[13]/bn_v6);
  105. rop->v[12] = remround(rop->v[12],bn_v6);
  106. rop->v[13] = remround(rop->v[13],bn_v6);
  107. rop->v[14] += carry6;
  108. rop->v[15] += carry6b;
  109. mydouble carry8 = round(rop->v[16]/bn_v);
  110. mydouble carry8b = round(rop->v[17]/bn_v);
  111. rop->v[16] = remround(rop->v[16],bn_v);
  112. rop->v[17] = remround(rop->v[17],bn_v);
  113. rop->v[18] += carry8;
  114. rop->v[19] += carry8b;
  115. mydouble carry10 = round(rop->v[20]/bn_v);
  116. mydouble carry10b = round(rop->v[21]/bn_v);
  117. rop->v[20] = remround(rop->v[20],bn_v);
  118. rop->v[21] = remround(rop->v[21],bn_v);
  119. rop->v[22] += carry10;
  120. rop->v[23] += carry10b;
  121. }
  122. #endif
  123. void fp2e_to_2fpe(fpe_t ropa, fpe_t ropb, const fp2e_t op)
  124. {
  125. int i;
  126. for(i=0;i<12;i++)
  127. {
  128. ropb->v[i] = op->v[2*i];
  129. ropa->v[i] = op->v[2*i+1];
  130. }
  131. }
  132. void _2fpe_to_fp2e(fp2e_t rop, const fpe_t opa, const fpe_t opb)
  133. {
  134. int i;
  135. for(i=0;i<12;i++)
  136. {
  137. rop->v[2*i] = opb->v[i];
  138. rop->v[2*i+1] = opa->v[i];
  139. }
  140. }
  141. // Set fp2e_t rop to given value:
  142. void fp2e_set(fp2e_t rop, const fp2e_t op)
  143. {
  144. int i;
  145. for(i=0;i<24;i++)
  146. rop->v[i] = op->v[i];
  147. }
  148. /* Communicate the fact that the fp2e is reduced (and that we don't know anything more about it) */
  149. void fp2e_isreduced(fp2e_t rop)
  150. {
  151. setmax(rop->v[0],(long)bn_v6/2);
  152. setmax(rop->v[1],(long)bn_v6/2);
  153. setmax(rop->v[12],(long)bn_v6/2);
  154. setmax(rop->v[13],(long)bn_v6/2);
  155. setmax(rop->v[2],(long)bn_v/2);
  156. setmax(rop->v[3],(long)bn_v/2);
  157. setmax(rop->v[6],(long)bn_v/2);
  158. setmax(rop->v[7],(long)bn_v/2);
  159. setmax(rop->v[8],(long)bn_v/2);
  160. setmax(rop->v[9],(long)bn_v/2);
  161. setmax(rop->v[14],(long)bn_v/2);
  162. setmax(rop->v[15],(long)bn_v/2);
  163. setmax(rop->v[18],(long)bn_v/2);
  164. setmax(rop->v[19],(long)bn_v/2);
  165. setmax(rop->v[20],(long)bn_v/2);
  166. setmax(rop->v[21],(long)bn_v/2);
  167. //XXX: Change additive constant
  168. setmax(rop->v[4],(long)bn_v/2+2331); /* TODO: Think about value */
  169. setmax(rop->v[5],(long)bn_v/2+2331); /* TODO: Think about value */
  170. setmax(rop->v[10],(long)bn_v/2+2331); /* TODO: Think about value */
  171. setmax(rop->v[11],(long)bn_v/2+2331); /* TODO: Think about value */
  172. setmax(rop->v[16],(long)bn_v/2+2331); /* TODO: Think about value */
  173. setmax(rop->v[17],(long)bn_v/2+2331); /* TODO: Think about value */
  174. setmax(rop->v[22],(long)bn_v/2+2331); /* TODO: Think about value */
  175. setmax(rop->v[23],(long)bn_v/2+2331); /* TODO: Think about value */
  176. }
  177. // Set fp2e_t rop to given value:
  178. void fp2e_set_fpe(fp2e_t rop, const fpe_t op)
  179. {
  180. int i;
  181. for(i=0;i<12;i++)
  182. {
  183. rop->v[2*i] = op->v[i];
  184. rop->v[2*i+1] = 0;
  185. }
  186. }
  187. // Set rop to one
  188. void fp2e_setone(fp2e_t rop)
  189. {
  190. int i;
  191. for(i=1;i<24;i++)
  192. rop->v[i] = 0;
  193. rop->v[0] = 1.;
  194. }
  195. // Set rop to zero
  196. void fp2e_setzero(fp2e_t rop)
  197. {
  198. int i;
  199. for(i=0;i<24;i++)
  200. rop->v[i] = 0;
  201. }
  202. // Compare for equality:
  203. int fp2e_iseq(const fp2e_t op1, const fp2e_t op2)
  204. {
  205. fpe_t a1,b1,a2,b2;
  206. fp2e_to_2fpe(a1,b1,op1);
  207. fp2e_to_2fpe(a2,b2,op2);
  208. return fpe_iseq(a1,a2) && fpe_iseq(b1,b2);
  209. }
  210. int fp2e_isone(const fp2e_t op)
  211. {
  212. fpe_t ta, tb;
  213. fp2e_to_2fpe(ta, tb, op);
  214. int ret = fpe_iszero(ta);
  215. ret = ret && fpe_isone(tb);
  216. return ret;
  217. }
  218. int fp2e_iszero(const fp2e_t op)
  219. {
  220. fpe_t ta, tb;
  221. fp2e_to_2fpe(ta, tb, op);
  222. int ret = fpe_iszero(ta);
  223. ret = ret && fpe_iszero(tb);
  224. return ret;
  225. }
  226. void fp2e_cmov(fp2e_t rop, const fp2e_t op, int c)
  227. {
  228. int i;
  229. for(i=0;i<24;i++)
  230. rop->v[i] = (1-c)*rop->v[i] + c*op->v[i];
  231. }
  232. #ifndef QHASM
  233. // Double an fp2e:
  234. void fp2e_double2_c(fp2e_t rop)
  235. {
  236. #ifdef N_OPS
  237. double2fp2ctr++;
  238. #endif
  239. int i;
  240. for(i=0;i<24;i++)
  241. rop->v[i] = 2*rop->v[i];
  242. }
  243. #endif
  244. #ifndef QHASM
  245. // Double an fp2e:
  246. void fp2e_double_c(fp2e_t rop, const fp2e_t op)
  247. {
  248. #ifdef N_OPS
  249. doublefp2ctr++;
  250. #endif
  251. int i;
  252. for(i=0;i<24;i++)
  253. rop->v[i] = 2*op->v[i];
  254. }
  255. #endif
  256. #ifndef QHASM
  257. // Triple an fp2e:
  258. void fp2e_triple2_c(fp2e_t rop)
  259. {
  260. #ifdef N_OPS
  261. triple2fp2ctr++;
  262. #endif
  263. int i;
  264. for(i=0;i<24;i++)
  265. rop->v[i] = 3*rop->v[i];
  266. }
  267. #endif
  268. #ifndef QHASM
  269. // Triple an fp2e:
  270. void fp2e_triple_c(fp2e_t rop, const fp2e_t op)
  271. {
  272. #ifdef N_OPS
  273. triplefp2ctr++;
  274. #endif
  275. int i;
  276. for(i=0;i<24;i++)
  277. rop->v[i] = 3*op->v[i];
  278. }
  279. #endif
  280. void fp2e_mul_scalar(fp2e_t rop, const fp2e_t op, const int s)
  281. {
  282. #ifdef N_OPS
  283. mul_scalarfp2ctr++;
  284. #endif
  285. int i;
  286. for(i=0;i<24;i++)
  287. rop->v[i] = s*op->v[i];
  288. }
  289. // Add two fp2e, store result in op1:
  290. #ifndef QHASM
  291. void fp2e_add2_c(fp2e_t op1, const fp2e_t op2)
  292. {
  293. #ifdef N_OPS
  294. add2fp2ctr++;
  295. #endif
  296. int i;
  297. for(i=0;i<24;i++)
  298. op1->v[i] += op2->v[i];
  299. }
  300. #endif
  301. #ifndef QHASM
  302. // Add two fp2e, store result in rop:
  303. void fp2e_add_c(fp2e_t rop, const fp2e_t op1, const fp2e_t op2)
  304. {
  305. #ifdef N_OPS
  306. addfp2ctr++;
  307. #endif
  308. int i;
  309. for(i=0;i<24;i++)
  310. rop->v[i] = op1->v[i] + op2->v[i];
  311. }
  312. #endif
  313. #ifndef QHASM
  314. // Subtract op2 from op1, store result in op1:
  315. void fp2e_sub2_c(fp2e_t op1, const fp2e_t op2)
  316. {
  317. #ifdef N_OPS
  318. sub2fp2ctr++;
  319. #endif
  320. int i;
  321. for(i=0;i<24;i++)
  322. op1->v[i] -= op2->v[i];
  323. }
  324. #endif
  325. #ifndef QHASM
  326. // Subtract op2 from op1, store result in rop:
  327. void fp2e_sub_c(fp2e_t rop, const fp2e_t op1, const fp2e_t op2)
  328. {
  329. #ifdef N_OPS
  330. subfp2ctr++;
  331. #endif
  332. int i;
  333. for(i=0;i<24;i++)
  334. rop->v[i] = op1->v[i] - op2->v[i];
  335. }
  336. #endif
  337. #ifndef QHASM
  338. // Negate op
  339. void fp2e_neg2_c(fp2e_t op)
  340. {
  341. #ifdef N_OPS
  342. neg2fp2ctr++;
  343. #endif
  344. int i;
  345. for(i=0;i<24;i++)
  346. op->v[i] = -op->v[i];
  347. }
  348. #endif
  349. #ifndef QHASM
  350. // Negate op
  351. void fp2e_neg_c(fp2e_t rop, const fp2e_t op)
  352. {
  353. #ifdef N_OPS
  354. negfp2ctr++;
  355. #endif
  356. int i;
  357. for(i=0;i<24;i++)
  358. rop->v[i] = -op->v[i];
  359. }
  360. #endif
  361. #ifndef QHASM
  362. // Conjugates: aX+b to -aX+b
  363. void fp2e_conjugate_c(fp2e_t rop, const fp2e_t op)
  364. {
  365. #ifdef N_OPS
  366. conjugatefp2ctr++;
  367. #endif
  368. int i;
  369. for(i=0;i<24;i+=2)
  370. {
  371. rop->v[i] = op->v[i];
  372. rop->v[i+1] = op->v[i+1] * (-1);
  373. }
  374. }
  375. #endif
  376. #ifndef QHASM
  377. // Multiply two fp2e, store result in rop:
  378. void fp2e_mul_c(fp2e_t rop, const fp2e_t op1, const fp2e_t op2)
  379. {
  380. #ifdef N_OPS
  381. mulfp2ctr += 1;
  382. #endif
  383. fpe_t a1, b1, a2, b2, r1, r2;
  384. mydouble a3[24], b3[24];
  385. int i;
  386. mydouble t0[24], t1[24], t2[24], t3[24];
  387. fp2e_to_2fpe(a1, b1, op1);
  388. fp2e_to_2fpe(a2, b2, op2);
  389. polymul(t1, a1->v, b2->v); // t1 = a1*b2
  390. polymul(t2, b1->v, a2->v); // t2 = b1*a2
  391. for(i=0; i<12; i++) // t3 = 1*a1
  392. {
  393. t3[i] = 1*a1->v[i];
  394. }
  395. polymul(t3, t3, a2->v); // t3 = 1*a1*a2
  396. polymul(t0, b1->v, b2->v); // t0 = b1*b2
  397. for(i=0; i<23; i++)
  398. {
  399. a3[i] = t1[i] + t2[i]; // a3 = a1*b2 + b1*a2
  400. b3[i] = t0[i] - t3[i]; // b3 = b1*b2 - 1*a1*a2
  401. }
  402. degred(a3);
  403. degred(b3);
  404. coeffred_round_par(a3);
  405. coeffred_round_par(b3);
  406. fpe_set_doublearray(r1, a3);
  407. fpe_set_doublearray(r2, b3);
  408. _2fpe_to_fp2e(rop, r1, r2);
  409. }
  410. #endif
  411. #ifndef QHASM
  412. // Square an fp2e, store result in rop:
  413. void fp2e_square_c(fp2e_t rop, const fp2e_t op)
  414. {
  415. #ifdef N_OPS
  416. sqfp2ctr += 1;
  417. #endif
  418. fpe_t a1, b1, r1, r2;
  419. mydouble ropa[24], ropb[24];
  420. fp2e_to_2fpe(a1, b1, op);
  421. int i;
  422. /* CheckDoubles are not smart enough to recognize
  423. * binomial formula to compute b^2-a^2 */
  424. #ifdef CHECK
  425. mydouble d1[24];
  426. polymul(d1, a1->v, a1->v);
  427. polymul(ropb, b1->v, b1->v);
  428. polymul(ropa, b1->v, a1->v);
  429. for(i=0;i<23;i++)
  430. {
  431. ropb[i] -= d1[i];
  432. ropa[i] *= 2;
  433. }
  434. #else
  435. fpe_t t1, t2, t3;
  436. for(i=0;i<12;i++)
  437. {
  438. t1->v[i] = a1->v[i] + b1->v[i];
  439. t2->v[i] = b1->v[i] - a1->v[i];
  440. t3->v[i] = 2*b1->v[i];
  441. }
  442. polymul(ropa, a1->v, t3->v);
  443. polymul(ropb, t1->v, t2->v);
  444. #endif
  445. degred(ropa);
  446. degred(ropb);
  447. coeffred_round_par(ropa);
  448. coeffred_round_par(ropb);
  449. fpe_set_doublearray(r1, ropa);
  450. fpe_set_doublearray(r2, ropb);
  451. _2fpe_to_fp2e(rop, r1, r2);
  452. }
  453. #endif
  454. #ifndef QHASM
  455. // Multiply by xi=i+3 which is used to construct F_p^6
  456. // (a*i + b)*(i + 3) = (3*b - 1*a) + (3*a + b)*i
  457. void fp2e_mulxi_c(fp2e_t rop, const fp2e_t op)
  458. {
  459. #ifdef N_OPS
  460. mulxifp2ctr++;
  461. #endif
  462. fpe_t a, b, t1, t2, t3, t4, t5;
  463. fp2e_to_2fpe(a, b, op);
  464. int i;
  465. for(i=0; i<12; i++)
  466. {
  467. t1->v[i] = 3*a->v[i]; // t1 = 3*a
  468. t2->v[i] = 3*b->v[i]; // t2 = 3*b
  469. t3->v[i] = 1*a->v[i]; // t3 = 1*a
  470. }
  471. fpe_add(t4, t1, b); // t4 = 3*a + b
  472. fpe_sub(t5, t2, t3); // t5 = 3*b - 1*a
  473. _2fpe_to_fp2e(rop, t4, t5);
  474. }
  475. #endif
  476. // Scalar multiple of an fp2e, store result in rop:
  477. #ifndef QHASM
  478. void fp2e_mul_fpe_c(fp2e_t rop, const fp2e_t op1, const fpe_t op2)
  479. {
  480. #ifdef N_OPS
  481. mulfp2fpctr += 1;
  482. #endif
  483. fpe_t a1,b1;
  484. fp2e_to_2fpe(a1,b1,op1);
  485. fpe_mul(a1,a1,op2);
  486. fpe_mul(b1,b1,op2);
  487. _2fpe_to_fp2e(rop,a1,b1);
  488. }
  489. #endif
  490. #ifndef QHASM
  491. /* computes (op1->m_a*op2->m_a, op1->m_b*op2->m_b) */
  492. void fp2e_parallel_coeffmul_c(fp2e_t rop, const fp2e_t op1, const fp2e_t op2)
  493. {
  494. fpe_t a1, b1, a2, b2; // Needed for intermediary results
  495. fp2e_to_2fpe(a1,b1,op1);
  496. fp2e_to_2fpe(a2,b2,op2);
  497. fpe_mul(a1, a1, a2);
  498. fpe_mul(b1, b1, b2);
  499. _2fpe_to_fp2e(rop, a1, b1);
  500. }
  501. #endif
  502. // Inverse multiple of an fp2e, store result in rop:
  503. void fp2e_invert(fp2e_t rop, const fp2e_t op)
  504. {
  505. #ifdef N_OPS
  506. invfp2ctr += 1;
  507. #endif
  508. /* New version */
  509. fp2e_t d1, d2;
  510. int i;
  511. fp2e_parallel_coeffmul(d1, op, op);
  512. for(i=0;i<24;i+=2)
  513. d1->v[i] = d1->v[i+1] = d1->v[i] + d1->v[i+1];
  514. fp2e_short_coeffred(d1);
  515. for(i=0;i<24;i+=2)
  516. {
  517. d2->v[i] = op->v[i];
  518. d2->v[i+1] = -op->v[i+1];
  519. }
  520. fp2e_set(rop,d1);
  521. for(i = 254; i >= 0; i--)
  522. {
  523. fp2e_parallel_coeffmul(rop,rop,rop);
  524. if(scalar_getbit(bn_pminus2, i))
  525. fp2e_parallel_coeffmul(rop,rop,d1);
  526. }
  527. fp2e_parallel_coeffmul(rop,rop,d2);
  528. }
  529. // Print the fp2e:
  530. void fp2e_print(FILE *outfile, const fp2e_t op)
  531. {
  532. fpe_t a,b;
  533. fp2e_to_2fpe(a,b,op);
  534. fpe_print(outfile, a);
  535. fprintf(outfile," * X + ");
  536. fpe_print(outfile, b);
  537. }