#include "fe.h"

/*
 * NOTE(review): this file reached review in a damaged state.  The text
 * between "Then 0" in the proof below and the ">> 25" of the first q
 * assignment had been stripped, which removed the tail of the proof, the
 * function signature, every local declaration and the start of the first
 * statement; the q carry chain was left inside a comment and the remaining
 * statements at file scope.  The missing part is restored here following
 * the usual ref10 layout.  The function name, the parameter list and the
 * type names (crypto_int32, fe) are assumptions -- confirm them against
 * fe.h and the upstream copy before merging.
 */

/*
Preconditions:
  |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.

Write p=2^255-19; q=floor(h/p).
Basic claim: q = floor(2^(-255)(h + 19 2^(-25)h9 + 2^(-1))).

Proof:
  Have |h|<=p so |q|<=1 so |19^2 2^(-255) q|<1/4.
  Also have |h-2^230 h9|<2^231 so |19 2^(-255)(h-2^230 h9)|<1/4.

  Write y=2^(-1)-19^2 2^(-255)q-19 2^(-255)(h-2^230 h9).
  Then 0<y<1.

  Write r=h-pq.
  Have 0<=r<=p-1=2^255-20.
  Thus 0<=r+19(2^-255)r<r+19(2^-255)2^255<=2^255-1.

  Write x=r+19(2^-255)r+y.
  Then 0<x<2^255 so floor(2^(-255)x) = 0 so floor(q+2^(-255)x) = q.

  Have q+2^(-255)x = 2^(-255)(h + 19 2^(-25) h9 + 2^(-1))
  so floor(2^(-255)(h + 19 2^(-25) h9 + 2^(-1))) = q.
*/

/*
 * Serialize the field element h into the 32 bytes s[0..31].
 *
 * h is read as ten signed limbs h[0..9] of alternating 26 and 25 bits.
 * The value is first reduced to its canonical representative modulo
 * 2^255-19 and then written out least-significant byte first.
 *
 * The right shifts below operate on possibly negative values and rely on
 * the compiler implementing them as arithmetic shifts.  Left shifts go
 * through SHL32, which comes from the project headers (presumably a
 * left shift performed on the unsigned type -- confirm).
 */
void fe_tobytes(unsigned char *s,const fe h)
{
  crypto_int32 h0 = h[0];
  crypto_int32 h1 = h[1];
  crypto_int32 h2 = h[2];
  crypto_int32 h3 = h[3];
  crypto_int32 h4 = h[4];
  crypto_int32 h5 = h[5];
  crypto_int32 h6 = h[6];
  crypto_int32 h7 = h[7];
  crypto_int32 h8 = h[8];
  crypto_int32 h9 = h[9];
  crypto_int32 q;
  crypto_int32 carry0;
  crypto_int32 carry1;
  crypto_int32 carry2;
  crypto_int32 carry3;
  crypto_int32 carry4;
  crypto_int32 carry5;
  crypto_int32 carry6;
  crypto_int32 carry7;
  crypto_int32 carry8;
  crypto_int32 carry9;

  /*
   * Compute q = floor(2^(-255)(h + 19 2^(-25) h9 + 2^(-1))) by seeding the
   * chain with 19*h9 + 2^24 scaled down by 2^25 and then propagating the
   * carry through every limb.
   */
  q = (19 * h9 + (((crypto_int32) 1) << 24)) >> 25;
  q = (h0 + q) >> 26;
  q = (h1 + q) >> 25;
  q = (h2 + q) >> 26;
  q = (h3 + q) >> 25;
  q = (h4 + q) >> 26;
  q = (h5 + q) >> 25;
  q = (h6 + q) >> 26;
  q = (h7 + q) >> 25;
  q = (h8 + q) >> 26;
  q = (h9 + q) >> 25;

  /* Goal: Output h-(2^255-19)q, which is between 0 and 2^255-20. */
  h0 += 19 * q;
  /* Goal: Output h-2^255 q, which is between 0 and 2^255-20. */

  /* Carry chain: afterwards every limb fits its 26- or 25-bit slot. */
  carry0 = h0 >> 26; h1 += carry0; h0 -= SHL32(carry0,26);
  carry1 = h1 >> 25; h2 += carry1; h1 -= SHL32(carry1,25);
  carry2 = h2 >> 26; h3 += carry2; h2 -= SHL32(carry2,26);
  carry3 = h3 >> 25; h4 += carry3; h3 -= SHL32(carry3,25);
  carry4 = h4 >> 26; h5 += carry4; h4 -= SHL32(carry4,26);
  carry5 = h5 >> 25; h6 += carry5; h5 -= SHL32(carry5,25);
  carry6 = h6 >> 26; h7 += carry6; h6 -= SHL32(carry6,26);
  carry7 = h7 >> 25; h8 += carry7; h7 -= SHL32(carry7,25);
  carry8 = h8 >> 26; h9 += carry8; h8 -= SHL32(carry8,26);
  carry9 = h9 >> 25;               h9 -= SHL32(carry9,25);
                  /* h10 = carry9 */

  /*
  Goal: Output h0+...+2^255 h10-2^255 q, which is between 0 and 2^255-20.
  Have h0+...+2^230 h9 between 0 and 2^255-1;
  evidently 2^255 h10-2^255 q = 0.
  Goal: Output h0+...+2^230 h9.
  */

  /*
   * Pack the limbs into bytes.  Each assignment keeps only the low 8 bits
   * of its right-hand side; bytes that straddle two limbs OR the top bits
   * of one limb with the bottom bits of the next.
   */
  s[0] = h0 >> 0;
  s[1] = h0 >> 8;
  s[2] = h0 >> 16;
  s[3] = (h0 >> 24) | SHL32(h1,2);
  s[4] = h1 >> 6;
  s[5] = h1 >> 14;
  s[6] = (h1 >> 22) | SHL32(h2,3);
  s[7] = h2 >> 5;
  s[8] = h2 >> 13;
  s[9] = (h2 >> 21) | SHL32(h3,5);
  s[10] = h3 >> 3;
  s[11] = h3 >> 11;
  s[12] = (h3 >> 19) | SHL32(h4,6);
  s[13] = h4 >> 2;
  s[14] = h4 >> 10;
  s[15] = h4 >> 18;
  s[16] = h5 >> 0;
  s[17] = h5 >> 8;
  s[18] = h5 >> 16;
  s[19] = (h5 >> 24) | SHL32(h6,1);
  s[20] = h6 >> 7;
  s[21] = h6 >> 15;
  s[22] = (h6 >> 23) | SHL32(h7,3);
  s[23] = h7 >> 5;
  s[24] = h7 >> 13;
  s[25] = (h7 >> 21) | SHL32(h8,4);
  s[26] = h8 >> 4;
  s[27] = h8 >> 12;
  s[28] = (h8 >> 20) | SHL32(h9,6);
  s[29] = h9 >> 2;
  s[30] = h9 >> 10;
  s[31] = h9 >> 18;
}