keccak-tiny-unrolled.c 9.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398
  1. /** libkeccak-tiny
  2. *
  3. * A single-file implementation of SHA-3 and SHAKE.
  4. *
  5. * Implementor: David Leon Gil
  6. * License: CC0, attribution kindly requested. Blame taken too,
  7. * but not liability.
  8. */
  9. #include "keccak-tiny.h"
  10. #include <string.h>
  11. #include "crypto.h"
  12. /******** Endianness conversion helpers ********/
  13. static inline uint64_t
  14. loadu64le(const unsigned char *x) {
  15. uint64_t r = 0;
  16. size_t i;
  17. for (i = 0; i < 8; ++i) {
  18. r |= (uint64_t)x[i] << 8 * i;
  19. }
  20. return r;
  21. }
  22. static inline void
  23. storeu64le(uint8_t *x, uint64_t u) {
  24. size_t i;
  25. for(i=0; i<8; ++i) {
  26. x[i] = u;
  27. u >>= 8;
  28. }
  29. }
  30. /******** The Keccak-f[1600] permutation ********/
  31. /*** Constants. ***/
  32. static const uint8_t rho[24] = \
  33. { 1, 3, 6, 10, 15, 21,
  34. 28, 36, 45, 55, 2, 14,
  35. 27, 41, 56, 8, 25, 43,
  36. 62, 18, 39, 61, 20, 44};
  37. static const uint8_t pi[24] = \
  38. {10, 7, 11, 17, 18, 3,
  39. 5, 16, 8, 21, 24, 4,
  40. 15, 23, 19, 13, 12, 2,
  41. 20, 14, 22, 9, 6, 1};
  42. static const uint64_t RC[24] = \
  43. {1ULL, 0x8082ULL, 0x800000000000808aULL, 0x8000000080008000ULL,
  44. 0x808bULL, 0x80000001ULL, 0x8000000080008081ULL, 0x8000000000008009ULL,
  45. 0x8aULL, 0x88ULL, 0x80008009ULL, 0x8000000aULL,
  46. 0x8000808bULL, 0x800000000000008bULL, 0x8000000000008089ULL, 0x8000000000008003ULL,
  47. 0x8000000000008002ULL, 0x8000000000000080ULL, 0x800aULL, 0x800000008000000aULL,
  48. 0x8000000080008081ULL, 0x8000000000008080ULL, 0x80000001ULL, 0x8000000080008008ULL};
  49. /*** Helper macros to unroll the permutation. ***/
  50. #define rol(x, s) (((x) << s) | ((x) >> (64 - s)))
  51. #define REPEAT6(e) e e e e e e
  52. #define REPEAT24(e) REPEAT6(e e e e)
  53. #define REPEAT5(e) e e e e e
  54. #define FOR5(v, s, e) \
  55. v = 0; \
  56. REPEAT5(e; v += s;)
  57. /*** Keccak-f[1600] ***/
  58. static inline void keccakf(void* state) {
  59. uint64_t* a = (uint64_t*)state;
  60. uint64_t b[5] = {0};
  61. uint64_t t = 0;
  62. uint8_t x, y, i = 0;
  63. REPEAT24(
  64. // Theta
  65. FOR5(x, 1,
  66. b[x] = 0;
  67. FOR5(y, 5,
  68. b[x] ^= a[x + y]; ))
  69. FOR5(x, 1,
  70. FOR5(y, 5,
  71. a[y + x] ^= b[(x + 4) % 5] ^ rol(b[(x + 1) % 5], 1); ))
  72. // Rho and pi
  73. t = a[1];
  74. x = 0;
  75. REPEAT24(b[0] = a[pi[x]];
  76. a[pi[x]] = rol(t, rho[x]);
  77. t = b[0];
  78. x++; )
  79. // Chi
  80. FOR5(y,
  81. 5,
  82. FOR5(x, 1,
  83. b[x] = a[y + x];)
  84. FOR5(x, 1,
  85. a[y + x] = b[x] ^ ((~b[(x + 1) % 5]) & b[(x + 2) % 5]); ))
  86. // Iota
  87. a[0] ^= RC[i];
  88. i++; )
  89. }
  90. /******** The FIPS202-defined functions. ********/
  91. /*** Some helper macros. ***/
  92. // `xorin` modified to handle Big Endian systems, `buf` being unaligned on
  93. // systems that care about such things. Assumes that len is a multiple of 8,
  94. // which is always true for the rates we use, and the modified finalize.
  95. static inline void
  96. xorin8(uint8_t *dst, const uint8_t *src, size_t len) {
  97. uint64_t* a = (uint64_t*)dst; // Always aligned.
  98. for (size_t i = 0; i < len; i += 8) {
  99. a[i/8] ^= loadu64le(src + i);
  100. }
  101. }
  102. // `setout` likewise modified to handle Big Endian systems. Assumes that len
  103. // is a multiple of 8, which is true for every rate we use.
  104. static inline void
  105. setout8(const uint8_t *src, uint8_t *dst, size_t len) {
  106. const uint64_t *si = (const uint64_t*)src; // Always aligned.
  107. for (size_t i = 0; i < len; i+= 8) {
  108. storeu64le(dst+i, si[i/8]);
  109. }
  110. }
  111. #define P keccakf
  112. #define Plen KECCAK_MAX_RATE
  113. #define KECCAK_DELIM_DIGEST 0x06
  114. #define KECCAK_DELIM_XOF 0x1f
  115. // Fold P*F over the full blocks of an input.
  116. #define foldP(I, L, F) \
  117. while (L >= s->rate) { \
  118. F(s->a, I, s->rate); \
  119. P(s->a); \
  120. I += s->rate; \
  121. L -= s->rate; \
  122. }
  123. static inline void
  124. keccak_absorb_blocks(keccak_state *s, const uint8_t *buf, size_t nr_blocks)
  125. {
  126. size_t blen = nr_blocks * s->rate;
  127. foldP(buf, blen, xorin8);
  128. }
  129. static int
  130. keccak_update(keccak_state *s, const uint8_t *buf, size_t len)
  131. {
  132. if (s->finalized)
  133. return -1;
  134. if ((buf == NULL) && len != 0)
  135. return -1;
  136. size_t remaining = len;
  137. while (remaining > 0) {
  138. if (s->offset == 0) {
  139. const size_t blocks = remaining / s->rate;
  140. size_t direct_bytes = blocks * s->rate;
  141. if (direct_bytes > 0) {
  142. keccak_absorb_blocks(s, buf, blocks);
  143. remaining -= direct_bytes;
  144. buf += direct_bytes;
  145. }
  146. }
  147. const size_t buf_avail = s->rate - s->offset;
  148. const size_t buf_bytes = (buf_avail > remaining) ? remaining : buf_avail;
  149. if (buf_bytes > 0) {
  150. memcpy(&s->block[s->offset], buf, buf_bytes);
  151. s->offset += buf_bytes;
  152. remaining -= buf_bytes;
  153. buf += buf_bytes;
  154. }
  155. if (s->offset == s->rate) {
  156. keccak_absorb_blocks(s, s->block, 1);
  157. s->offset = 0;
  158. }
  159. }
  160. return 0;
  161. }
  162. static void
  163. keccak_finalize(keccak_state *s)
  164. {
  165. // Xor in the DS and pad frame.
  166. s->block[s->offset++] = s->delim; // DS.
  167. for (size_t i = s->offset; i < s->rate; i++) {
  168. s->block[i] = 0;
  169. }
  170. s->block[s->rate - 1] |= 0x80; // Pad frame.
  171. // Xor in the last block.
  172. xorin8(s->a, s->block, s->rate);
  173. memwipe(s->block, 0, sizeof(s->block));
  174. s->finalized = 1;
  175. s->offset = s->rate;
  176. }
  177. static inline void
  178. keccak_squeeze_blocks(keccak_state *s, uint8_t *out, size_t nr_blocks)
  179. {
  180. for (size_t n = 0; n < nr_blocks; n++) {
  181. keccakf(s->a);
  182. setout8(s->a, out, s->rate);
  183. out += s->rate;
  184. }
  185. }
  186. static int
  187. keccak_squeeze(keccak_state *s, uint8_t *out, size_t outlen)
  188. {
  189. if (!s->finalized)
  190. return -1;
  191. size_t remaining = outlen;
  192. while (remaining > 0) {
  193. if (s->offset == s->rate) {
  194. const size_t blocks = remaining / s->rate;
  195. const size_t direct_bytes = blocks * s->rate;
  196. if (blocks > 0) {
  197. keccak_squeeze_blocks(s, out, blocks);
  198. out += direct_bytes;
  199. remaining -= direct_bytes;
  200. }
  201. if (remaining > 0) {
  202. keccak_squeeze_blocks(s, s->block, 1);
  203. s->offset = 0;
  204. }
  205. }
  206. const size_t buf_bytes = s->rate - s->offset;
  207. const size_t indirect_bytes = (buf_bytes > remaining) ? remaining : buf_bytes;
  208. if (indirect_bytes > 0) {
  209. memcpy(out, &s->block[s->offset], indirect_bytes);
  210. out += indirect_bytes;
  211. s->offset += indirect_bytes;
  212. remaining -= indirect_bytes;
  213. }
  214. }
  215. return 0;
  216. }
  217. int
  218. keccak_digest_init(keccak_state *s, size_t bits)
  219. {
  220. if (s == NULL)
  221. return -1;
  222. if (bits != 224 && bits != 256 && bits != 384 && bits != 512)
  223. return -1;
  224. keccak_cleanse(s);
  225. s->rate = KECCAK_RATE(bits);
  226. s->delim = KECCAK_DELIM_DIGEST;
  227. return 0;
  228. }
  229. int
  230. keccak_digest_update(keccak_state *s, const uint8_t *buf, size_t len)
  231. {
  232. if (s == NULL)
  233. return -1;
  234. if (s->delim != KECCAK_DELIM_DIGEST)
  235. return -1;
  236. return keccak_update(s, buf, len);
  237. }
  238. int
  239. keccak_digest_sum(const keccak_state *s, uint8_t *out, size_t outlen)
  240. {
  241. if (s == NULL)
  242. return -1;
  243. if (s->delim != KECCAK_DELIM_DIGEST)
  244. return -1;
  245. if (out == NULL || outlen > 4 * (KECCAK_MAX_RATE - s->rate) / 8)
  246. return -1;
  247. // Work in a copy so that incremental/rolling hashes are easy.
  248. keccak_state s_tmp;
  249. keccak_clone(&s_tmp, s);
  250. keccak_finalize(&s_tmp);
  251. int ret = keccak_squeeze(&s_tmp, out, outlen);
  252. keccak_cleanse(&s_tmp);
  253. return ret;
  254. }
  255. int
  256. keccak_xof_init(keccak_state *s, size_t bits)
  257. {
  258. if (s == NULL)
  259. return -1;
  260. if (bits != 128 && bits != 256)
  261. return -1;
  262. keccak_cleanse(s);
  263. s->rate = KECCAK_RATE(bits);
  264. s->delim = KECCAK_DELIM_XOF;
  265. return 0;
  266. }
  267. int
  268. keccak_xof_absorb(keccak_state *s, const uint8_t *buf, size_t len)
  269. {
  270. if (s == NULL)
  271. return -1;
  272. if (s->delim != KECCAK_DELIM_XOF)
  273. return -1;
  274. return keccak_update(s, buf, len);
  275. }
  276. int
  277. keccak_xof_squeeze(keccak_state *s, uint8_t *out, size_t outlen)
  278. {
  279. if (s == NULL)
  280. return -1;
  281. if (s->delim != KECCAK_DELIM_XOF)
  282. return -1;
  283. if (!s->finalized)
  284. keccak_finalize(s);
  285. return keccak_squeeze(s, out, outlen);
  286. }
  287. void
  288. keccak_clone(keccak_state *out, const keccak_state *in)
  289. {
  290. memcpy(out, in, sizeof(keccak_state));
  291. }
  292. void
  293. keccak_cleanse(keccak_state *s)
  294. {
  295. memwipe(s, 0, sizeof(keccak_state));
  296. }
  297. /** The sponge-based hash construction. **/
  298. static inline int hash(uint8_t* out, size_t outlen,
  299. const uint8_t* in, size_t inlen,
  300. size_t bits, uint8_t delim) {
  301. if ((out == NULL) || ((in == NULL) && inlen != 0)) {
  302. return -1;
  303. }
  304. int ret = 0;
  305. keccak_state s;
  306. keccak_cleanse(&s);
  307. switch (delim) {
  308. case KECCAK_DELIM_DIGEST:
  309. ret |= keccak_digest_init(&s, bits);
  310. ret |= keccak_digest_update(&s, in, inlen);
  311. // Use the internal API instead of sum to avoid the memcpy.
  312. keccak_finalize(&s);
  313. ret |= keccak_squeeze(&s, out, outlen);
  314. break;
  315. case KECCAK_DELIM_XOF:
  316. ret |= keccak_xof_init(&s, bits);
  317. ret |= keccak_xof_absorb(&s, in, inlen);
  318. ret |= keccak_xof_squeeze(&s, out, outlen);
  319. break;
  320. default:
  321. return -1;
  322. }
  323. keccak_cleanse(&s);
  324. return ret;
  325. }
  326. /*** Helper macros to define SHA3 and SHAKE instances. ***/
  327. #define defshake(bits) \
  328. int shake##bits(uint8_t* out, size_t outlen, \
  329. const uint8_t* in, size_t inlen) { \
  330. return hash(out, outlen, in, inlen, bits, KECCAK_DELIM_XOF); \
  331. }
  332. #define defsha3(bits) \
  333. int sha3_##bits(uint8_t* out, size_t outlen, \
  334. const uint8_t* in, size_t inlen) { \
  335. if (outlen > (bits/8)) { \
  336. return -1; \
  337. } \
  338. return hash(out, outlen, in, inlen, bits, KECCAK_DELIM_DIGEST); \
  339. }
  340. /*** FIPS202 SHAKE VOFs ***/
  341. defshake(128)
  342. defshake(256)
  343. /*** FIPS202 SHA3 FOFs ***/
  344. defsha3(224)
  345. defsha3(256)
  346. defsha3(384)
  347. defsha3(512)