oasm_lib.tcc 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392
  1. #ifndef __OASM_LIB_TCC__
  2. #define __OASM_LIB_TCC__
  3. #include "foav.h"
  4. template<> inline void oswap_buffer<OSWAP_4>(unsigned char *dest, unsigned char *source, uint32_t , uint8_t flag)
  5. {
  6. #ifdef COUNT_OSWAPS
  7. OSWAP_COUNTER++;
  8. #endif
  9. #if 0
  10. oswap_buffer_byte_v2(dest, source, flag);
  11. #else
  12. __asm__ (
  13. "# inline oswap_buffer<OSWAP_4>\n"
  14. "test %[flag], %[flag]\n"
  15. "movl (%[dest]), %%r10d\n"
  16. "movl (%[dest]), %%r11d\n"
  17. "movl (%[source]), %%ecx\n"
  18. "cmovnz %%ecx, %%r10d\n"
  19. "cmovnz %%r11d, %%ecx\n"
  20. "movl %%r10d, (%[dest])\n"
  21. "movl %%ecx, (%[source])\n"
  22. :
  23. : [dest] "r" (dest), [source] "r" (source), [flag] "r" (flag)
  24. : "cc", "memory", "r10", "r11", "ecx"
  25. );
  26. #endif
  27. }
  28. template<> inline void oswap_buffer<OSWAP_8>(unsigned char *dest, unsigned char *source, uint32_t , uint8_t flag)
  29. {
  30. #ifdef COUNT_OSWAPS
  31. OSWAP_COUNTER++;
  32. #endif
  33. #if 0
  34. oswap_buffer_byte_v2(dest, source, flag);
  35. #else
  36. __asm__ (
  37. "# inline oswap_buffer<OSWAP_8>\n"
  38. "test %[flag], %[flag]\n"
  39. "movq (%[dest]), %%r10\n"
  40. "movq (%[dest]), %%r11\n"
  41. "movq (%[source]), %%rcx\n"
  42. "cmovnz %%rcx, %%r10\n"
  43. "cmovnz %%r11, %%rcx\n"
  44. "movq %%r10, (%[dest])\n"
  45. "movq %%rcx, (%[source])\n"
  46. :
  47. : [dest] "r" (dest), [source] "r" (source), [flag] "r" (flag)
  48. : "cc", "memory", "r10", "r11", "rcx"
  49. );
  50. #endif
  51. }
  52. template<> inline void oswap_buffer<OSWAP_12>(unsigned char *dest, unsigned char *source, uint32_t , uint8_t flag)
  53. {
  54. #ifdef COUNT_OSWAPS
  55. OSWAP_COUNTER++;
  56. #endif
  57. #if 0
  58. oswap_buffer_byte_v2(dest, source, flag);
  59. #else
  60. __asm__ (
  61. "# inline oswap_buffer<OSWAP_12>\n"
  62. "test %[flag], %[flag]\n"
  63. "movq (%[dest]), %%r14\n" // dest data
  64. "movq (%[dest]), %%r12\n" // dest data
  65. "movl 8(%[dest]), %%ebx\n" // dest data (next word)
  66. "movl 8(%[dest]), %%edx\n" // dest data (next word)
  67. "movq (%[source]), %%r15\n" // source data
  68. "movl 8(%[source]), %%r13d\n" // source data (next word)
  69. "cmovnz %%r15, %%r14\n" // r14 <- r15 based on the flag (C1)
  70. "cmovnz %%r13d, %%ebx\n" // rbx <- r13 based on the flag (C1')
  71. "cmovnz %%r12, %%r15\n" // r15 <- r12 based on the flag (C2)
  72. "cmovnz %%edx, %%r13d\n" // r13 <- rdx based on the flag (C2')
  73. "movq %%r14, (%[dest])\n" // dest gets back r14, which is source's data if flag is true from (C1)
  74. // else it gets back the same dest data
  75. "movl %%ebx, 8(%[dest])\n" // dest+8 gets back ebx, which is source+8's data if flag is true from
  76. // (C1'), else it gets back the same dest+8 data
  77. "movq %%r15, (%[source])\n" // source gets back r15, which is dest's original data if flag is true
  78. // from (C2), else it gets back the same B2 data
  79. "movl %%r13d, 8(%[source])\n" // source+8 gets back r13d, which is dest+8's original data if flag is
  80. // true from (C2'), else it gets back the same B2 data
  81. :
  82. : [dest] "r" (dest), [source] "r" (source), [flag] "r" (flag)
  83. : "cc", "memory", "rcx", "r12", "r13", "r14", "r15", "rbx", "rdx"
  84. );
  85. #endif
  86. }
  87. template<> inline void oswap_buffer<OSWAP_16X>(unsigned char *dest, unsigned char *source, uint32_t buffersize, uint8_t flag)
  88. {
  89. #ifdef COUNT_OSWAPS
  90. OSWAP_COUNTER++;
  91. #endif
  92. __asm__ (
  93. "# inline oswap_buffer<OSWAP_16X>\n"
  94. //Move ptr to dest and source buffers to r10 and r11
  95. "movq %[dest], %%r10\n"
  96. "movq %[source], %%r11\n"
  97. //Set loop parameters
  98. "movl %[buffersize], %%ecx\n"
  99. "shr $4, %%ecx\n"
  100. //Loop to fetch iter & res chunks till blk_size
  101. "1:\n"
  102. "test %[flag], %[flag]\n"
  103. "movq (%%r10), %%r14\n" // dest data
  104. "movq (%%r10), %%r12\n" // dest data
  105. "movq 8(%%r10), %%rbx\n" // dest data (next qword)
  106. "movq 8(%%r10), %%rdx\n" // dest data (next qword)
  107. "movq (%%r11), %%r15\n" // source data
  108. "movq 8(%%r11), %%r13\n" // source data (next qword)
  109. "cmovnz %%r15, %%r14\n" // r14 <- r15 based on the flag (C1)
  110. "cmovnz %%r13, %%rbx\n" // rbx <- r13 based on the flag (C1')
  111. "cmovnz %%r12, %%r15\n" // r15 <- r12 based on the flag (C2)
  112. "cmovnz %%rdx, %%r13\n" // r13 <- rdx based on the flag (C2')
  113. "movq %%r14, (%%r10)\n" // dest gets back r14, which is source's data if flag is true from (C1)
  114. // else it gets back the same dest data
  115. "movq %%rbx, 8(%%r10)\n" // dest+8 gets back rbx, which is source+8's data if flag is true from
  116. // (C1'), else it gets back the same dest+8 data
  117. "movq %%r15, (%%r11)\n" // source gets back r15, which is dest's original data if flag is true
  118. // from (C2), else it gets back the same B2 data
  119. "movq %%r13, 8(%%r11)\n" // source+8 gets back r13, which is dest+8's original data if flag is
  120. // true from (C2'), else it gets back the same B2 data
  121. "add $16, %%r10\n"
  122. "add $16, %%r11\n"
  123. "dec %%ecx\n"
  124. "# FOAV oswap_buffer_16X ctr (%%ecx)\n"
  125. "jnz 1b\n"
  126. :
  127. : [dest] "r" (dest), [source] "r" (source), [buffersize] "r" (buffersize), [flag] "r" (flag)
  128. : "cc", "memory", "r10", "r11", "rcx", "r12", "r13", "r14", "r15", "rbx", "rdx"
  129. );
  130. }
  131. template<> inline void oswap_buffer<OSWAP_8_16X>(unsigned char *dest, unsigned char *source, uint32_t buffersize, uint8_t flag)
  132. {
  133. #ifdef COUNT_OSWAPS
  134. OSWAP_COUNTER++;
  135. #endif
  136. __asm__ (
  137. "# inline oswap_buffer<OSWAP_8_16X>\n"
  138. //Move ptr to dest and source buffers to r10 and r11
  139. "movq %[dest], %%r10\n"
  140. "movq %[source], %%r11\n"
  141. // Move first 8 bytes obliviously:
  142. "test %[flag], %[flag]\n"
  143. "movq (%%r10), %%r14\n" // dest data
  144. "movq (%%r10), %%r12\n" // dest data
  145. "movq (%%r11), %%r15\n" // source data
  146. "cmovnz %%r15, %%r14\n" // r14 <- r15 based on the flag (C1)
  147. "cmovnz %%r12, %%r15\n" // r15 <- r12 based on the flag (C2)
  148. "movq %%r14, (%%r10)\n" // dest gets back r14, which is source's data if flag is true from (C1)
  149. // else it gets back the same dest data
  150. "movq %%r15, (%%r11)\n" // source gets back r15, which is dest's original data if flag is true
  151. // from (C2), else it gets back the same B2 data
  152. "add $8, %%r10\n"
  153. "add $8, %%r11\n"
  154. //Set loop parameters
  155. "movl %[buffersize], %%ecx\n"
  156. "shr $4, %%ecx\n"
  157. //Loop to fetch iter & res chunks till blk_size
  158. "1:\n"
  159. "test %[flag], %[flag]\n"
  160. "movq (%%r10), %%r14\n" // dest data
  161. "movq (%%r10), %%r12\n" // dest data
  162. "movq 8(%%r10), %%rbx\n" // dest data (next qword)
  163. "movq 8(%%r10), %%rdx\n" // dest data (next qword)
  164. "movq (%%r11), %%r15\n" // source data
  165. "movq 8(%%r11), %%r13\n" // source data (next qword)
  166. "cmovnz %%r15, %%r14\n" // r14 <- r15 based on the flag (C1)
  167. "cmovnz %%r13, %%rbx\n" // rbx <- r13 based on the flag (C1')
  168. "cmovnz %%r12, %%r15\n" // r15 <- r12 based on the flag (C2)
  169. "cmovnz %%rdx, %%r13\n" // r13 <- rdx based on the flag (C2')
  170. "movq %%r14, (%%r10)\n" // dest gets back r14, which is source's data if flag is true from (C1)
  171. // else it gets back the same dest data
  172. "movq %%rbx, 8(%%r10)\n" // dest+8 gets back rbx, which is source+8's data if flag is true from
  173. // (C1'), else it gets back the same dest+8 data
  174. "movq %%r15, (%%r11)\n" // source gets back r15, which is dest's original data if flag is true
  175. // from (C2), else it gets back the same B2 data
  176. "movq %%r13, 8(%%r11)\n" // source+8 gets back r13, which is dest+8's original data if flag is
  177. // true from (C2'), else it gets back the same B2 data
  178. "add $16, %%r10\n"
  179. "add $16, %%r11\n"
  180. "dec %%ecx\n"
  181. " # FOAV oswap_buffer_16X ctr (%%ecx)\n"
  182. "jnz 1b\n"
  183. :
  184. : [dest] "r" (dest), [source] "r" (source), [buffersize] "r" (buffersize), [flag] "r" (flag)
  185. : "cc", "memory", "r10", "r11", "rcx", "r12", "r13", "r14", "r15", "rbx", "rdx"
  186. );
  187. }
  188. template<> inline void oswap_key<uint32_t>(unsigned char *dest, unsigned char *source, uint8_t flag)
  189. {
  190. oswap_buffer<OSWAP_4>(dest, source, 4, flag);
  191. }
  192. template<> inline void oswap_key<uint64_t>(unsigned char *dest, unsigned char *source, uint8_t flag)
  193. {
  194. oswap_buffer<OSWAP_8>(dest, source, 8, flag);
  195. }
  196. template<> inline void oswap_key<__uint128_t>(unsigned char *dest, unsigned char *source, uint8_t flag)
  197. {
  198. oswap_buffer<OSWAP_16X>(dest, source, 16, flag);
  199. }
  200. template<> inline void omove_buffer<OSWAP_8>(unsigned char *dest, unsigned char *source, uint32_t , uint8_t flag)
  201. {
  202. __asm__ (
  203. "# inline omove_buffer<OSWAP_8>\n"
  204. "test %[flag], %[flag]\n"
  205. "movq (%[dest]), %%r10\n"
  206. //"movq (%[source]), %%rcx\n"
  207. "cmovnz (%[source]), %%r10\n"
  208. "movq %%r10, (%[dest])\n"
  209. :
  210. : [dest] "r" (dest), [source] "r" (source), [flag] "r" (flag)
  211. : "cc", "memory", "r10"
  212. );
  213. }
  214. template<> inline void omove_buffer<OSWAP_16X>(unsigned char *dest, unsigned char *source, uint32_t buffersize, uint8_t flag)
  215. {
  216. __asm__ (
  217. "# inline omove_buffer<OSWAP_16X>\n"
  218. //Move ptr to dest and source buffers to r10 and r11
  219. "movq %[dest], %%r10\n"
  220. "movq %[source], %%r11\n"
  221. //Set loop parameters
  222. "movl %[buffersize], %%ecx\n"
  223. "shr $4, %%ecx\n"
  224. //Loop to fetch iter & res chunks till blk_size
  225. "1:\n"
  226. "test %[flag], %[flag]\n"
  227. "movq (%%r10), %%r14\n" // dest data
  228. "movq 8(%%r10), %%rbx\n" // dest data (next qword)
  229. "movq (%%r11), %%r15\n" // source data
  230. "movq 8(%%r11), %%r13\n" // source data (next qword)
  231. "cmovnz %%r15, %%r14\n" // r14 <- r15 based on the flag (C1)
  232. "cmovnz %%r13, %%rbx\n" // rbx <- r13 based on the flag (C1')
  233. "movq %%r14, (%%r10)\n" // dest gets back r14, which is source's data if flag is true from (C1)
  234. // else it gets back the same dest data
  235. "movq %%rbx, 8(%%r10)\n" // dest+8 gets back rbx, which is source+8's data if flag is true from
  236. // (C1'), else it gets back the same dest+8 data
  237. "add $16, %%r10\n"
  238. "add $16, %%r11\n"
  239. "dec %%ecx\n"
  240. " # FOAV oswap_buffer_16X ctr (%%ecx)\n"
  241. "jnz 1b\n"
  242. :
  243. : [dest] "r" (dest), [source] "r" (source), [buffersize] "r" (buffersize), [flag] "r" (flag)
  244. : "cc", "memory", "r10", "r11", "rcx", "r13", "r14", "r15", "rbx"
  245. );
  246. }
  247. template<> inline void omove_buffer<OSWAP_8_16X>(unsigned char *dest, unsigned char *source, uint32_t buffersize, uint8_t flag)
  248. {
  249. __asm__ (
  250. "# inline omove_buffer<OSWAP_8_16X>\n"
  251. //Move ptr to dest and source buffers to r10 and r11
  252. "movq %[dest], %%r10\n"
  253. "movq %[source], %%r11\n"
  254. // Move first 8 bytes obliviously:
  255. "test %[flag], %[flag]\n"
  256. "movq (%%r10), %%r14\n" // dest data
  257. "movq (%%r11), %%r15\n" // source data
  258. "cmovnz %%r15, %%r14\n" // r14 <- r15 based on the flag (C1)
  259. "movq %%r14, (%%r10)\n" // dest gets back r14, which is source's data if flag is true from (C1)
  260. // else it gets back the same dest data
  261. "add $8, %%r10\n"
  262. "add $8, %%r11\n"
  263. //Set loop parameters
  264. "movl %[buffersize], %%ecx\n"
  265. "shr $4, %%ecx\n"
  266. //Loop to fetch iter & res chunks till blk_size
  267. "1:\n"
  268. "test %[flag], %[flag]\n"
  269. "movq (%%r10), %%r14\n" // dest data
  270. "movq 8(%%r10), %%rbx\n" // dest data (next qword)
  271. "movq (%%r11), %%r15\n" // source data
  272. "movq 8(%%r11), %%r13\n" // source data (next qword)
  273. "cmovnz %%r15, %%r14\n" // r14 <- r15 based on the flag (C1)
  274. "cmovnz %%r13, %%rbx\n" // rbx <- r13 based on the flag (C1')
  275. "movq %%r14, (%%r10)\n" // dest gets back r14, which is source's data if flag is true from (C1)
  276. // else it gets back the same dest data
  277. "movq %%rbx, 8(%%r10)\n" // dest+8 gets back rbx, which is source+8's data if flag is true from
  278. // (C1'), else it gets back the same dest+8 data
  279. "add $16, %%r10\n"
  280. "add $16, %%r11\n"
  281. "dec %%ecx\n"
  282. " # FOAV oswap_buffer_16X ctr (%%ecx)\n"
  283. "jnz 1b\n"
  284. :
  285. : [dest] "r" (dest), [source] "r" (source), [buffersize] "r" (buffersize), [flag] "r" (flag)
  286. : "cc", "memory", "r10", "r11", "rcx", "r13", "r14", "r15", "rbx"
  287. );
  288. }
  289. /*
  290. omove_buffer:
  291. ; Take inputs, 1 ptr to dest_buffer, 2 ptr to source_buffer, 3 buffer_size, 4 flag
  292. ; Linux : rdi,rsi,rdx,rcx->rbp
  293. ; Callee-saved : RBP, RBX, and R12–R15
  294. push rbx
  295. push rbp
  296. push r12
  297. push r13
  298. push r14
  299. push r15
  300. ; Move ptr to data from serialized_dest_block and serialized_source_blk
  301. mov r10, rdi
  302. mov r11, rsi
  303. ;RCX will be lost for loop, store flag from rcx to rbp (1 byte , so bpl)
  304. mov bpl, cl
  305. ; Oblivious evaluation of flag
  306. cmp bpl, 1
  307. ;Set loop parameters
  308. mov ax, dx
  309. xor rdx, rdx
  310. mov bx, 8
  311. div bx
  312. mov cx, ax
  313. ; Loop to fetch iter & res chunks till blk_size
  314. loopstart_omb:
  315. cmp bpl, 1
  316. mov r14, qword [r10]
  317. mov r15, qword [r11]
  318. cmovz r14, r15 ;r14 / r15 based on the compare
  319. mov qword [r10], r14
  320. add r10, 8
  321. add r11, 8
  322. loop loopstart_omb
  323. pop r15
  324. pop r14
  325. pop r13
  326. pop r12
  327. pop rbp
  328. pop rbx
  329. ret
  330. */
  331. #endif