# scalar_sub_nored.s (2.7 KB)

  1. # File: dclxvi-20130329/scalar_sub_nored.s
  2. # Author: Ruben Niederhagen, Peter Schwabe
  3. # Public Domain
  4. # qhasm: int64 rp
  5. # qhasm: int64 xp
  6. # qhasm: int64 yp
  7. # qhasm: input rp
  8. # qhasm: input xp
  9. # qhasm: input yp
  10. # qhasm: int64 r0
  11. # qhasm: int64 r1
  12. # qhasm: int64 r2
  13. # qhasm: int64 r3
  14. # qhasm: int64 t0
  15. # qhasm: int64 t1
  16. # qhasm: int64 t2
  17. # qhasm: int64 t3
  18. # qhasm: int64 caller1
  19. # qhasm: int64 caller2
  20. # qhasm: int64 caller3
  21. # qhasm: int64 caller4
  22. # qhasm: int64 caller5
  23. # qhasm: int64 caller6
  24. # qhasm: int64 caller7
  25. # qhasm: caller caller1
  26. # qhasm: caller caller2
  27. # qhasm: caller caller3
  28. # qhasm: caller caller4
  29. # qhasm: caller caller5
  30. # qhasm: caller caller6
  31. # qhasm: caller caller7
  32. # qhasm: stack64 caller4_stack
  33. # qhasm: stack64 caller5_stack
  34. # qhasm: stack64 caller6_stack
  35. # qhasm: stack64 caller7_stack
  36. # qhasm: enter scalar_sub_nored
  # ---------------------------------------------------------------------
  # scalar_sub_nored(rp, xp, yp)
  #
  # Four-limb (4 x 64-bit) subtraction with no reduction step.
  # The words at xp and yp are treated as limbs with word 0 least
  # significant.  Computes x - y with the borrow propagated from limb 0
  # up to limb 3 and stores the four result words at rp.  The borrow out
  # of limb 3 is discarded, so the stored result wraps modulo 2^256.
  #
  # Syntax: AT&T (GAS), x86-64
  # In:     %rdi = rp   (destination, 4 x uint64)
  #         %rsi = xp   (minuend,     4 x uint64)
  #         %rdx = yp   (subtrahend,  4 x uint64)
  # Out:    rp[0..3] written
  #         %rax = rp, %rdx = result limb 3 (register state of the
  #         generated epilogue, kept so nothing changes for callers;
  #         whether any caller reads them is not visible in this file)
  # Clobb:  %rcx, %r8, %r9, %rsi, flags
  # Stack:  unused.  This is a leaf routine with no locals, so %rsp is
  #         never adjusted.
  #
  # Every input word is read before the first store, so rp may overlap
  # xp and/or yp.
  # ---------------------------------------------------------------------
  .text
  .p2align 5
  # Exported under both names; the underscore-prefixed one is presumably
  # for toolchains that prepend '_' to C symbols.
  .globl _scalar_sub_nored
  .globl scalar_sub_nored
  _scalar_sub_nored:
  scalar_sub_nored:
          # r0..r3 = x[0..3]
          movq    0(%rsi), %rcx           # r0
          movq    8(%rsi), %r8            # r1
          movq    16(%rsi), %r9           # r2
          movq    24(%rsi), %rsi          # r3; xp is dead from here on, reuse its register

          # r -= y.  sub starts the borrow chain, each sbb consumes and
          # re-produces CF.  No flag-writing instruction may be placed
          # between these four.
          subq    0(%rdx), %rcx
          sbbq    8(%rdx), %r8
          sbbq    16(%rdx), %r9
          sbbq    24(%rdx), %rsi          # final borrow-out is dropped (no reduction)

          # rp[0..3] = r0..r3
          movq    %rcx, 0(%rdi)
          movq    %r8, 8(%rdi)
          movq    %r9, 16(%rdi)
          movq    %rsi, 24(%rdi)

          # Same register state at return as the generated `leave` sequence.
          mov     %rdi, %rax              # rax = rp
          mov     %rsi, %rdx              # rdx = r3
          ret