; test/CodeGen/X86/sha.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mattr=+sha -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
; RUN: llc < %s -mattr=+sha,+avx2 -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=CHECK --check-prefix=AVX

   5 declare <4 x i32> @llvm.x86.sha1rnds4(<4 x i32>, <4 x i32>, i8) nounwind readnone
   6
   7 define <4 x i32> @test_sha1rnds4rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable {
   8 ; CHECK-LABEL: test_sha1rnds4rr:
   9 ; CHECK:       # %bb.0: # %entry
  10 ; CHECK-NEXT:    sha1rnds4 $3, %xmm1, %xmm0
  11 ; CHECK-NEXT:    retq
  12 entry:
  13   %0 = tail call <4 x i32> @llvm.x86.sha1rnds4(<4 x i32> %a, <4 x i32> %b, i8 3)
  14   ret <4 x i32> %0
  15 }
  16
  17 define <4 x i32> @test_sha1rnds4rm(<4 x i32> %a, <4 x i32>* %b) nounwind uwtable {
  18 ; CHECK-LABEL: test_sha1rnds4rm:
  19 ; CHECK:       # %bb.0: # %entry
  20 ; CHECK-NEXT:    sha1rnds4 $3, (%rdi), %xmm0
  21 ; CHECK-NEXT:    retq
  22 entry:
  23   %0 = load <4 x i32>, <4 x i32>* %b
  24   %1 = tail call <4 x i32> @llvm.x86.sha1rnds4(<4 x i32> %a, <4 x i32> %0, i8 3)
  25   ret <4 x i32> %1
  26 }
  27
  28 declare <4 x i32> @llvm.x86.sha1nexte(<4 x i32>, <4 x i32>) nounwind readnone
  29
  30 define <4 x i32> @test_sha1nexterr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable {
  31 ; CHECK-LABEL: test_sha1nexterr:
  32 ; CHECK:       # %bb.0: # %entry
  33 ; CHECK-NEXT:    sha1nexte %xmm1, %xmm0
  34 ; CHECK-NEXT:    retq
  35 entry:
  36   %0 = tail call <4 x i32> @llvm.x86.sha1nexte(<4 x i32> %a, <4 x i32> %b)
  37   ret <4 x i32> %0
  38 }
  39
  40 define <4 x i32> @test_sha1nexterm(<4 x i32> %a, <4 x i32>* %b) nounwind uwtable {
  41 ; CHECK-LABEL: test_sha1nexterm:
  42 ; CHECK:       # %bb.0: # %entry
  43 ; CHECK-NEXT:    sha1nexte (%rdi), %xmm0
  44 ; CHECK-NEXT:    retq
  45 entry:
  46   %0 = load <4 x i32>, <4 x i32>* %b
  47   %1 = tail call <4 x i32> @llvm.x86.sha1nexte(<4 x i32> %a, <4 x i32> %0)
  48   ret <4 x i32> %1
  49 }
  50
  51 declare <4 x i32> @llvm.x86.sha1msg1(<4 x i32>, <4 x i32>) nounwind readnone
  52
  53 define <4 x i32> @test_sha1msg1rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable {
  54 ; CHECK-LABEL: test_sha1msg1rr:
  55 ; CHECK:       # %bb.0: # %entry
  56 ; CHECK-NEXT:    sha1msg1 %xmm1, %xmm0
  57 ; CHECK-NEXT:    retq
  58 entry:
  59   %0 = tail call <4 x i32> @llvm.x86.sha1msg1(<4 x i32> %a, <4 x i32> %b)
  60   ret <4 x i32> %0
  61 }
  62
  63 define <4 x i32> @test_sha1msg1rm(<4 x i32> %a, <4 x i32>* %b) nounwind uwtable {
  64 ; CHECK-LABEL: test_sha1msg1rm:
  65 ; CHECK:       # %bb.0: # %entry
  66 ; CHECK-NEXT:    sha1msg1 (%rdi), %xmm0
  67 ; CHECK-NEXT:    retq
  68 entry:
  69   %0 = load <4 x i32>, <4 x i32>* %b
  70   %1 = tail call <4 x i32> @llvm.x86.sha1msg1(<4 x i32> %a, <4 x i32> %0)
  71   ret <4 x i32> %1
  72 }
  73
  74 declare <4 x i32> @llvm.x86.sha1msg2(<4 x i32>, <4 x i32>) nounwind readnone
  75
  76 define <4 x i32> @test_sha1msg2rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable {
  77 ; CHECK-LABEL: test_sha1msg2rr:
  78 ; CHECK:       # %bb.0: # %entry
  79 ; CHECK-NEXT:    sha1msg2 %xmm1, %xmm0
  80 ; CHECK-NEXT:    retq
  81 entry:
  82   %0 = tail call <4 x i32> @llvm.x86.sha1msg2(<4 x i32> %a, <4 x i32> %b)
  83   ret <4 x i32> %0
  84 }
  85
  86 define <4 x i32> @test_sha1msg2rm(<4 x i32> %a, <4 x i32>* %b) nounwind uwtable {
  87 ; CHECK-LABEL: test_sha1msg2rm:
  88 ; CHECK:       # %bb.0: # %entry
  89 ; CHECK-NEXT:    sha1msg2 (%rdi), %xmm0
  90 ; CHECK-NEXT:    retq
  91 entry:
  92   %0 = load <4 x i32>, <4 x i32>* %b
  93   %1 = tail call <4 x i32> @llvm.x86.sha1msg2(<4 x i32> %a, <4 x i32> %0)
  94   ret <4 x i32> %1
  95 }
  96
  97 declare <4 x i32> @llvm.x86.sha256rnds2(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone
  98
  99 define <4 x i32> @test_sha256rnds2rr(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) nounwind uwtable {
 100 ; SSE-LABEL: test_sha256rnds2rr:
 101 ; SSE:       # %bb.0: # %entry
 102 ; SSE-NEXT:    movaps %xmm0, %xmm3
 103 ; SSE-NEXT:    movaps %xmm2, %xmm0
 104 ; SSE-NEXT:    sha256rnds2 %xmm0, %xmm1, %xmm3
 105 ; SSE-NEXT:    movaps %xmm3, %xmm0
 106 ; SSE-NEXT:    retq
 107 ;
 108 ; AVX-LABEL: test_sha256rnds2rr:
 109 ; AVX:       # %bb.0: # %entry
 110 ; AVX-NEXT:    vmovaps %xmm0, %xmm3
 111 ; AVX-NEXT:    vmovaps %xmm2, %xmm0
 112 ; AVX-NEXT:    sha256rnds2 %xmm0, %xmm1, %xmm3
 113 ; AVX-NEXT:    vmovaps %xmm3, %xmm0
 114 ; AVX-NEXT:    retq
 115 entry:
 116   %0 = tail call <4 x i32> @llvm.x86.sha256rnds2(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c)
 117   ret <4 x i32> %0
 118 }
 119
 120 define <4 x i32> @test_sha256rnds2rm(<4 x i32> %a, <4 x i32>* %b, <4 x i32> %c) nounwind uwtable {
 121 ; SSE-LABEL: test_sha256rnds2rm:
 122 ; SSE:       # %bb.0: # %entry
 123 ; SSE-NEXT:    movaps %xmm0, %xmm2
 124 ; SSE-NEXT:    movaps %xmm1, %xmm0
 125 ; SSE-NEXT:    sha256rnds2 %xmm0, (%rdi), %xmm2
 126 ; SSE-NEXT:    movaps %xmm2, %xmm0
 127 ; SSE-NEXT:    retq
 128 ;
 129 ; AVX-LABEL: test_sha256rnds2rm:
 130 ; AVX:       # %bb.0: # %entry
 131 ; AVX-NEXT:    vmovaps %xmm0, %xmm2
 132 ; AVX-NEXT:    vmovaps %xmm1, %xmm0
 133 ; AVX-NEXT:    sha256rnds2 %xmm0, (%rdi), %xmm2
 134 ; AVX-NEXT:    vmovaps %xmm2, %xmm0
 135 ; AVX-NEXT:    retq
 136 entry:
 137   %0 = load <4 x i32>, <4 x i32>* %b
 138   %1 = tail call <4 x i32> @llvm.x86.sha256rnds2(<4 x i32> %a, <4 x i32> %0, <4 x i32> %c)
 139   ret <4 x i32> %1
 140 }
 141
 142 declare <4 x i32> @llvm.x86.sha256msg1(<4 x i32>, <4 x i32>) nounwind readnone
 143
 144 define <4 x i32> @test_sha256msg1rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable {
 145 ; CHECK-LABEL: test_sha256msg1rr:
 146 ; CHECK:       # %bb.0: # %entry
 147 ; CHECK-NEXT:    sha256msg1 %xmm1, %xmm0
 148 ; CHECK-NEXT:    retq
 149 entry:
 150   %0 = tail call <4 x i32> @llvm.x86.sha256msg1(<4 x i32> %a, <4 x i32> %b)
 151   ret <4 x i32> %0
 152 }
 153
 154 define <4 x i32> @test_sha256msg1rm(<4 x i32> %a, <4 x i32>* %b) nounwind uwtable {
 155 ; CHECK-LABEL: test_sha256msg1rm:
 156 ; CHECK:       # %bb.0: # %entry
 157 ; CHECK-NEXT:    sha256msg1 (%rdi), %xmm0
 158 ; CHECK-NEXT:    retq
 159 entry:
 160   %0 = load <4 x i32>, <4 x i32>* %b
 161   %1 = tail call <4 x i32> @llvm.x86.sha256msg1(<4 x i32> %a, <4 x i32> %0)
 162   ret <4 x i32> %1
 163 }
 164
 165 declare <4 x i32> @llvm.x86.sha256msg2(<4 x i32>, <4 x i32>) nounwind readnone
 166
 167 define <4 x i32> @test_sha256msg2rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable {
 168 ; CHECK-LABEL: test_sha256msg2rr:
 169 ; CHECK:       # %bb.0: # %entry
 170 ; CHECK-NEXT:    sha256msg2 %xmm1, %xmm0
 171 ; CHECK-NEXT:    retq
 172 entry:
 173   %0 = tail call <4 x i32> @llvm.x86.sha256msg2(<4 x i32> %a, <4 x i32> %b)
 174   ret <4 x i32> %0
 175 }
 176
 177 define <4 x i32> @test_sha256msg2rm(<4 x i32> %a, <4 x i32>* %b) nounwind uwtable {
 178 ; CHECK-LABEL: test_sha256msg2rm:
 179 ; CHECK:       # %bb.0: # %entry
 180 ; CHECK-NEXT:    sha256msg2 (%rdi), %xmm0
 181 ; CHECK-NEXT:    retq
 182 entry:
 183   %0 = load <4 x i32>, <4 x i32>* %b
 184   %1 = tail call <4 x i32> @llvm.x86.sha256msg2(<4 x i32> %a, <4 x i32> %0)
 185   ret <4 x i32> %1
 186 }
 187
 188 ; Make sure we don't forget that sha instructions have no VEX equivalents and thus don't zero YMM/ZMM.
 189 define <8 x i32> @test_sha1rnds4_zero_extend(<4 x i32> %a, <4 x i32>* %b) nounwind uwtable {
 190 ; SSE-LABEL: test_sha1rnds4_zero_extend:
 191 ; SSE:       # %bb.0: # %entry
 192 ; SSE-NEXT:    sha1rnds4 $3, (%rdi), %xmm0
 193 ; SSE-NEXT:    xorps %xmm1, %xmm1
 194 ; SSE-NEXT:    retq
 195 ;
 196 ; AVX-LABEL: test_sha1rnds4_zero_extend:
 197 ; AVX:       # %bb.0: # %entry
 198 ; AVX-NEXT:    sha1rnds4 $3, (%rdi), %xmm0
 199 ; AVX-NEXT:    vmovaps %xmm0, %xmm0
 200 ; AVX-NEXT:    retq
 201 entry:
 202   %0 = load <4 x i32>, <4 x i32>* %b
 203   %1 = tail call <4 x i32> @llvm.x86.sha1rnds4(<4 x i32> %a, <4 x i32> %0, i8 3)
 204   %2 = shufflevector <4 x i32> %1, <4 x i32> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 205   ret <8 x i32> %2
 206 }