llvm/test/CodeGen/PowerPC/p10-vector-multiply.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
   2 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
   3 ; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
   4 ; RUN:   FileCheck %s
   5 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
   6 ; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
   7 ; RUN:   FileCheck %s
   8 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix-xcoff \
   9 ; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
  10 ; RUN:   FileCheck %s
  11
  12 ; This test case aims to test the vector multiply instructions on Power10.
  13 ; This includes the low order and high order versions of vector multiply.
  14 ; The low order version operates on doublewords, whereas the high order version
  15 ; operates on signed and unsigned words and doublewords.
  16 ; This file also includes 128 bit vector multiply instructions.
  17
  18 define <2 x i64> @test_vmulld(<2 x i64> %a, <2 x i64> %b) {
  19 ; CHECK-LABEL: test_vmulld:
  20 ; CHECK:       # %bb.0: # %entry
  21 ; CHECK-NEXT:    vmulld v2, v3, v2
  22 ; CHECK-NEXT:    blr
  23 entry:
  24   %mul = mul <2 x i64> %b, %a
  25   ret <2 x i64> %mul
  26 }
  27
  28 define <2 x i64> @test_vmulhsd(<2 x i64> %a, <2 x i64> %b) {
  29 ; CHECK-LABEL: test_vmulhsd:
  30 ; CHECK:       # %bb.0: # %entry
  31 ; CHECK-NEXT:    vmulhsd v2, v3, v2
  32 ; CHECK-NEXT:    blr
  33 entry:
  34   %0 = sext <2 x i64> %a to <2 x i128>
  35   %1 = sext <2 x i64> %b to <2 x i128>
  36   %mul = mul <2 x i128> %1, %0
  37   %shr = lshr <2 x i128> %mul, <i128 64, i128 64>
  38   %tr = trunc <2 x i128> %shr to <2 x i64>
  39   ret <2 x i64> %tr
  40 }
  41
  42 define <2 x i64> @test_vmulhud(<2 x i64> %a, <2 x i64> %b) {
  43 ; CHECK-LABEL: test_vmulhud:
  44 ; CHECK:       # %bb.0: # %entry
  45 ; CHECK-NEXT:    vmulhud v2, v3, v2
  46 ; CHECK-NEXT:    blr
  47 entry:
  48   %0 = zext <2 x i64> %a to <2 x i128>
  49   %1 = zext <2 x i64> %b to <2 x i128>
  50   %mul = mul <2 x i128> %1, %0
  51   %shr = lshr <2 x i128> %mul, <i128 64, i128 64>
  52   %tr = trunc <2 x i128> %shr to <2 x i64>
  53   ret <2 x i64> %tr
  54 }
  55
  56 define <4 x i32> @test_vmulhsw(<4 x i32> %a, <4 x i32> %b) {
  57 ; CHECK-LABEL: test_vmulhsw:
  58 ; CHECK:       # %bb.0: # %entry
  59 ; CHECK-NEXT:    vmulhsw v2, v3, v2
  60 ; CHECK-NEXT:    blr
  61 entry:
  62   %0 = sext <4 x i32> %a to <4 x i64>
  63   %1 = sext <4 x i32> %b to <4 x i64>
  64   %mul = mul <4 x i64> %1, %0
  65   %shr = lshr <4 x i64> %mul, <i64 32, i64 32, i64 32, i64 32>
  66   %tr = trunc <4 x i64> %shr to <4 x i32>
  67   ret <4 x i32> %tr
  68 }
  69
  70 define <4 x i32> @test_vmulhuw(<4 x i32> %a, <4 x i32> %b) {
  71 ; CHECK-LABEL: test_vmulhuw:
  72 ; CHECK:       # %bb.0: # %entry
  73 ; CHECK-NEXT:    vmulhuw v2, v3, v2
  74 ; CHECK-NEXT:    blr
  75 entry:
  76   %0 = zext <4 x i32> %a to <4 x i64>
  77   %1 = zext <4 x i32> %b to <4 x i64>
  78   %mul = mul <4 x i64> %1, %0
  79   %shr = lshr <4 x i64> %mul, <i64 32, i64 32, i64 32, i64 32>
  80   %tr = trunc <4 x i64> %shr to <4 x i32>
  81   ret <4 x i32> %tr
  82 }
  83
  84 ; Test the vector multiply high intrinsics.
  85 declare <4 x i32> @llvm.ppc.altivec.vmulhsw(<4 x i32>, <4 x i32>)
  86 declare <4 x i32> @llvm.ppc.altivec.vmulhuw(<4 x i32>, <4 x i32>)
  87 declare <2 x i64> @llvm.ppc.altivec.vmulhsd(<2 x i64>, <2 x i64>)
  88 declare <2 x i64> @llvm.ppc.altivec.vmulhud(<2 x i64>, <2 x i64>)
  89
  90 define <4 x i32> @test_vmulhsw_intrinsic(<4 x i32> %a, <4 x i32> %b) {
  91 ; CHECK-LABEL: test_vmulhsw_intrinsic:
  92 ; CHECK:       # %bb.0: # %entry
  93 ; CHECK-NEXT:    vmulhsw v2, v2, v3
  94 ; CHECK-NEXT:    blr
  95 entry:
  96   %mulh = tail call <4 x i32> @llvm.ppc.altivec.vmulhsw(<4 x i32> %a, <4 x i32> %b)
  97   ret <4 x i32> %mulh
  98 }
  99
 100 define <4 x i32> @test_vmulhuw_intrinsic(<4 x i32> %a, <4 x i32> %b) {
 101 ; CHECK-LABEL: test_vmulhuw_intrinsic:
 102 ; CHECK:       # %bb.0: # %entry
 103 ; CHECK-NEXT:    vmulhuw v2, v2, v3
 104 ; CHECK-NEXT:    blr
 105 entry:
 106   %mulh = tail call <4 x i32> @llvm.ppc.altivec.vmulhuw(<4 x i32> %a, <4 x i32> %b)
 107   ret <4 x i32> %mulh
 108 }
 109
 110 define <2 x i64> @test_vmulhsd_intrinsic(<2 x i64> %a, <2 x i64> %b) {
 111 ; CHECK-LABEL: test_vmulhsd_intrinsic:
 112 ; CHECK:       # %bb.0: # %entry
 113 ; CHECK-NEXT:    vmulhsd v2, v2, v3
 114 ; CHECK-NEXT:    blr
 115 entry:
 116   %mulh = tail call <2 x i64> @llvm.ppc.altivec.vmulhsd(<2 x i64> %a, <2 x i64> %b)
 117   ret <2 x i64> %mulh
 118 }
 119
 120 define <2 x i64> @test_vmulhud_intrinsic(<2 x i64> %a, <2 x i64> %b) {
 121 ; CHECK-LABEL: test_vmulhud_intrinsic:
 122 ; CHECK:       # %bb.0: # %entry
 123 ; CHECK-NEXT:    vmulhud v2, v2, v3
 124 ; CHECK-NEXT:    blr
 125 entry:
 126   %mulh = tail call <2 x i64> @llvm.ppc.altivec.vmulhud(<2 x i64> %a, <2 x i64> %b)
 127   ret <2 x i64> %mulh
 128 }
 129
 130 declare <1 x i128> @llvm.ppc.altivec.vmuleud(<2 x i64>, <2 x i64>) nounwind readnone
 131 declare <1 x i128> @llvm.ppc.altivec.vmuloud(<2 x i64>, <2 x i64>) nounwind readnone
 132 declare <1 x i128> @llvm.ppc.altivec.vmulesd(<2 x i64>, <2 x i64>) nounwind readnone
 133 declare <1 x i128> @llvm.ppc.altivec.vmulosd(<2 x i64>, <2 x i64>) nounwind readnone
 134 declare <1 x i128> @llvm.ppc.altivec.vmsumcud(<2 x i64>, <2 x i64>, <1 x i128>) nounwind readnone
 135
 136 define <1 x i128> @test_vmuleud(<2 x i64> %x, <2 x i64> %y) nounwind readnone {
 137 ; CHECK-LABEL: test_vmuleud:
 138 ; CHECK:       # %bb.0:
 139 ; CHECK-NEXT:    vmuleud v2, v2, v3
 140 ; CHECK-NEXT:    blr
 141   %tmp = tail call <1 x i128> @llvm.ppc.altivec.vmuleud(<2 x i64> %x, <2 x i64> %y)
 142   ret <1 x i128> %tmp
 143 }
 144
 145 define <1 x i128> @test_vmuloud(<2 x i64> %x, <2 x i64> %y) nounwind readnone {
 146 ; CHECK-LABEL: test_vmuloud:
 147 ; CHECK:       # %bb.0:
 148 ; CHECK-NEXT:    vmuloud v2, v2, v3
 149 ; CHECK-NEXT:    blr
 150   %tmp = tail call <1 x i128> @llvm.ppc.altivec.vmuloud(<2 x i64> %x, <2 x i64> %y)
 151   ret <1 x i128> %tmp
 152 }
 153
 154 define <1 x i128> @test_vmulesd(<2 x i64> %x, <2 x i64> %y) nounwind readnone {
 155 ; CHECK-LABEL: test_vmulesd:
 156 ; CHECK:       # %bb.0:
 157 ; CHECK-NEXT:    vmulesd v2, v2, v3
 158 ; CHECK-NEXT:    blr
 159   %tmp = tail call <1 x i128> @llvm.ppc.altivec.vmulesd(<2 x i64> %x, <2 x i64> %y)
 160   ret <1 x i128> %tmp
 161 }
 162
 163 define <1 x i128> @test_vmulosd(<2 x i64> %x, <2 x i64> %y) nounwind readnone {
 164 ; CHECK-LABEL: test_vmulosd:
 165 ; CHECK:       # %bb.0:
 166 ; CHECK-NEXT:    vmulosd v2, v2, v3
 167 ; CHECK-NEXT:    blr
 168   %tmp = tail call <1 x i128> @llvm.ppc.altivec.vmulosd(<2 x i64> %x, <2 x i64> %y)
 169   ret <1 x i128> %tmp
 170 }
 171
 172 define <1 x i128> @test_vmsumcud(<2 x i64> %x, <2 x i64> %y, <1 x i128> %z) nounwind readnone {
 173 ; CHECK-LABEL: test_vmsumcud:
 174 ; CHECK:       # %bb.0:
 175 ; CHECK-NEXT:    vmsumcud v2, v2, v3, v4
 176 ; CHECK-NEXT:    blr
 177   %tmp = tail call <1 x i128> @llvm.ppc.altivec.vmsumcud(<2 x i64> %x, <2 x i64> %y, <1 x i128> %z)
 178   ret <1 x i128> %tmp
 179 }