llvm/test/CodeGen/AArch64/sve2-int-mul.ll

   1 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
   2
   3 ;
   4 ; MUL with SPLAT
   5 ;
   6 define <vscale x 8 x i16> @mul_i16_imm(<vscale x 8 x i16> %a) { ; %a * splat(255); label pattern ends in ':' so it cannot also match mul_i16_imm_neg
   7 ; CHECK-LABEL: mul_i16_imm:
   8 ; CHECK: mov w[[W:[0-9]+]], #255
   9 ; CHECK-NEXT: mov z1.h, w[[W]]
  10 ; CHECK-NEXT: mul z0.h, z0.h, z1.h
  11   %elt = insertelement <vscale x 8 x i16> undef, i16 255, i32 0 ; constant goes into lane 0
  12   %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer ; all-zero mask: every lane reads lane 0
  13   %res = mul <vscale x 8 x i16> %a, %splat
  14   ret <vscale x 8 x i16> %res
  15 }
  16
  17 define <vscale x 8 x i16> @mul_i16_imm_neg(<vscale x 8 x i16> %a) { ; %a * splat(-200); label pattern ends in ':' to pin the symbol definition line
  18 ; CHECK-LABEL: mul_i16_imm_neg:
  19 ; CHECK: mov w[[W:[0-9]+]], #-200
  20 ; CHECK-NEXT: mov z1.h, w[[W]]
  21 ; CHECK-NEXT: mul z0.h, z0.h, z1.h
  22   %elt = insertelement <vscale x 8 x i16> undef, i16 -200, i32 0 ; constant goes into lane 0
  23   %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer ; all-zero mask: every lane reads lane 0
  24   %res = mul <vscale x 8 x i16> %a, %splat
  25   ret <vscale x 8 x i16> %res
  26 }
  27
  28 define <vscale x 4 x i32> @mul_i32_imm(<vscale x 4 x i32> %a) { ; %a * splat(255); label pattern ends in ':' so it cannot also match mul_i32_imm_neg
  29 ; CHECK-LABEL: mul_i32_imm:
  30 ; CHECK: mov w[[W:[0-9]+]], #255
  31 ; CHECK-NEXT: mov z1.s, w[[W]]
  32 ; CHECK-NEXT: mul z0.s, z0.s, z1.s
  33   %elt = insertelement <vscale x 4 x i32> undef, i32 255, i32 0 ; constant goes into lane 0
  34   %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer ; all-zero mask: every lane reads lane 0
  35   %res = mul <vscale x 4 x i32> %a, %splat
  36   ret <vscale x 4 x i32> %res
  37 }
  38
  39 define <vscale x 4 x i32> @mul_i32_imm_neg(<vscale x 4 x i32> %a) { ; %a * splat(-200); label pattern ends in ':' to pin the symbol definition line
  40 ; CHECK-LABEL: mul_i32_imm_neg:
  41 ; CHECK: mov w[[W:[0-9]+]], #-200
  42 ; CHECK-NEXT: mov z1.s, w[[W]]
  43 ; CHECK-NEXT: mul z0.s, z0.s, z1.s
  44   %elt = insertelement <vscale x 4 x i32> undef, i32 -200, i32 0 ; constant goes into lane 0
  45   %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer ; all-zero mask: every lane reads lane 0
  46   %res = mul <vscale x 4 x i32> %a, %splat
  47   ret <vscale x 4 x i32> %res
  48 }
  49
  50 define <vscale x 2 x i64> @mul_i64_imm(<vscale x 2 x i64> %a) { ; %a * splat(255); label pattern ends in ':' so it cannot also match mul_i64_imm_neg
  51 ; CHECK-LABEL: mul_i64_imm:
  52 ; CHECK: mov w[[X:[0-9]+]], #255
  53 ; CHECK-NEXT: mov z1.d, x[[X]]
  54 ; CHECK-NEXT: mul z0.d, z0.d, z1.d
  55   %elt = insertelement <vscale x 2 x i64> undef, i64 255, i32 0 ; lane 0; expected asm builds 255 in wN and splats from xN (same register number)
  56   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer ; all-zero mask: every lane reads lane 0
  57   %res = mul <vscale x 2 x i64> %a, %splat
  58   ret <vscale x 2 x i64> %res
  59 }
  60
  61 define <vscale x 2 x i64> @mul_i64_imm_neg(<vscale x 2 x i64> %a) { ; %a * splat(-200); label pattern ends in ':' to pin the symbol definition line
  62 ; CHECK-LABEL: mul_i64_imm_neg:
  63 ; CHECK: mov x[[X:[0-9]+]], #-200
  64 ; CHECK-NEXT: mov z1.d, x[[X]]
  65 ; CHECK-NEXT: mul z0.d, z0.d, z1.d
  66   %elt = insertelement <vscale x 2 x i64> undef, i64 -200, i32 0 ; constant goes into lane 0
  67   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer ; all-zero mask: every lane reads lane 0
  68   %res = mul <vscale x 2 x i64> %a, %splat
  69   ret <vscale x 2 x i64> %res
  70 }
  71
  72 ;
  73 ; MUL (vector, unpredicated)
  74 ;
  75 define <vscale x 16 x i8> @mul_i8(<vscale x 16 x i8> %a,   ; plain IR mul of two scalable i8 vectors
  76                                   <vscale x 16 x i8> %b) { ; label pattern is anchored: "mul_i8" also occurs inside smulh_i8, umulh_i8, pmul_i8, ...
  77 ; CHECK-LABEL: {{^}}mul_i8:
  78 ; CHECK: mul z0.b, z0.b, z1.b
  79 ; CHECK-NEXT: ret
  80   %res = mul <vscale x 16 x i8> %a, %b
  81   ret <vscale x 16 x i8> %res
  82 }
  83
  84 define <vscale x 8 x i16> @mul_i16(<vscale x 8 x i16> %a,   ; plain IR mul of two scalable i16 vectors
  85                                   <vscale x 8 x i16> %b) { ; label pattern is anchored: "mul_i16" also occurs inside mul_i16_imm, smulh_i16, umulh_i16, ...
  86 ; CHECK-LABEL: {{^}}mul_i16:
  87 ; CHECK: mul z0.h, z0.h, z1.h
  88 ; CHECK-NEXT: ret
  89   %res = mul <vscale x 8 x i16> %a, %b
  90   ret <vscale x 8 x i16> %res
  91 }
  92
  93 define <vscale x 4 x i32> @mul_i32(<vscale x 4 x i32> %a,   ; plain IR mul of two scalable i32 vectors
  94                                   <vscale x 4 x i32> %b) { ; label pattern is anchored: "mul_i32" also occurs inside mul_i32_imm, smulh_i32, umulh_i32, ...
  95 ; CHECK-LABEL: {{^}}mul_i32:
  96 ; CHECK: mul z0.s, z0.s, z1.s
  97 ; CHECK-NEXT: ret
  98   %res = mul <vscale x 4 x i32> %a, %b
  99   ret <vscale x 4 x i32> %res
 100 }
 101
 102 define <vscale x 2 x i64> @mul_i64(<vscale x 2 x i64> %a,   ; plain IR mul of two scalable i64 vectors
 103                                   <vscale x 2 x i64> %b) { ; label pattern is anchored: "mul_i64" also occurs inside mul_i64_imm, smulh_i64, umulh_i64, ...
 104 ; CHECK-LABEL: {{^}}mul_i64:
 105 ; CHECK: mul z0.d, z0.d, z1.d
 106 ; CHECK-NEXT: ret
 107   %res = mul <vscale x 2 x i64> %a, %b
 108   ret <vscale x 2 x i64> %res
 109 }
 110
 111 ;
 112 ; SMULH (vector, unpredicated)
 113 ;
 114 define <vscale x 16 x i8> @smulh_i8(
 115     <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) { ; smulh intrinsic on i8 lanes, governed by a ptrue(31) predicate
 116 ; CHECK-LABEL: smulh_i8
 117 ; CHECK: smulh z0.b, z0.b, z1.b
 118 ; CHECK-NEXT: ret
 119   %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
 120   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.smulh.nxv16i8(
 121              <vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) ; expected to select the unpredicated form
 122   ret <vscale x 16 x i8> %out
 123 }
 124
 125 define <vscale x 8 x i16> @smulh_i16(
 126     <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) { ; smulh intrinsic on i16 lanes, governed by a ptrue(31) predicate
 127 ; CHECK-LABEL: smulh_i16
 128 ; CHECK: smulh z0.h, z0.h, z1.h
 129 ; CHECK-NEXT: ret
 130   %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
 131   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.smulh.nxv8i16(
 132              <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) ; expected to select the unpredicated form
 133   ret <vscale x 8 x i16> %out
 134 }
 135
 136 define <vscale x 4 x i32> @smulh_i32(
 137     <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) { ; smulh intrinsic on i32 lanes, governed by a ptrue(31) predicate
 138 ; CHECK-LABEL: smulh_i32
 139 ; CHECK: smulh z0.s, z0.s, z1.s
 140 ; CHECK-NEXT: ret
 141   %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
 142   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.smulh.nxv4i32(
 143              <vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) ; expected to select the unpredicated form
 144   ret <vscale x 4 x i32> %out
 145 }
 146
 147 define <vscale x 2 x i64> @smulh_i64(
 148     <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) { ; smulh intrinsic on i64 lanes, governed by a ptrue(31) predicate
 149 ; CHECK-LABEL: smulh_i64
 150 ; CHECK: smulh z0.d, z0.d, z1.d
 151 ; CHECK-NEXT: ret
 152   %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
 153   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.smulh.nxv2i64(
 154              <vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) ; expected to select the unpredicated form
 155   ret <vscale x 2 x i64> %out
 156 }
 157
 158 ;
 159 ; UMULH (vector, unpredicated)
 160 ;
 161 define <vscale x 16 x i8> @umulh_i8(
 162     <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) { ; umulh intrinsic on i8 lanes, governed by a ptrue(31) predicate
 163 ; CHECK-LABEL: umulh_i8
 164 ; CHECK: umulh z0.b, z0.b, z1.b
 165 ; CHECK-NEXT: ret
 166   %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
 167   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.umulh.nxv16i8(
 168              <vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) ; expected to select the unpredicated form
 169   ret <vscale x 16 x i8> %out
 170 }
 171
 172 define <vscale x 8 x i16> @umulh_i16(
 173     <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) { ; umulh intrinsic on i16 lanes, governed by a ptrue(31) predicate
 174 ; CHECK-LABEL: umulh_i16
 175 ; CHECK: umulh z0.h, z0.h, z1.h
 176 ; CHECK-NEXT: ret
 177   %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
 178   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.umulh.nxv8i16(
 179              <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) ; expected to select the unpredicated form
 180   ret <vscale x 8 x i16> %out
 181 }
 182
 183 define <vscale x 4 x i32> @umulh_i32(
 184     <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) { ; umulh intrinsic on i32 lanes, governed by a ptrue(31) predicate
 185 ; CHECK-LABEL: umulh_i32
 186 ; CHECK: umulh z0.s, z0.s, z1.s
 187 ; CHECK-NEXT: ret
 188   %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
 189   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.umulh.nxv4i32(
 190              <vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) ; expected to select the unpredicated form
 191   ret <vscale x 4 x i32> %out
 192 }
 193
 194 define <vscale x 2 x i64> @umulh_i64(
 195     <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) { ; umulh intrinsic on i64 lanes, governed by a ptrue(31) predicate
 196 ; CHECK-LABEL: umulh_i64
 197 ; CHECK: umulh z0.d, z0.d, z1.d
 198 ; CHECK-NEXT: ret
 199   %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
 200   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.umulh.nxv2i64(
 201              <vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) ; expected to select the unpredicated form
 202   ret <vscale x 2 x i64> %out
 203 }
 204
 205 ;
 206 ; PMUL (vector, unpredicated)
 207 ;
 208 define <vscale x 16 x i8> @pmul_i8(
 209     <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) { ; pmul intrinsic on i8 lanes; the intrinsic takes no predicate operand
 210 ; CHECK-LABEL: pmul_i8
 211 ; CHECK: pmul z0.b, z0.b, z1.b
 212 ; CHECK-NEXT: ret
 213   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.pmul.nxv16i8(
 214              <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
 215   ret <vscale x 16 x i8> %out
 216 }
 217
 218 ;
 219 ; SQDMULH (vector, unpredicated)
 220 ;
 221 define <vscale x 16 x i8> @sqdmulh_i8(
 222     <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) { ; sqdmulh intrinsic on i8 lanes; the intrinsic takes no predicate operand
 223 ; CHECK-LABEL: sqdmulh_i8
 224 ; CHECK: sqdmulh z0.b, z0.b, z1.b
 225 ; CHECK-NEXT: ret
 226   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqdmulh.nxv16i8(
 227              <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
 228   ret <vscale x 16 x i8> %out
 229 }
 230
 231 define <vscale x 8 x i16> @sqdmulh_i16(
 232     <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) { ; sqdmulh intrinsic on i16 lanes; the intrinsic takes no predicate operand
 233 ; CHECK-LABEL: sqdmulh_i16
 234 ; CHECK: sqdmulh z0.h, z0.h, z1.h
 235 ; CHECK-NEXT: ret
 236   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqdmulh.nxv8i16(
 237              <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
 238   ret <vscale x 8 x i16> %out
 239 }
 240
 241 define <vscale x 4 x i32> @sqdmulh_i32(
 242     <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) { ; sqdmulh intrinsic on i32 lanes; the intrinsic takes no predicate operand
 243 ; CHECK-LABEL: sqdmulh_i32
 244 ; CHECK: sqdmulh z0.s, z0.s, z1.s
 245 ; CHECK-NEXT: ret
 246   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqdmulh.nxv4i32(
 247              <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
 248   ret <vscale x 4 x i32> %out
 249 }
 250
 251 define <vscale x 2 x i64> @sqdmulh_i64(
 252     <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) { ; sqdmulh intrinsic on i64 lanes; the intrinsic takes no predicate operand
 253 ; CHECK-LABEL: sqdmulh_i64
 254 ; CHECK: sqdmulh z0.d, z0.d, z1.d
 255 ; CHECK-NEXT: ret
 256   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqdmulh.nxv2i64(
 257              <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
 258   ret <vscale x 2 x i64> %out
 259 }
 260
 261 ;
 262 ; SQRDMULH (vector, unpredicated)
 263 ;
 264 define <vscale x 16 x i8> @sqrdmulh_i8(
 265     <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) { ; sqrdmulh intrinsic on i8 lanes; the intrinsic takes no predicate operand
 266 ; CHECK-LABEL: sqrdmulh_i8
 267 ; CHECK: sqrdmulh z0.b, z0.b, z1.b
 268 ; CHECK-NEXT: ret
 269   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqrdmulh.nxv16i8(
 270              <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
 271   ret <vscale x 16 x i8> %out
 272 }
 273
 274 define <vscale x 8 x i16> @sqrdmulh_i16(
 275     <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) { ; sqrdmulh intrinsic on i16 lanes; the intrinsic takes no predicate operand
 276 ; CHECK-LABEL: sqrdmulh_i16
 277 ; CHECK: sqrdmulh z0.h, z0.h, z1.h
 278 ; CHECK-NEXT: ret
 279   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqrdmulh.nxv8i16(
 280              <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
 281   ret <vscale x 8 x i16> %out
 282 }
 283
 284 define <vscale x 4 x i32> @sqrdmulh_i32(
 285     <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) { ; sqrdmulh intrinsic on i32 lanes; the intrinsic takes no predicate operand
 286 ; CHECK-LABEL: sqrdmulh_i32
 287 ; CHECK: sqrdmulh z0.s, z0.s, z1.s
 288 ; CHECK-NEXT: ret
 289   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqrdmulh.nxv4i32(
 290              <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
 291   ret <vscale x 4 x i32> %out
 292 }
 293
 294 define <vscale x 2 x i64> @sqrdmulh_i64(
 295     <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) { ; sqrdmulh intrinsic on i64 lanes; the intrinsic takes no predicate operand
 296 ; CHECK-LABEL: sqrdmulh_i64
 297 ; CHECK: sqrdmulh z0.d, z0.d, z1.d
 298 ; CHECK-NEXT: ret
 299   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqrdmulh.nxv2i64(
 300              <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
 301   ret <vscale x 2 x i64> %out
 302 }
 303
 304 declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32) ; ptrue: i32 operand in, predicate vector out (one per lane count)
 305 declare <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32)
 306 declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32)
 307 declare <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32)
 308 declare <vscale x 16 x i8> @llvm.aarch64.sve.smulh.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>) ; smulh: predicate + two data vectors
 309 declare <vscale x 8 x i16> @llvm.aarch64.sve.smulh.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
 310 declare <vscale x 4 x i32> @llvm.aarch64.sve.smulh.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
 311 declare <vscale x 2 x i64> @llvm.aarch64.sve.smulh.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
 312 declare <vscale x 16 x i8> @llvm.aarch64.sve.umulh.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>) ; umulh: predicate + two data vectors
 313 declare <vscale x 8 x i16> @llvm.aarch64.sve.umulh.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
 314 declare <vscale x 4 x i32> @llvm.aarch64.sve.umulh.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
 315 declare <vscale x 2 x i64> @llvm.aarch64.sve.umulh.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
 316 declare <vscale x 16 x i8> @llvm.aarch64.sve.pmul.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>) ; pmul: two data vectors, no predicate
 317 declare <vscale x 16 x i8> @llvm.aarch64.sve.sqdmulh.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>) ; sqdmulh: two data vectors, no predicate
 318 declare <vscale x 8 x i16> @llvm.aarch64.sve.sqdmulh.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
 319 declare <vscale x 4 x i32> @llvm.aarch64.sve.sqdmulh.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
 320 declare <vscale x 2 x i64> @llvm.aarch64.sve.sqdmulh.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
 321 declare <vscale x 16 x i8> @llvm.aarch64.sve.sqrdmulh.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>) ; sqrdmulh: two data vectors, no predicate
 322 declare <vscale x 8 x i16> @llvm.aarch64.sve.sqrdmulh.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
 323 declare <vscale x 4 x i32> @llvm.aarch64.sve.sqrdmulh.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
 324 declare <vscale x 2 x i64> @llvm.aarch64.sve.sqrdmulh.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)