test/CodeGen/ARM/vqdmulh.ll

   1 ; RUN: llc < %s -march=arm -mattr=+neon > %t
   2 ; RUN: grep {vqdmulh\\.s16} %t | count 2
   3 ; RUN: grep {vqdmulh\\.s32} %t | count 2
   4 ; RUN: grep {vqrdmulh\\.s16} %t | count 2
   5 ; RUN: grep {vqrdmulh\\.s32} %t | count 2
   6
   7 define <4 x i16> @vqdmulhs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {  ; vqdmulh case for <4 x i16> operands passed by pointer
   8         %lhs = load <4 x i16>* %A  ; first operand: vector stored at %A
   9         %rhs = load <4 x i16>* %B  ; second operand: vector stored at %B
  10         %result = call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)  ; intrinsic under test
  11         ret <4 x i16> %result  ; hand the intrinsic's value back unchanged
  12 }
  13
  14 define <2 x i32> @vqdmulhs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {  ; vqdmulh case for <2 x i32> operands passed by pointer
  15         %lhs = load <2 x i32>* %A  ; first operand: vector stored at %A
  16         %rhs = load <2 x i32>* %B  ; second operand: vector stored at %B
  17         %result = call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)  ; intrinsic under test
  18         ret <2 x i32> %result  ; hand the intrinsic's value back unchanged
  19 }
  20
  21 define <8 x i16> @vqdmulhQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {  ; vqdmulh case for <8 x i16> operands passed by pointer
  22         %lhs = load <8 x i16>* %A  ; first operand: vector stored at %A
  23         %rhs = load <8 x i16>* %B  ; second operand: vector stored at %B
  24         %result = call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)  ; intrinsic under test
  25         ret <8 x i16> %result  ; hand the intrinsic's value back unchanged
  26 }
  27
  28 define <4 x i32> @vqdmulhQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {  ; vqdmulh case for <4 x i32> operands passed by pointer
  29         %lhs = load <4 x i32>* %A  ; first operand: vector stored at %A
  30         %rhs = load <4 x i32>* %B  ; second operand: vector stored at %B
  31         %result = call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)  ; intrinsic under test
  32         ret <4 x i32> %result  ; hand the intrinsic's value back unchanged
  33 }
  34
  35 declare <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16>, <4 x i16>) nounwind readnone  ; called by @vqdmulhs16
  36 declare <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32>, <2 x i32>) nounwind readnone  ; called by @vqdmulhs32
  37
  38 declare <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16>, <8 x i16>) nounwind readnone  ; called by @vqdmulhQs16
  39 declare <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32>, <4 x i32>) nounwind readnone  ; called by @vqdmulhQs32
  40
  41 define <4 x i16> @vqrdmulhs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {  ; vqrdmulh case for <4 x i16> operands passed by pointer
  42         %lhs = load <4 x i16>* %A  ; first operand: vector stored at %A
  43         %rhs = load <4 x i16>* %B  ; second operand: vector stored at %B
  44         %result = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)  ; intrinsic under test
  45         ret <4 x i16> %result  ; hand the intrinsic's value back unchanged
  46 }
  47
  48 define <2 x i32> @vqrdmulhs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {  ; vqrdmulh case for <2 x i32> operands passed by pointer
  49         %lhs = load <2 x i32>* %A  ; first operand: vector stored at %A
  50         %rhs = load <2 x i32>* %B  ; second operand: vector stored at %B
  51         %result = call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)  ; intrinsic under test
  52         ret <2 x i32> %result  ; hand the intrinsic's value back unchanged
  53 }
  54
  55 define <8 x i16> @vqrdmulhQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {  ; vqrdmulh case for <8 x i16> operands passed by pointer
  56         %lhs = load <8 x i16>* %A  ; first operand: vector stored at %A
  57         %rhs = load <8 x i16>* %B  ; second operand: vector stored at %B
  58         %result = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)  ; intrinsic under test
  59         ret <8 x i16> %result  ; hand the intrinsic's value back unchanged
  60 }
  61
  62 define <4 x i32> @vqrdmulhQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {  ; vqrdmulh case for <4 x i32> operands passed by pointer
  63         %lhs = load <4 x i32>* %A  ; first operand: vector stored at %A
  64         %rhs = load <4 x i32>* %B  ; second operand: vector stored at %B
  65         %result = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)  ; intrinsic under test
  66         ret <4 x i32> %result  ; hand the intrinsic's value back unchanged
  67 }
  68
  69 declare <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16>, <4 x i16>) nounwind readnone  ; called by @vqrdmulhs16
  70 declare <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32>, <2 x i32>) nounwind readnone  ; called by @vqrdmulhs32
  71
  72 declare <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16>, <8 x i16>) nounwind readnone  ; called by @vqrdmulhQs16
  73 declare <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32>, <4 x i32>) nounwind readnone  ; called by @vqrdmulhQs32