// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-cpu cyclone \
// RUN: -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg \
// RUN: | FileCheck --check-prefix=COMMON --check-prefix=COMMONIR --check-prefix=UNCONSTRAINED %s
// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-cpu cyclone \
// RUN: -ffp-exception-behavior=strict \
// RUN: -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg \
// RUN: | FileCheck --check-prefix=COMMON --check-prefix=COMMONIR --check-prefix=CONSTRAINED %s
// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-cpu cyclone \
// RUN: -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | llc -o=- - \
// RUN: | FileCheck --check-prefix=COMMON --check-prefix=CHECK-ASM %s
// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-cpu cyclone \
// RUN: -ffp-exception-behavior=strict \
// RUN: -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | llc -o=- - \
// RUN: | FileCheck --check-prefix=COMMON --check-prefix=CHECK-ASM %s
// REQUIRES: aarch64-registered-target

// Test new aarch64 intrinsics and types but constrained

#include <arm_neon.h>
22 // COMMON-LABEL: test_vfmas_lane_f32
23 // COMMONIR: [[EXTRACT:%.*]] = extractelement <2 x float> %c, i32 1
24 // UNCONSTRAINED: [[TMP2:%.*]] = call float @llvm.fma.f32(float %b, float [[EXTRACT]], float %a)
25 // CONSTRAINED: [[TMP2:%.*]] = call float @llvm.experimental.constrained.fma.f32(float %b, float [[EXTRACT]], float %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
26 // CHECK-ASM: fmla s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}.s[{{[0-9]+}}]
27 // COMMONIR: ret float [[TMP2]]
28 float32_t
test_vfmas_lane_f32(float32_t a
, float32_t b
, float32x2_t c
) {
29 return vfmas_lane_f32(a
, b
, c
, 1);
32 // COMMON-LABEL: test_vfmad_lane_f64
33 // COMMONIR: [[EXTRACT:%.*]] = extractelement <1 x double> %c, i32 0
34 // UNCONSTRAINED: [[TMP2:%.*]] = call double @llvm.fma.f64(double %b, double [[EXTRACT]], double %a)
35 // CONSTRAINED: [[TMP2:%.*]] = call double @llvm.experimental.constrained.fma.f64(double %b, double [[EXTRACT]], double %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
36 // CHECK-ASM: fmadd d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
37 // COMMONIR: ret double [[TMP2]]
38 float64_t
test_vfmad_lane_f64(float64_t a
, float64_t b
, float64x1_t c
) {
39 return vfmad_lane_f64(a
, b
, c
, 0);
42 // COMMON-LABEL: test_vfmad_laneq_f64
43 // COMMONIR: [[EXTRACT:%.*]] = extractelement <2 x double> %c, i32 1
44 // UNCONSTRAINED: [[TMP2:%.*]] = call double @llvm.fma.f64(double %b, double [[EXTRACT]], double %a)
45 // CONSTRAINED: [[TMP2:%.*]] = call double @llvm.experimental.constrained.fma.f64(double %b, double [[EXTRACT]], double %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
46 // CHECK-ASM: fmla d{{[0-9]+}}, d{{[0-9]+}}, v{{[0-9]+}}.d[{{[0-9]+}}]
47 // COMMONIR: ret double [[TMP2]]
48 float64_t
test_vfmad_laneq_f64(float64_t a
, float64_t b
, float64x2_t c
) {
49 return vfmad_laneq_f64(a
, b
, c
, 1);
52 // COMMON-LABEL: test_vfmss_lane_f32
53 // COMMONIR: [[SUB:%.*]] = fneg float %b
54 // COMMONIR: [[EXTRACT:%.*]] = extractelement <2 x float> %c, i32 1
55 // UNCONSTRAINED: [[TMP2:%.*]] = call float @llvm.fma.f32(float [[SUB]], float [[EXTRACT]], float %a)
56 // CONSTRAINED: [[TMP2:%.*]] = call float @llvm.experimental.constrained.fma.f32(float [[SUB]], float [[EXTRACT]], float %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
57 // CHECK-ASM: fmls s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}.s[{{[0-9]+}}]
58 // COMMONIR: ret float [[TMP2]]
59 float32_t
test_vfmss_lane_f32(float32_t a
, float32_t b
, float32x2_t c
) {
60 return vfmss_lane_f32(a
, b
, c
, 1);
63 // COMMON-LABEL: test_vfma_lane_f64
64 // COMMONIR: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
65 // COMMONIR: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
66 // COMMONIR: [[TMP2:%.*]] = bitcast <1 x double> %v to <8 x i8>
67 // COMMONIR: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x double>
68 // COMMONIR: [[LANE:%.*]] = shufflevector <1 x double> [[TMP3]], <1 x double> [[TMP3]], <1 x i32> zeroinitializer
69 // COMMONIR: [[FMLA:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
70 // COMMONIR: [[FMLA1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
71 // UNCONSTRAINED: [[FMLA2:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> [[FMLA]], <1 x double> [[LANE]], <1 x double> [[FMLA1]])
72 // CONSTRAINED: [[FMLA2:%.*]] = call <1 x double> @llvm.experimental.constrained.fma.v1f64(<1 x double> [[FMLA]], <1 x double> [[LANE]], <1 x double> [[FMLA1]], metadata !"round.tonearest", metadata !"fpexcept.strict")
73 // CHECK-ASM: fmadd d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
74 // COMMONIR: ret <1 x double> [[FMLA2]]
75 float64x1_t
test_vfma_lane_f64(float64x1_t a
, float64x1_t b
, float64x1_t v
) {
76 return vfma_lane_f64(a
, b
, v
, 0);
79 // COMMON-LABEL: test_vfms_lane_f64
80 // COMMONIR: [[SUB:%.*]] = fneg <1 x double> %b
81 // COMMONIR: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
82 // COMMONIR: [[TMP1:%.*]] = bitcast <1 x double> [[SUB]] to <8 x i8>
83 // COMMONIR: [[TMP2:%.*]] = bitcast <1 x double> %v to <8 x i8>
84 // COMMONIR: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x double>
85 // COMMONIR: [[LANE:%.*]] = shufflevector <1 x double> [[TMP3]], <1 x double> [[TMP3]], <1 x i32> zeroinitializer
86 // COMMONIR: [[FMLA:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
87 // COMMONIR: [[FMLA1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
88 // UNCONSTRAINED: [[FMLA2:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> [[FMLA]], <1 x double> [[LANE]], <1 x double> [[FMLA1]])
89 // CONSTRAINED: [[FMLA2:%.*]] = call <1 x double> @llvm.experimental.constrained.fma.v1f64(<1 x double> [[FMLA]], <1 x double> [[LANE]], <1 x double> [[FMLA1]], metadata !"round.tonearest", metadata !"fpexcept.strict")
90 // CHECK-ASM: fmsub d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
91 // COMMONIR: ret <1 x double> [[FMLA2]]
92 float64x1_t
test_vfms_lane_f64(float64x1_t a
, float64x1_t b
, float64x1_t v
) {
93 return vfms_lane_f64(a
, b
, v
, 0);
96 // COMMON-LABEL: test_vfma_laneq_f64
97 // COMMONIR: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
98 // COMMONIR: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
99 // COMMONIR: [[TMP2:%.*]] = bitcast <2 x double> %v to <16 x i8>
100 // COMMONIR: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to double
101 // COMMONIR: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to double
102 // COMMONIR: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
103 // COMMONIR: [[EXTRACT:%.*]] = extractelement <2 x double> [[TMP5]], i32 0
104 // UNCONSTRAINED: [[TMP6:%.*]] = call double @llvm.fma.f64(double [[TMP4]], double [[EXTRACT]], double [[TMP3]])
105 // CONSTRAINED: [[TMP6:%.*]] = call double @llvm.experimental.constrained.fma.f64(double [[TMP4]], double [[EXTRACT]], double [[TMP3]], metadata !"round.tonearest", metadata !"fpexcept.strict")
106 // CHECK-ASM: fmadd d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
107 // COMMONIR: [[TMP7:%.*]] = bitcast double [[TMP6]] to <1 x double>
108 // COMMONIR: ret <1 x double> [[TMP7]]
109 float64x1_t
test_vfma_laneq_f64(float64x1_t a
, float64x1_t b
, float64x2_t v
) {
110 return vfma_laneq_f64(a
, b
, v
, 0);
113 // COMMON-LABEL: test_vfms_laneq_f64
114 // COMMONIR: [[SUB:%.*]] = fneg <1 x double> %b
115 // CHECK-ASM: fneg d{{[0-9]+}}, d{{[0-9]+}}
116 // COMMONIR: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
117 // COMMONIR: [[TMP1:%.*]] = bitcast <1 x double> [[SUB]] to <8 x i8>
118 // COMMONIR: [[TMP2:%.*]] = bitcast <2 x double> %v to <16 x i8>
119 // COMMONIR: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to double
120 // COMMONIR: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to double
121 // COMMONIR: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
122 // COMMONIR: [[EXTRACT:%.*]] = extractelement <2 x double> [[TMP5]], i32 0
123 // UNCONSTRAINED: [[TMP6:%.*]] = call double @llvm.fma.f64(double [[TMP4]], double [[EXTRACT]], double [[TMP3]])
124 // CONSTRAINED: [[TMP6:%.*]] = call double @llvm.experimental.constrained.fma.f64(double [[TMP4]], double [[EXTRACT]], double [[TMP3]], metadata !"round.tonearest", metadata !"fpexcept.strict")
125 // CHECK-ASM: fmadd d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
126 // COMMONIR: [[TMP7:%.*]] = bitcast double [[TMP6]] to <1 x double>
127 // COMMONIR: ret <1 x double> [[TMP7]]
128 float64x1_t
test_vfms_laneq_f64(float64x1_t a
, float64x1_t b
, float64x2_t v
) {
129 return vfms_laneq_f64(a
, b
, v
, 0);