clang/test/CodeGen/arm-bf16-getset-intrinsics.c

   1 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
   2 // RUN: %clang_cc1 -triple armv8.6a-arm-none-eabi -target-feature +neon -target-feature +bf16 -mfloat-abi hard \
   3 // RUN:  -disable-O0-optnone -emit-llvm %s -o - | opt -S -passes=mem2reg | FileCheck %s
   4 // RUN: %clang_cc1 -triple armv8.6a-arm-none-eabi -target-feature +neon -target-feature +bf16 -mfloat-abi soft \
   5 // RUN:  -disable-O0-optnone -emit-llvm %s -o - | opt -S -passes=mem2reg | FileCheck %s
   6
   7 // REQUIRES: aarch64-registered-target || arm-registered-target
   8
   9 #include <arm_neon.h>
  10
  11 // CHECK-LABEL: @test_vcreate_bf16(
  12 // CHECK-NEXT:  entry:
  13 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i64 [[A:%.*]] to <4 x bfloat>
  14 // CHECK-NEXT:    ret <4 x bfloat> [[TMP0]]
  15 // Expected IR: vcreate_bf16(a) is a single bitcast of the i64 argument to <4 x bfloat>, returned directly.
  16 bfloat16x4_t test_vcreate_bf16(uint64_t a) {
  17   return vcreate_bf16(a);
  18 }
  19
  20 // CHECK-LABEL: @test_vdup_n_bf16(
  21 // CHECK-NEXT:  entry:
  22 // CHECK-NEXT:    [[VECINIT_I:%.*]] = insertelement <4 x bfloat> poison, bfloat [[V:%.*]], i32 0
  23 // CHECK-NEXT:    [[VECINIT1_I:%.*]] = insertelement <4 x bfloat> [[VECINIT_I]], bfloat [[V]], i32 1
  24 // CHECK-NEXT:    [[VECINIT2_I:%.*]] = insertelement <4 x bfloat> [[VECINIT1_I]], bfloat [[V]], i32 2
  25 // CHECK-NEXT:    [[VECINIT3_I:%.*]] = insertelement <4 x bfloat> [[VECINIT2_I]], bfloat [[V]], i32 3
  26 // CHECK-NEXT:    ret <4 x bfloat> [[VECINIT3_I]]
  27 // Expected IR: vdup_n_bf16(v) is a chain of four insertelement instructions placing the same scalar in lanes 0-3, starting from poison.
  28 bfloat16x4_t test_vdup_n_bf16(bfloat16_t v) {
  29   return vdup_n_bf16(v);
  30 }
  31
  32 // CHECK-LABEL: @test_vdupq_n_bf16(
  33 // CHECK-NEXT:  entry:
  34 // CHECK-NEXT:    [[VECINIT_I:%.*]] = insertelement <8 x bfloat> poison, bfloat [[V:%.*]], i32 0
  35 // CHECK-NEXT:    [[VECINIT1_I:%.*]] = insertelement <8 x bfloat> [[VECINIT_I]], bfloat [[V]], i32 1
  36 // CHECK-NEXT:    [[VECINIT2_I:%.*]] = insertelement <8 x bfloat> [[VECINIT1_I]], bfloat [[V]], i32 2
  37 // CHECK-NEXT:    [[VECINIT3_I:%.*]] = insertelement <8 x bfloat> [[VECINIT2_I]], bfloat [[V]], i32 3
  38 // CHECK-NEXT:    [[VECINIT4_I:%.*]] = insertelement <8 x bfloat> [[VECINIT3_I]], bfloat [[V]], i32 4
  39 // CHECK-NEXT:    [[VECINIT5_I:%.*]] = insertelement <8 x bfloat> [[VECINIT4_I]], bfloat [[V]], i32 5
  40 // CHECK-NEXT:    [[VECINIT6_I:%.*]] = insertelement <8 x bfloat> [[VECINIT5_I]], bfloat [[V]], i32 6
  41 // CHECK-NEXT:    [[VECINIT7_I:%.*]] = insertelement <8 x bfloat> [[VECINIT6_I]], bfloat [[V]], i32 7
  42 // CHECK-NEXT:    ret <8 x bfloat> [[VECINIT7_I]]
  43 // Expected IR: vdupq_n_bf16(v) is a chain of eight insertelement instructions placing the same scalar in lanes 0-7, starting from poison.
  44 bfloat16x8_t test_vdupq_n_bf16(bfloat16_t v) {
  45   return vdupq_n_bf16(v);
  46 }
  47
  48 // CHECK-LABEL: @test_vdup_lane_bf16(
  49 // CHECK-NEXT:  entry:
  50 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x bfloat> [[V:%.*]] to <8 x i8>
  51 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x bfloat>
  52 // CHECK-NEXT:    [[LANE:%.*]] = shufflevector <4 x bfloat> [[TMP1]], <4 x bfloat> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  53 // CHECK-NEXT:    ret <4 x bfloat> [[LANE]]
  54 // Expected IR: vdup_lane_bf16(v, 1) is a bitcast round trip through <8 x i8>, then a shufflevector whose 4-entry mask is all lane 1.
  55 bfloat16x4_t test_vdup_lane_bf16(bfloat16x4_t v) {
  56   return vdup_lane_bf16(v, 1);
  57 }
  58
  59 // CHECK-LABEL: @test_vdupq_lane_bf16(
  60 // CHECK-NEXT:  entry:
  61 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x bfloat> [[V:%.*]] to <8 x i8>
  62 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x bfloat>
  63 // CHECK-NEXT:    [[LANE:%.*]] = shufflevector <4 x bfloat> [[TMP1]], <4 x bfloat> [[TMP1]], <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  64 // CHECK-NEXT:    ret <8 x bfloat> [[LANE]]
  65 // Expected IR: vdupq_lane_bf16(v, 1) is a bitcast round trip through <8 x i8>, then a shufflevector whose 8-entry mask is all lane 1, widening to <8 x bfloat>.
  66 bfloat16x8_t test_vdupq_lane_bf16(bfloat16x4_t v) {
  67   return vdupq_lane_bf16(v, 1);
  68 }
  69
  70 // CHECK-LABEL: @test_vdup_laneq_bf16(
  71 // CHECK-NEXT:  entry:
  72 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x bfloat> [[V:%.*]] to <16 x i8>
  73 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x bfloat>
  74 // CHECK-NEXT:    [[LANE:%.*]] = shufflevector <8 x bfloat> [[TMP1]], <8 x bfloat> [[TMP1]], <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  75 // CHECK-NEXT:    ret <4 x bfloat> [[LANE]]
  76 // Expected IR: vdup_laneq_bf16(v, 7) is a bitcast round trip through <16 x i8>, then a shufflevector whose 4-entry mask is all lane 7, narrowing to <4 x bfloat>.
  77 bfloat16x4_t test_vdup_laneq_bf16(bfloat16x8_t v) {
  78   return vdup_laneq_bf16(v, 7);
  79 }
  80
  81 // CHECK-LABEL: @test_vdupq_laneq_bf16(
  82 // CHECK-NEXT:  entry:
  83 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x bfloat> [[V:%.*]] to <16 x i8>
  84 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x bfloat>
  85 // CHECK-NEXT:    [[LANE:%.*]] = shufflevector <8 x bfloat> [[TMP1]], <8 x bfloat> [[TMP1]], <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
  86 // CHECK-NEXT:    ret <8 x bfloat> [[LANE]]
  87 // Expected IR: vdupq_laneq_bf16(v, 7) is a bitcast round trip through <16 x i8>, then a shufflevector whose 8-entry mask is all lane 7.
  88 bfloat16x8_t test_vdupq_laneq_bf16(bfloat16x8_t v) {
  89   return vdupq_laneq_bf16(v, 7);
  90 }
  91
  92 // CHECK-LABEL: @test_vcombine_bf16(
  93 // CHECK-NEXT:  entry:
  94 // CHECK-NEXT:    [[SHUFFLE_I:%.*]] = shufflevector <4 x bfloat> [[LOW:%.*]], <4 x bfloat> [[HIGH:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  95 // CHECK-NEXT:    ret <8 x bfloat> [[SHUFFLE_I]]
  96 // Expected IR: vcombine_bf16(low, high) is one shufflevector over both inputs with mask 0-7, so low fills lanes 0-3 and high fills lanes 4-7.
  97 bfloat16x8_t test_vcombine_bf16(bfloat16x4_t low, bfloat16x4_t high) {
  98   return vcombine_bf16(low, high);
  99 }
 100
 101 // CHECK-LABEL: @test_vget_high_bf16(
 102 // CHECK-NEXT:  entry:
 103 // CHECK-NEXT:    [[SHUFFLE_I:%.*]] = shufflevector <8 x bfloat> [[A:%.*]], <8 x bfloat> [[A]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
 104 // CHECK-NEXT:    ret <4 x bfloat> [[SHUFFLE_I]]
 105 // Expected IR: vget_high_bf16(a) is one shufflevector that selects lanes 4-7 of the <8 x bfloat> input.
 106 bfloat16x4_t test_vget_high_bf16(bfloat16x8_t a) {
 107   return vget_high_bf16(a);
 108 }
 109
 110 // CHECK-LABEL: @test_vget_low_bf16(
 111 // CHECK-NEXT:  entry:
 112 // CHECK-NEXT:    [[SHUFFLE_I:%.*]] = shufflevector <8 x bfloat> [[A:%.*]], <8 x bfloat> [[A]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 113 // CHECK-NEXT:    ret <4 x bfloat> [[SHUFFLE_I]]
 114 // Expected IR: vget_low_bf16(a) is one shufflevector that selects lanes 0-3 of the <8 x bfloat> input.
 115 bfloat16x4_t test_vget_low_bf16(bfloat16x8_t a) {
 116   return vget_low_bf16(a);
 117 }
 118
 119 // CHECK-LABEL: @test_vget_lane_bf16(
 120 // CHECK-NEXT:  entry:
 121 // CHECK-NEXT:    [[VGET_LANE:%.*]] = extractelement <4 x bfloat> [[V:%.*]], i32 1
 122 // CHECK-NEXT:    ret bfloat [[VGET_LANE]]
 123 // Expected IR: vget_lane_bf16(v, 1) is one extractelement at index 1 of the <4 x bfloat> input.
 124 bfloat16_t test_vget_lane_bf16(bfloat16x4_t v) {
 125   return vget_lane_bf16(v, 1);
 126 }
 127
 128 // CHECK-LABEL: @test_vgetq_lane_bf16(
 129 // CHECK-NEXT:  entry:
 130 // CHECK-NEXT:    [[VGET_LANE:%.*]] = extractelement <8 x bfloat> [[V:%.*]], i32 7
 131 // CHECK-NEXT:    ret bfloat [[VGET_LANE]]
 132 // Expected IR: vgetq_lane_bf16(v, 7) is one extractelement at index 7 of the <8 x bfloat> input.
 133 bfloat16_t test_vgetq_lane_bf16(bfloat16x8_t v) {
 134   return vgetq_lane_bf16(v, 7);
 135 }
 136
 137 // CHECK-LABEL: @test_vset_lane_bf16(
 138 // CHECK-NEXT:  entry:
 139 // CHECK-NEXT:    [[VSET_LANE:%.*]] = insertelement <4 x bfloat> [[V:%.*]], bfloat [[A:%.*]], i32 1
 140 // CHECK-NEXT:    ret <4 x bfloat> [[VSET_LANE]]
 141 // Expected IR: vset_lane_bf16(a, v, 1) is one insertelement writing the scalar into index 1 of the <4 x bfloat> input.
 142 bfloat16x4_t test_vset_lane_bf16(bfloat16_t a, bfloat16x4_t v) {
 143   return vset_lane_bf16(a, v, 1);
 144 }
 145
 146 // CHECK-LABEL: @test_vsetq_lane_bf16(
 147 // CHECK-NEXT:  entry:
 148 // CHECK-NEXT:    [[VSET_LANE:%.*]] = insertelement <8 x bfloat> [[V:%.*]], bfloat [[A:%.*]], i32 7
 149 // CHECK-NEXT:    ret <8 x bfloat> [[VSET_LANE]]
 150 // Expected IR: vsetq_lane_bf16(a, v, 7) is one insertelement writing the scalar into index 7 of the <8 x bfloat> input.
 151 bfloat16x8_t test_vsetq_lane_bf16(bfloat16_t a, bfloat16x8_t v) {
 152   return vsetq_lane_bf16(a, v, 7);
 153 }
 154
 155 // CHECK-LABEL: @test_vduph_lane_bf16(
 156 // CHECK-NEXT:  entry:
 157 // CHECK-NEXT:    [[VGET_LANE:%.*]] = extractelement <4 x bfloat> [[V:%.*]], i32 1
 158 // CHECK-NEXT:    ret bfloat [[VGET_LANE]]
 159 // Expected IR: vduph_lane_bf16(v, 1) is one extractelement at index 1 of the <4 x bfloat> input.
 160 bfloat16_t test_vduph_lane_bf16(bfloat16x4_t v) {
 161   return vduph_lane_bf16(v, 1);
 162 }
 163
 164 // CHECK-LABEL: @test_vduph_laneq_bf16(
 165 // CHECK-NEXT:  entry:
 166 // CHECK-NEXT:    [[VGET_LANE:%.*]] = extractelement <8 x bfloat> [[V:%.*]], i32 7
 167 // CHECK-NEXT:    ret bfloat [[VGET_LANE]]
 168 // Expected IR: vduph_laneq_bf16(v, 7) is one extractelement at index 7 of the <8 x bfloat> input.
 169 bfloat16_t test_vduph_laneq_bf16(bfloat16x8_t v) {
 170   return vduph_laneq_bf16(v, 7);
 171 }