clang/test/CodeGen/AArch64/v8.2a-neon-intrinsics-generic.c

   1 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature
   2 // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-feature -fullfp16 -target-feature +v8a\
   3 // RUN: -flax-vector-conversions=none -disable-O0-optnone -emit-llvm -o - %s \
   4 // RUN: | opt -S -passes=mem2reg \
   5 // RUN: | FileCheck %s
   6 // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-feature +fullfp16 -target-feature +v8.2a\
   7 // RUN: -flax-vector-conversions=none -disable-O0-optnone -emit-llvm -o - %s \
   8 // RUN: | opt -S -passes=mem2reg \
   9 // RUN: | FileCheck %s
  10
  11 // REQUIRES: aarch64-registered-target
  12
  13 #include <arm_neon.h>
  14
  15 // CHECK-LABEL: define {{[^@]+}}@test_vbsl_f16
  16 // CHECK-SAME: (<4 x i16> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]], <4 x half> noundef [[C:%.*]]) #[[ATTR0:[0-9]+]] {
  17 // CHECK-NEXT:  entry:
  18 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
  19 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8>
  20 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x half> [[C]] to <8 x i8>
  21 // CHECK-NEXT:    [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
  22 // CHECK-NEXT:    [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
  23 // CHECK-NEXT:    [[VBSL3_I:%.*]] = and <4 x i16> [[A]], [[VBSL1_I]]
  24 // CHECK-NEXT:    [[TMP3:%.*]] = xor <4 x i16> [[A]], splat (i16 -1)
  25 // CHECK-NEXT:    [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], [[VBSL2_I]]
  26 // CHECK-NEXT:    [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]]
  27 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i16> [[VBSL5_I]] to <4 x half>
  28 // CHECK-NEXT:    ret <4 x half> [[TMP4]]
  29 //
  30 float16x4_t test_vbsl_f16(uint16x4_t a, float16x4_t b, float16x4_t c) { // Codegen check for the 4-lane f16 bitwise-select intrinsic.
  31   return vbsl_f16(a, b, c); // CHECK lines above expect (a & b) | (~a & c) on <4 x i16>, bitcast back to <4 x half>.
  32 }
  33
  34 // CHECK-LABEL: define {{[^@]+}}@test_vbslq_f16
  35 // CHECK-SAME: (<8 x i16> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]], <8 x half> noundef [[C:%.*]]) #[[ATTR0]] {
  36 // CHECK-NEXT:  entry:
  37 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
  38 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8>
  39 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x half> [[C]] to <16 x i8>
  40 // CHECK-NEXT:    [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
  41 // CHECK-NEXT:    [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
  42 // CHECK-NEXT:    [[VBSL3_I:%.*]] = and <8 x i16> [[A]], [[VBSL1_I]]
  43 // CHECK-NEXT:    [[TMP3:%.*]] = xor <8 x i16> [[A]], splat (i16 -1)
  44 // CHECK-NEXT:    [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], [[VBSL2_I]]
  45 // CHECK-NEXT:    [[VBSL5_I:%.*]] = or <8 x i16> [[VBSL3_I]], [[VBSL4_I]]
  46 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i16> [[VBSL5_I]] to <8 x half>
  47 // CHECK-NEXT:    ret <8 x half> [[TMP4]]
  48 //
  49 float16x8_t test_vbslq_f16(uint16x8_t a, float16x8_t b, float16x8_t c) { // Codegen check for the 8-lane f16 bitwise-select intrinsic.
  50   return vbslq_f16(a, b, c); // CHECK lines above expect (a & b) | (~a & c) on <8 x i16>, bitcast back to <8 x half>.
  51 }
  52
  53 // CHECK-LABEL: define {{[^@]+}}@test_vzip_f16
  54 // CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] {
  55 // CHECK-NEXT:  entry:
  56 // CHECK-NEXT:    [[RETVAL_I:%.*]] = alloca [[STRUCT_FLOAT16X4X2_T:%.*]], align 8
  57 // CHECK-NEXT:    [[RETVAL:%.*]] = alloca [[STRUCT_FLOAT16X4X2_T]], align 8
  58 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
  59 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8>
  60 // CHECK-NEXT:    [[VZIP_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  61 // CHECK-NEXT:    store <4 x half> [[VZIP_I]], ptr [[RETVAL_I]], align 8
  62 // CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds <4 x half>, ptr [[RETVAL_I]], i32 1
  63 // CHECK-NEXT:    [[VZIP1_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  64 // CHECK-NEXT:    store <4 x half> [[VZIP1_I]], ptr [[TMP2]], align 8
  65 // CHECK-NEXT:    [[TMP3:%.*]] = load [[STRUCT_FLOAT16X4X2_T]], ptr [[RETVAL_I]], align 8
  66 // CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_FLOAT16X4X2_T]], ptr [[RETVAL]], i32 0, i32 0
  67 // CHECK-NEXT:    [[TMP5:%.*]] = extractvalue [[STRUCT_FLOAT16X4X2_T]] [[TMP3]], 0
  68 // CHECK-NEXT:    store [2 x <4 x half>] [[TMP5]], ptr [[TMP4]], align 8
  69 // CHECK-NEXT:    [[TMP6:%.*]] = load [[STRUCT_FLOAT16X4X2_T]], ptr [[RETVAL]], align 8
  70 // CHECK-NEXT:    ret [[STRUCT_FLOAT16X4X2_T]] [[TMP6]]
  71 //
  72 float16x4x2_t test_vzip_f16(float16x4_t a, float16x4_t b) { // Codegen check for the two-result f16 zip intrinsic (4 lanes).
  73   return vzip_f16(a, b); // CHECK lines above expect shuffle masks <0,4,1,5> then <2,6,3,7>, stored to consecutive vector slots of the result.
  74 }
  75
  76 // CHECK-LABEL: define {{[^@]+}}@test_vzipq_f16
  77 // CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] {
  78 // CHECK-NEXT:  entry:
  79 // CHECK-NEXT:    [[RETVAL_I:%.*]] = alloca [[STRUCT_FLOAT16X8X2_T:%.*]], align 16
  80 // CHECK-NEXT:    [[RETVAL:%.*]] = alloca [[STRUCT_FLOAT16X8X2_T]], align 16
  81 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
  82 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8>
  83 // CHECK-NEXT:    [[VZIP_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
  84 // CHECK-NEXT:    store <8 x half> [[VZIP_I]], ptr [[RETVAL_I]], align 16
  85 // CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds <8 x half>, ptr [[RETVAL_I]], i32 1
  86 // CHECK-NEXT:    [[VZIP1_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  87 // CHECK-NEXT:    store <8 x half> [[VZIP1_I]], ptr [[TMP2]], align 16
  88 // CHECK-NEXT:    [[TMP3:%.*]] = load [[STRUCT_FLOAT16X8X2_T]], ptr [[RETVAL_I]], align 16
  89 // CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_FLOAT16X8X2_T]], ptr [[RETVAL]], i32 0, i32 0
  90 // CHECK-NEXT:    [[TMP5:%.*]] = extractvalue [[STRUCT_FLOAT16X8X2_T]] [[TMP3]], 0
  91 // CHECK-NEXT:    store [2 x <8 x half>] [[TMP5]], ptr [[TMP4]], align 16
  92 // CHECK-NEXT:    [[TMP6:%.*]] = load [[STRUCT_FLOAT16X8X2_T]], ptr [[RETVAL]], align 16
  93 // CHECK-NEXT:    ret [[STRUCT_FLOAT16X8X2_T]] [[TMP6]]
  94 //
  95 float16x8x2_t test_vzipq_f16(float16x8_t a, float16x8_t b) { // Codegen check for the two-result f16 zip intrinsic (8 lanes).
  96   return vzipq_f16(a, b); // CHECK lines above expect shuffle masks <0,8,1,9,2,10,3,11> then <4,12,5,13,6,14,7,15>.
  97 }
  98
  99 // CHECK-LABEL: define {{[^@]+}}@test_vuzp_f16
 100 // CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] {
 101 // CHECK-NEXT:  entry:
 102 // CHECK-NEXT:    [[RETVAL_I:%.*]] = alloca [[STRUCT_FLOAT16X4X2_T:%.*]], align 8
 103 // CHECK-NEXT:    [[RETVAL:%.*]] = alloca [[STRUCT_FLOAT16X4X2_T]], align 8
 104 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
 105 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8>
 106 // CHECK-NEXT:    [[VUZP_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
 107 // CHECK-NEXT:    store <4 x half> [[VUZP_I]], ptr [[RETVAL_I]], align 8
 108 // CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds <4 x half>, ptr [[RETVAL_I]], i32 1
 109 // CHECK-NEXT:    [[VUZP1_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
 110 // CHECK-NEXT:    store <4 x half> [[VUZP1_I]], ptr [[TMP2]], align 8
 111 // CHECK-NEXT:    [[TMP3:%.*]] = load [[STRUCT_FLOAT16X4X2_T]], ptr [[RETVAL_I]], align 8
 112 // CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_FLOAT16X4X2_T]], ptr [[RETVAL]], i32 0, i32 0
 113 // CHECK-NEXT:    [[TMP5:%.*]] = extractvalue [[STRUCT_FLOAT16X4X2_T]] [[TMP3]], 0
 114 // CHECK-NEXT:    store [2 x <4 x half>] [[TMP5]], ptr [[TMP4]], align 8
 115 // CHECK-NEXT:    [[TMP6:%.*]] = load [[STRUCT_FLOAT16X4X2_T]], ptr [[RETVAL]], align 8
 116 // CHECK-NEXT:    ret [[STRUCT_FLOAT16X4X2_T]] [[TMP6]]
 117 //
 118 float16x4x2_t test_vuzp_f16(float16x4_t a, float16x4_t b) { // Codegen check for the two-result f16 unzip intrinsic (4 lanes).
 119   return vuzp_f16(a, b); // CHECK lines above expect shuffle masks <0,2,4,6> (even indices) then <1,3,5,7> (odd indices).
 120 }
 121
 122 // CHECK-LABEL: define {{[^@]+}}@test_vuzpq_f16
 123 // CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] {
 124 // CHECK-NEXT:  entry:
 125 // CHECK-NEXT:    [[RETVAL_I:%.*]] = alloca [[STRUCT_FLOAT16X8X2_T:%.*]], align 16
 126 // CHECK-NEXT:    [[RETVAL:%.*]] = alloca [[STRUCT_FLOAT16X8X2_T]], align 16
 127 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
 128 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8>
 129 // CHECK-NEXT:    [[VUZP_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
 130 // CHECK-NEXT:    store <8 x half> [[VUZP_I]], ptr [[RETVAL_I]], align 16
 131 // CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds <8 x half>, ptr [[RETVAL_I]], i32 1
 132 // CHECK-NEXT:    [[VUZP1_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
 133 // CHECK-NEXT:    store <8 x half> [[VUZP1_I]], ptr [[TMP2]], align 16
 134 // CHECK-NEXT:    [[TMP3:%.*]] = load [[STRUCT_FLOAT16X8X2_T]], ptr [[RETVAL_I]], align 16
 135 // CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_FLOAT16X8X2_T]], ptr [[RETVAL]], i32 0, i32 0
 136 // CHECK-NEXT:    [[TMP5:%.*]] = extractvalue [[STRUCT_FLOAT16X8X2_T]] [[TMP3]], 0
 137 // CHECK-NEXT:    store [2 x <8 x half>] [[TMP5]], ptr [[TMP4]], align 16
 138 // CHECK-NEXT:    [[TMP6:%.*]] = load [[STRUCT_FLOAT16X8X2_T]], ptr [[RETVAL]], align 16
 139 // CHECK-NEXT:    ret [[STRUCT_FLOAT16X8X2_T]] [[TMP6]]
 140 //
 141 float16x8x2_t test_vuzpq_f16(float16x8_t a, float16x8_t b) { // Codegen check for the two-result f16 unzip intrinsic (8 lanes).
 142   return vuzpq_f16(a, b); // CHECK lines above expect shuffle masks <0,2,...,14> (even indices) then <1,3,...,15> (odd indices).
 143 }
 144
 145 // CHECK-LABEL: define {{[^@]+}}@test_vtrn_f16
 146 // CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] {
 147 // CHECK-NEXT:  entry:
 148 // CHECK-NEXT:    [[RETVAL_I:%.*]] = alloca [[STRUCT_FLOAT16X4X2_T:%.*]], align 8
 149 // CHECK-NEXT:    [[RETVAL:%.*]] = alloca [[STRUCT_FLOAT16X4X2_T]], align 8
 150 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
 151 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8>
 152 // CHECK-NEXT:    [[VTRN_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
 153 // CHECK-NEXT:    store <4 x half> [[VTRN_I]], ptr [[RETVAL_I]], align 8
 154 // CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds <4 x half>, ptr [[RETVAL_I]], i32 1
 155 // CHECK-NEXT:    [[VTRN1_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
 156 // CHECK-NEXT:    store <4 x half> [[VTRN1_I]], ptr [[TMP2]], align 8
 157 // CHECK-NEXT:    [[TMP3:%.*]] = load [[STRUCT_FLOAT16X4X2_T]], ptr [[RETVAL_I]], align 8
 158 // CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_FLOAT16X4X2_T]], ptr [[RETVAL]], i32 0, i32 0
 159 // CHECK-NEXT:    [[TMP5:%.*]] = extractvalue [[STRUCT_FLOAT16X4X2_T]] [[TMP3]], 0
 160 // CHECK-NEXT:    store [2 x <4 x half>] [[TMP5]], ptr [[TMP4]], align 8
 161 // CHECK-NEXT:    [[TMP6:%.*]] = load [[STRUCT_FLOAT16X4X2_T]], ptr [[RETVAL]], align 8
 162 // CHECK-NEXT:    ret [[STRUCT_FLOAT16X4X2_T]] [[TMP6]]
 163 //
 164 float16x4x2_t test_vtrn_f16(float16x4_t a, float16x4_t b) { // Codegen check for the two-result f16 transpose intrinsic (4 lanes).
 165   return vtrn_f16(a, b); // CHECK lines above expect shuffle masks <0,4,2,6> then <1,5,3,7>.
 166 }
 167
 168 // CHECK-LABEL: define {{[^@]+}}@test_vtrnq_f16
 169 // CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] {
 170 // CHECK-NEXT:  entry:
 171 // CHECK-NEXT:    [[RETVAL_I:%.*]] = alloca [[STRUCT_FLOAT16X8X2_T:%.*]], align 16
 172 // CHECK-NEXT:    [[RETVAL:%.*]] = alloca [[STRUCT_FLOAT16X8X2_T]], align 16
 173 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
 174 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8>
 175 // CHECK-NEXT:    [[VTRN_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
 176 // CHECK-NEXT:    store <8 x half> [[VTRN_I]], ptr [[RETVAL_I]], align 16
 177 // CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds <8 x half>, ptr [[RETVAL_I]], i32 1
 178 // CHECK-NEXT:    [[VTRN1_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
 179 // CHECK-NEXT:    store <8 x half> [[VTRN1_I]], ptr [[TMP2]], align 16
 180 // CHECK-NEXT:    [[TMP3:%.*]] = load [[STRUCT_FLOAT16X8X2_T]], ptr [[RETVAL_I]], align 16
 181 // CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_FLOAT16X8X2_T]], ptr [[RETVAL]], i32 0, i32 0
 182 // CHECK-NEXT:    [[TMP5:%.*]] = extractvalue [[STRUCT_FLOAT16X8X2_T]] [[TMP3]], 0
 183 // CHECK-NEXT:    store [2 x <8 x half>] [[TMP5]], ptr [[TMP4]], align 16
 184 // CHECK-NEXT:    [[TMP6:%.*]] = load [[STRUCT_FLOAT16X8X2_T]], ptr [[RETVAL]], align 16
 185 // CHECK-NEXT:    ret [[STRUCT_FLOAT16X8X2_T]] [[TMP6]]
 186 //
 187 float16x8x2_t test_vtrnq_f16(float16x8_t a, float16x8_t b) { // Codegen check for the two-result f16 transpose intrinsic (8 lanes).
 188   return vtrnq_f16(a, b); // CHECK lines above expect shuffle masks <0,8,2,10,4,12,6,14> then <1,9,3,11,5,13,7,15>.
 189 }
 190
 191 // CHECK-LABEL: define {{[^@]+}}@test_vmov_n_f16
 192 // CHECK-SAME: (half noundef [[A:%.*]]) #[[ATTR0]] {
 193 // CHECK-NEXT:  entry:
 194 // CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <4 x half> poison, half [[A]], i32 0
 195 // CHECK-NEXT:    [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[A]], i32 1
 196 // CHECK-NEXT:    [[VECINIT2:%.*]] = insertelement <4 x half> [[VECINIT1]], half [[A]], i32 2
 197 // CHECK-NEXT:    [[VECINIT3:%.*]] = insertelement <4 x half> [[VECINIT2]], half [[A]], i32 3
 198 // CHECK-NEXT:    ret <4 x half> [[VECINIT3]]
 199 //
 200 float16x4_t test_vmov_n_f16(float16_t a) { // Codegen check for broadcasting a scalar half via vmov_n_f16.
 201   return vmov_n_f16(a); // CHECK lines above expect `a` inserted into lanes 0-3 of a <4 x half> with insertelement.
 202 }
 203
 204 // CHECK-LABEL: define {{[^@]+}}@test_vmovq_n_f16
 205 // CHECK-SAME: (half noundef [[A:%.*]]) #[[ATTR0]] {
 206 // CHECK-NEXT:  entry:
 207 // CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <8 x half> poison, half [[A]], i32 0
 208 // CHECK-NEXT:    [[VECINIT1:%.*]] = insertelement <8 x half> [[VECINIT]], half [[A]], i32 1
 209 // CHECK-NEXT:    [[VECINIT2:%.*]] = insertelement <8 x half> [[VECINIT1]], half [[A]], i32 2
 210 // CHECK-NEXT:    [[VECINIT3:%.*]] = insertelement <8 x half> [[VECINIT2]], half [[A]], i32 3
 211 // CHECK-NEXT:    [[VECINIT4:%.*]] = insertelement <8 x half> [[VECINIT3]], half [[A]], i32 4
 212 // CHECK-NEXT:    [[VECINIT5:%.*]] = insertelement <8 x half> [[VECINIT4]], half [[A]], i32 5
 213 // CHECK-NEXT:    [[VECINIT6:%.*]] = insertelement <8 x half> [[VECINIT5]], half [[A]], i32 6
 214 // CHECK-NEXT:    [[VECINIT7:%.*]] = insertelement <8 x half> [[VECINIT6]], half [[A]], i32 7
 215 // CHECK-NEXT:    ret <8 x half> [[VECINIT7]]
 216 //
 217 float16x8_t test_vmovq_n_f16(float16_t a) { // Codegen check for broadcasting a scalar half via vmovq_n_f16.
 218   return vmovq_n_f16(a); // CHECK lines above expect `a` inserted into lanes 0-7 of an <8 x half> with insertelement.
 219 }
 220
 221 // CHECK-LABEL: define {{[^@]+}}@test_vdup_n_f16
 222 // CHECK-SAME: (half noundef [[A:%.*]]) #[[ATTR0]] {
 223 // CHECK-NEXT:  entry:
 224 // CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <4 x half> poison, half [[A]], i32 0
 225 // CHECK-NEXT:    [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[A]], i32 1
 226 // CHECK-NEXT:    [[VECINIT2:%.*]] = insertelement <4 x half> [[VECINIT1]], half [[A]], i32 2
 227 // CHECK-NEXT:    [[VECINIT3:%.*]] = insertelement <4 x half> [[VECINIT2]], half [[A]], i32 3
 228 // CHECK-NEXT:    ret <4 x half> [[VECINIT3]]
 229 //
 230 float16x4_t test_vdup_n_f16(float16_t a) { // Codegen check for broadcasting a scalar half via vdup_n_f16.
 231   return vdup_n_f16(a); // CHECK lines above expect `a` inserted into lanes 0-3 of a <4 x half> with insertelement.
 232 }
 233
 234 // CHECK-LABEL: define {{[^@]+}}@test_vdupq_n_f16
 235 // CHECK-SAME: (half noundef [[A:%.*]]) #[[ATTR0]] {
 236 // CHECK-NEXT:  entry:
 237 // CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <8 x half> poison, half [[A]], i32 0
 238 // CHECK-NEXT:    [[VECINIT1:%.*]] = insertelement <8 x half> [[VECINIT]], half [[A]], i32 1
 239 // CHECK-NEXT:    [[VECINIT2:%.*]] = insertelement <8 x half> [[VECINIT1]], half [[A]], i32 2
 240 // CHECK-NEXT:    [[VECINIT3:%.*]] = insertelement <8 x half> [[VECINIT2]], half [[A]], i32 3
 241 // CHECK-NEXT:    [[VECINIT4:%.*]] = insertelement <8 x half> [[VECINIT3]], half [[A]], i32 4
 242 // CHECK-NEXT:    [[VECINIT5:%.*]] = insertelement <8 x half> [[VECINIT4]], half [[A]], i32 5
 243 // CHECK-NEXT:    [[VECINIT6:%.*]] = insertelement <8 x half> [[VECINIT5]], half [[A]], i32 6
 244 // CHECK-NEXT:    [[VECINIT7:%.*]] = insertelement <8 x half> [[VECINIT6]], half [[A]], i32 7
 245 // CHECK-NEXT:    ret <8 x half> [[VECINIT7]]
 246 //
 247 float16x8_t test_vdupq_n_f16(float16_t a) { // Codegen check for broadcasting a scalar half via vdupq_n_f16.
 248   return vdupq_n_f16(a); // CHECK lines above expect `a` inserted into lanes 0-7 of an <8 x half> with insertelement.
 249 }
 250
 251 // CHECK-LABEL: define {{[^@]+}}@test_vdup_lane_f16
 252 // CHECK-SAME: (<4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
 253 // CHECK-NEXT:  entry:
 254 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
 255 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
 256 // CHECK-NEXT:    [[LANE:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
 257 // CHECK-NEXT:    ret <4 x half> [[LANE]]
 258 //
 259 float16x4_t test_vdup_lane_f16(float16x4_t a) { // Codegen check for duplicating one lane of a 4-lane vector into 4 lanes.
 260   return vdup_lane_f16(a, 3); // Lane 3 is the last valid index for 4 lanes; CHECK expects a shuffle mask of four 3s.
 261 }
 262
 263 // CHECK-LABEL: define {{[^@]+}}@test_vdupq_lane_f16
 264 // CHECK-SAME: (<4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
 265 // CHECK-NEXT:  entry:
 266 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
 267 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
 268 // CHECK-NEXT:    [[LANE:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
 269 // CHECK-NEXT:    ret <8 x half> [[LANE]]
 270 //
 271 float16x8_t test_vdupq_lane_f16(float16x4_t a) { // Codegen check for duplicating one lane of a 4-lane vector into 8 lanes.
 272   return vdupq_lane_f16(a, 3); // CHECK lines above expect a <4 x half> source shuffled with a mask of eight 3s.
 273 }
 274
 275 // CHECK-LABEL: define {{[^@]+}}@test_vdup_laneq_f16
 276 // CHECK-SAME: (<8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
 277 // CHECK-NEXT:  entry:
 278 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
 279 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
 280 // CHECK-NEXT:    [[LANE:%.*]] = shufflevector <8 x half> [[TMP1]], <8 x half> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
 281 // CHECK-NEXT:    ret <4 x half> [[LANE]]
 282 //
 283 float16x4_t test_vdup_laneq_f16(float16x8_t a) { // Codegen check for duplicating one lane of an 8-lane vector into 4 lanes.
 284   return vdup_laneq_f16(a, 1); // CHECK lines above expect an <8 x half> source shuffled with a mask of four 1s.
 285 }
 286
 287 // CHECK-LABEL: define {{[^@]+}}@test_vdupq_laneq_f16
 288 // CHECK-SAME: (<8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
 289 // CHECK-NEXT:  entry:
 290 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
 291 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
 292 // CHECK-NEXT:    [[LANE:%.*]] = shufflevector <8 x half> [[TMP1]], <8 x half> [[TMP1]], <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
 293 // CHECK-NEXT:    ret <8 x half> [[LANE]]
 294 //
 295 float16x8_t test_vdupq_laneq_f16(float16x8_t a) { // Codegen check for duplicating one lane of an 8-lane vector into 8 lanes.
 296   return vdupq_laneq_f16(a, 7); // Lane 7 is the last valid index for 8 lanes; CHECK expects a shuffle mask of eight 7s.
 297 }
 298
 299 // CHECK-LABEL: define {{[^@]+}}@test_vext_f16
 300 // CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] {
 301 // CHECK-NEXT:  entry:
 302 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
 303 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8>
 304 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
 305 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
 306 // CHECK-NEXT:    [[VEXT:%.*]] = shufflevector <4 x half> [[TMP2]], <4 x half> [[TMP3]], <4 x i32> <i32 2, i32 3, i32 4, i32 5>
 307 // CHECK-NEXT:    ret <4 x half> [[VEXT]]
 308 //
 309 float16x4_t test_vext_f16(float16x4_t a, float16x4_t b) { // Codegen check for the 4-lane f16 vector-extract intrinsic.
 310   return vext_f16(a, b, 2); // With offset 2 the CHECK lines above expect shuffle mask <2,3,4,5> over (a, b).
 311 }
 312
 313 // CHECK-LABEL: define {{[^@]+}}@test_vextq_f16
 314 // CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] {
 315 // CHECK-NEXT:  entry:
 316 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
 317 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8>
 318 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
 319 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
 320 // CHECK-NEXT:    [[VEXT:%.*]] = shufflevector <8 x half> [[TMP2]], <8 x half> [[TMP3]], <8 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12>
 321 // CHECK-NEXT:    ret <8 x half> [[VEXT]]
 322 //
 323 float16x8_t test_vextq_f16(float16x8_t a, float16x8_t b) { // Codegen check for the 8-lane f16 vector-extract intrinsic.
 324   return vextq_f16(a, b, 5); // With offset 5 the CHECK lines above expect shuffle mask <5,6,...,12> over (a, b).
 325 }
 326
 327 // CHECK-LABEL: define {{[^@]+}}@test_vrev64_f16
 328 // CHECK-SAME: (<4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
 329 // CHECK-NEXT:  entry:
 330 // CHECK-NEXT:    [[SHUFFLE_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[A]], <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 331 // CHECK-NEXT:    ret <4 x half> [[SHUFFLE_I]]
 332 //
 333 float16x4_t test_vrev64_f16(float16x4_t a) { // Codegen check for the 4-lane f16 reverse intrinsic.
 334   return vrev64_f16(a); // CHECK lines above expect a single shuffle of `a` with mask <3,2,1,0>.
 335 }
 336
 337 // CHECK-LABEL: define {{[^@]+}}@test_vrev64q_f16
 338 // CHECK-SAME: (<8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
 339 // CHECK-NEXT:  entry:
 340 // CHECK-NEXT:    [[SHUFFLE_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[A]], <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
 341 // CHECK-NEXT:    ret <8 x half> [[SHUFFLE_I]]
 342 //
 343 float16x8_t test_vrev64q_f16(float16x8_t a) { // Codegen check for the 8-lane f16 reverse intrinsic.
 344   return vrev64q_f16(a); // CHECK expects mask <3,2,1,0,7,6,5,4>: each group of four lanes is reversed separately.
 345 }
 346
 347 // CHECK-LABEL: define {{[^@]+}}@test_vzip1_f16
 348 // CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] {
 349 // CHECK-NEXT:  entry:
 350 // CHECK-NEXT:    [[SHUFFLE_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 0, i32 4, i32 1, i32 5>
 351 // CHECK-NEXT:    ret <4 x half> [[SHUFFLE_I]]
 352 //
 353 float16x4_t test_vzip1_f16(float16x4_t a, float16x4_t b) { // Codegen check for the single-result vzip1 intrinsic (4 lanes).
 354   return vzip1_f16(a, b); // CHECK lines above expect one shuffle with mask <0,4,1,5>.
 355 }
 356
 357 // CHECK-LABEL: define {{[^@]+}}@test_vzip1q_f16
 358 // CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] {
 359 // CHECK-NEXT:  entry:
 360 // CHECK-NEXT:    [[SHUFFLE_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
 361 // CHECK-NEXT:    ret <8 x half> [[SHUFFLE_I]]
 362 //
 363 float16x8_t test_vzip1q_f16(float16x8_t a, float16x8_t b) { // Codegen check for the single-result vzip1 intrinsic (8 lanes).
 364   return vzip1q_f16(a, b); // CHECK lines above expect one shuffle with mask <0,8,1,9,2,10,3,11>.
 365 }
 366
 367 // CHECK-LABEL: define {{[^@]+}}@test_vzip2_f16
 368 // CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] {
 369 // CHECK-NEXT:  entry:
 370 // CHECK-NEXT:    [[SHUFFLE_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 2, i32 6, i32 3, i32 7>
 371 // CHECK-NEXT:    ret <4 x half> [[SHUFFLE_I]]
 372 //
 373 float16x4_t test_vzip2_f16(float16x4_t a, float16x4_t b) { // Codegen check for the single-result vzip2 intrinsic (4 lanes).
 374   return vzip2_f16(a, b); // CHECK lines above expect one shuffle with mask <2,6,3,7>.
 375 }
 376
 377 // CHECK-LABEL: define {{[^@]+}}@test_vzip2q_f16
 378 // CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] {
 379 // CHECK-NEXT:  entry:
 380 // CHECK-NEXT:    [[SHUFFLE_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
 381 // CHECK-NEXT:    ret <8 x half> [[SHUFFLE_I]]
 382 //
 383 float16x8_t test_vzip2q_f16(float16x8_t a, float16x8_t b) { // Codegen check for the single-result vzip2 intrinsic (8 lanes).
 384   return vzip2q_f16(a, b); // CHECK lines above expect one shuffle with mask <4,12,5,13,6,14,7,15>.
 385 }
 386
 387 // CHECK-LABEL: define {{[^@]+}}@test_vuzp1_f16
 388 // CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] {
 389 // CHECK-NEXT:  entry:
 390 // CHECK-NEXT:    [[SHUFFLE_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
 391 // CHECK-NEXT:    ret <4 x half> [[SHUFFLE_I]]
 392 //
 393 float16x4_t test_vuzp1_f16(float16x4_t a, float16x4_t b) { // Codegen check for the single-result vuzp1 intrinsic (4 lanes).
 394   return vuzp1_f16(a, b); // CHECK lines above expect one shuffle with mask <0,2,4,6> (even indices).
 395 }
 396
 397 // CHECK-LABEL: define {{[^@]+}}@test_vuzp1q_f16
 398 // CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] {
 399 // CHECK-NEXT:  entry:
 400 // CHECK-NEXT:    [[SHUFFLE_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
 401 // CHECK-NEXT:    ret <8 x half> [[SHUFFLE_I]]
 402 //
 403 float16x8_t test_vuzp1q_f16(float16x8_t a, float16x8_t b) { // Codegen check for the single-result vuzp1 intrinsic (8 lanes).
 404   return vuzp1q_f16(a, b); // CHECK lines above expect one shuffle with mask <0,2,4,6,8,10,12,14> (even indices).
 405 }
 406
 407 // CHECK-LABEL: define {{[^@]+}}@test_vuzp2_f16
 408 // CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] {
 409 // CHECK-NEXT:  entry:
 410 // CHECK-NEXT:    [[SHUFFLE_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
 411 // CHECK-NEXT:    ret <4 x half> [[SHUFFLE_I]]
 412 //
 413 float16x4_t test_vuzp2_f16(float16x4_t a, float16x4_t b) { // Codegen check for the single-result vuzp2 intrinsic (4 lanes).
 414   return vuzp2_f16(a, b); // CHECK lines above expect one shuffle with mask <1,3,5,7> (odd indices).
 415 }
 416
 417 // CHECK-LABEL: define {{[^@]+}}@test_vuzp2q_f16
 418 // CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] {
 419 // CHECK-NEXT:  entry:
 420 // CHECK-NEXT:    [[SHUFFLE_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
 421 // CHECK-NEXT:    ret <8 x half> [[SHUFFLE_I]]
 422 //
 423 float16x8_t test_vuzp2q_f16(float16x8_t a, float16x8_t b) { // Codegen check for the single-result vuzp2 intrinsic (8 lanes).
 424   return vuzp2q_f16(a, b); // CHECK lines above expect one shuffle with mask <1,3,5,7,9,11,13,15> (odd indices).
 425 }
 426
 427 // CHECK-LABEL: define {{[^@]+}}@test_vtrn1_f16
 428 // CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] {
 429 // CHECK-NEXT:  entry:
 430 // CHECK-NEXT:    [[SHUFFLE_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
 431 // CHECK-NEXT:    ret <4 x half> [[SHUFFLE_I]]
 432 //
 433 float16x4_t test_vtrn1_f16(float16x4_t a, float16x4_t b) { // Codegen check for the single-result vtrn1 intrinsic (4 lanes).
 434   return vtrn1_f16(a, b); // CHECK lines above expect one shuffle with mask <0,4,2,6>.
 435 }
 436
 437 // CHECK-LABEL: define {{[^@]+}}@test_vtrn1q_f16
 438 // CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] {
 439 // CHECK-NEXT:  entry:
 440 // CHECK-NEXT:    [[SHUFFLE_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
 441 // CHECK-NEXT:    ret <8 x half> [[SHUFFLE_I]]
 442 //
 443 float16x8_t test_vtrn1q_f16(float16x8_t a, float16x8_t b) { // Codegen check for the single-result vtrn1 intrinsic (8 lanes).
 444   return vtrn1q_f16(a, b); // CHECK lines above expect one shuffle with mask <0,8,2,10,4,12,6,14>.
 445 }
 446
 447 // CHECK-LABEL: define {{[^@]+}}@test_vtrn2_f16
 448 // CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] {
 449 // CHECK-NEXT:  entry:
 450 // CHECK-NEXT:    [[SHUFFLE_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
 451 // CHECK-NEXT:    ret <4 x half> [[SHUFFLE_I]]
 452 //
 453 float16x4_t test_vtrn2_f16(float16x4_t a, float16x4_t b) { // Codegen check for the single-result vtrn2 intrinsic (4 lanes).
 454   return vtrn2_f16(a, b); // CHECK lines above expect one shuffle with mask <1,5,3,7>.
 455 }
 456
 457 // CHECK-LABEL: define {{[^@]+}}@test_vtrn2q_f16
 458 // CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] {
 459 // CHECK-NEXT:  entry:
 460 // CHECK-NEXT:    [[SHUFFLE_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
 461 // CHECK-NEXT:    ret <8 x half> [[SHUFFLE_I]]
 462 //
 463 float16x8_t test_vtrn2q_f16(float16x8_t a, float16x8_t b) { // Codegen check for the single-result vtrn2 intrinsic (8 lanes).
 464   return vtrn2q_f16(a, b); // CHECK lines above expect one shuffle with mask <1,9,3,11,5,13,7,15>.
 465 }
 466
 467 // CHECK-LABEL: define {{[^@]+}}@test_vduph_laneq_f16
 468 // CHECK-SAME: (<8 x half> noundef [[VEC:%.*]]) #[[ATTR0]] {
 469 // CHECK-NEXT:  entry:
 470 // CHECK-NEXT:    [[VGETQ_LANE:%.*]] = extractelement <8 x half> [[VEC]], i32 7
 471 // CHECK-NEXT:    ret half [[VGETQ_LANE]]
 472 //
 473 float16_t test_vduph_laneq_f16(float16x8_t vec) { // Codegen check for reading one scalar half out of an 8-lane vector.
 474   return vduph_laneq_f16(vec, 7); // CHECK lines above expect a single extractelement at index 7.
 475 }
 476
 477 // CHECK-LABEL: define {{[^@]+}}@test_vduph_lane_f16
 478 // CHECK-SAME: (<4 x half> noundef [[VEC:%.*]]) #[[ATTR0]] {
 479 // CHECK-NEXT:  entry:
 480 // CHECK-NEXT:    [[VGET_LANE:%.*]] = extractelement <4 x half> [[VEC]], i32 3
 481 // CHECK-NEXT:    ret half [[VGET_LANE]]
 482 //
 483 float16_t test_vduph_lane_f16(float16x4_t vec) { // Codegen check for reading one scalar half out of a 4-lane vector.
 484   return vduph_lane_f16(vec, 3); // CHECK lines above expect a single extractelement at index 3.
 485 }