clang/test/CodeGen/arm-v8.2a-neon-intrinsics-generic.c

   1 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4
   2 // RUN: %clang_cc1 -triple armv8.2a-linux-gnu -target-abi apcs-gnu -target-feature +neon -target-feature -fullfp16 \
   3 // RUN: -disable-O0-optnone -emit-llvm -o - %s \
   4 // RUN: | opt -S -passes=sroa \
   5 // RUN: | FileCheck %s --check-prefixes=CHECK-NOFP16
   6 // RUN: %clang_cc1 -triple armv8a-linux-gnu -target-abi apcs-gnu -target-feature +neon -target-feature +fullfp16 \
   7 // RUN: -disable-O0-optnone -emit-llvm -o - %s \
   8 // RUN: | opt -S -passes=sroa \
   9 // RUN: | FileCheck %s --check-prefixes=CHECK-FP16
  10
  11 // REQUIRES: arm-registered-target
  12
  13 #include <arm_neon.h>
  14
  15 // CHECK-NOFP16-LABEL: define dso_local <2 x i32> @test_vbsl_f16(
  16 // CHECK-NOFP16-SAME: <4 x i16> noundef [[A:%.*]], <2 x i32> noundef [[B_COERCE:%.*]], <2 x i32> noundef [[C_COERCE:%.*]]) #[[ATTR0:[0-9]+]] {
  17 // CHECK-NOFP16-NEXT:  entry:
  18 // CHECK-NOFP16-NEXT:    [[TMP0:%.*]] = bitcast <2 x i32> [[B_COERCE]] to <4 x half>
  19 // CHECK-NOFP16-NEXT:    [[TMP1:%.*]] = bitcast <2 x i32> [[C_COERCE]] to <4 x half>
  20 // CHECK-NOFP16-NEXT:    [[TMP2:%.*]] = bitcast <4 x half> [[TMP0]] to <2 x i32>
  21 // CHECK-NOFP16-NEXT:    [[TMP3:%.*]] = bitcast <4 x half> [[TMP1]] to <2 x i32>
  22 // CHECK-NOFP16-NEXT:    [[TMP4:%.*]] = bitcast <2 x i32> [[TMP2]] to <4 x half>
  23 // CHECK-NOFP16-NEXT:    [[TMP5:%.*]] = bitcast <2 x i32> [[TMP3]] to <4 x half>
  24 // CHECK-NOFP16-NEXT:    [[TMP6:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
  25 // CHECK-NOFP16-NEXT:    [[TMP7:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8>
  26 // CHECK-NOFP16-NEXT:    [[TMP8:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
  27 // CHECK-NOFP16-NEXT:    [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP6]], <8 x i8> [[TMP7]], <8 x i8> [[TMP8]])
  28 // CHECK-NOFP16-NEXT:    [[TMP9:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <4 x half>
  29 // CHECK-NOFP16-NEXT:    [[TMP10:%.*]] = bitcast <4 x half> [[TMP9]] to <2 x i32>
  30 // CHECK-NOFP16-NEXT:    [[TMP11:%.*]] = bitcast <2 x i32> [[TMP10]] to <4 x half>
  31 // CHECK-NOFP16-NEXT:    [[TMP12:%.*]] = bitcast <4 x half> [[TMP11]] to <2 x i32>
  32 // CHECK-NOFP16-NEXT:    ret <2 x i32> [[TMP12]]
  33 //
  34 // CHECK-FP16-LABEL: define dso_local <4 x half> @test_vbsl_f16(
  35 // CHECK-FP16-SAME: <4 x i16> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]], <4 x half> noundef [[C:%.*]]) #[[ATTR0:[0-9]+]] {
  36 // CHECK-FP16-NEXT:  entry:
  37 // CHECK-FP16-NEXT:    [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
  38 // CHECK-FP16-NEXT:    [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8>
  39 // CHECK-FP16-NEXT:    [[TMP2:%.*]] = bitcast <4 x half> [[C]] to <8 x i8>
  40 // CHECK-FP16-NEXT:    [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
  41 // CHECK-FP16-NEXT:    [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <4 x half>
  42 // CHECK-FP16-NEXT:    ret <4 x half> [[TMP3]]
  43 //
  44 float16x4_t test_vbsl_f16(uint16x4_t a, float16x4_t b, float16x4_t c) {
  45   return vbsl_f16(a, b, c);
  46 }
  47
  48 // CHECK-NOFP16-LABEL: define dso_local <4 x i32> @test_vbslq_f16(
  49 // CHECK-NOFP16-SAME: <8 x i16> noundef [[A:%.*]], <4 x i32> noundef [[B_COERCE:%.*]], <4 x i32> noundef [[C_COERCE:%.*]]) #[[ATTR0]] {
  50 // CHECK-NOFP16-NEXT:  entry:
  51 // CHECK-NOFP16-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[B_COERCE]] to <8 x half>
  52 // CHECK-NOFP16-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[C_COERCE]] to <8 x half>
  53 // CHECK-NOFP16-NEXT:    [[TMP2:%.*]] = bitcast <8 x half> [[TMP0]] to <4 x i32>
  54 // CHECK-NOFP16-NEXT:    [[TMP3:%.*]] = bitcast <8 x half> [[TMP1]] to <4 x i32>
  55 // CHECK-NOFP16-NEXT:    [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to <8 x half>
  56 // CHECK-NOFP16-NEXT:    [[TMP5:%.*]] = bitcast <4 x i32> [[TMP3]] to <8 x half>
  57 // CHECK-NOFP16-NEXT:    [[TMP6:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
  58 // CHECK-NOFP16-NEXT:    [[TMP7:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8>
  59 // CHECK-NOFP16-NEXT:    [[TMP8:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
  60 // CHECK-NOFP16-NEXT:    [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP6]], <16 x i8> [[TMP7]], <16 x i8> [[TMP8]])
  61 // CHECK-NOFP16-NEXT:    [[TMP9:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <8 x half>
  62 // CHECK-NOFP16-NEXT:    [[TMP10:%.*]] = bitcast <8 x half> [[TMP9]] to <4 x i32>
  63 // CHECK-NOFP16-NEXT:    [[TMP11:%.*]] = bitcast <4 x i32> [[TMP10]] to <8 x half>
  64 // CHECK-NOFP16-NEXT:    [[TMP12:%.*]] = bitcast <8 x half> [[TMP11]] to <4 x i32>
  65 // CHECK-NOFP16-NEXT:    ret <4 x i32> [[TMP12]]
  66 //
  67 // CHECK-FP16-LABEL: define dso_local <8 x half> @test_vbslq_f16(
  68 // CHECK-FP16-SAME: <8 x i16> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]], <8 x half> noundef [[C:%.*]]) #[[ATTR0]] {
  69 // CHECK-FP16-NEXT:  entry:
  70 // CHECK-FP16-NEXT:    [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
  71 // CHECK-FP16-NEXT:    [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8>
  72 // CHECK-FP16-NEXT:    [[TMP2:%.*]] = bitcast <8 x half> [[C]] to <16 x i8>
  73 // CHECK-FP16-NEXT:    [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
  74 // CHECK-FP16-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <8 x half>
  75 // CHECK-FP16-NEXT:    ret <8 x half> [[TMP3]]
  76 //
  77 float16x8_t test_vbslq_f16(uint16x8_t a, float16x8_t b, float16x8_t c) {
  78   return vbslq_f16(a, b, c);
  79 }
  80
  81 // CHECK-NOFP16-LABEL: define dso_local void @test_vzip_f16(
  82 // CHECK-NOFP16-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT16X4X2_T:%.*]]) align 8 [[AGG_RESULT:%.*]], <2 x i32> noundef [[A_COERCE:%.*]], <2 x i32> noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
  83 // CHECK-NOFP16-NEXT:  entry:
  84 // CHECK-NOFP16-NEXT:    [[TMP0:%.*]] = bitcast <2 x i32> [[A_COERCE]] to <4 x half>
  85 // CHECK-NOFP16-NEXT:    [[TMP1:%.*]] = bitcast <2 x i32> [[B_COERCE]] to <4 x half>
  86 // CHECK-NOFP16-NEXT:    [[TMP2:%.*]] = bitcast <4 x half> [[TMP0]] to <2 x i32>
  87 // CHECK-NOFP16-NEXT:    [[TMP3:%.*]] = bitcast <4 x half> [[TMP1]] to <2 x i32>
  88 // CHECK-NOFP16-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]])
  89 // CHECK-NOFP16-NEXT:    [[TMP4:%.*]] = bitcast <2 x i32> [[TMP2]] to <4 x half>
  90 // CHECK-NOFP16-NEXT:    [[TMP5:%.*]] = bitcast <2 x i32> [[TMP3]] to <4 x half>
  91 // CHECK-NOFP16-NEXT:    [[TMP6:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8>
  92 // CHECK-NOFP16-NEXT:    [[TMP7:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
  93 // CHECK-NOFP16-NEXT:    [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
  94 // CHECK-NOFP16-NEXT:    [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
  95 // CHECK-NOFP16-NEXT:    [[VZIP_I:%.*]] = shufflevector <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  96 // CHECK-NOFP16-NEXT:    store <4 x i16> [[VZIP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META3]]
  97 // CHECK-NOFP16-NEXT:    [[TMP10:%.*]] = getelementptr inbounds <4 x i16>, ptr [[AGG_RESULT]], i32 1
  98 // CHECK-NOFP16-NEXT:    [[VZIP3_I:%.*]] = shufflevector <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  99 // CHECK-NOFP16-NEXT:    store <4 x i16> [[VZIP3_I]], ptr [[TMP10]], align 4, !alias.scope [[META3]]
 100 // CHECK-NOFP16-NEXT:    ret void
 101 //
 102 // CHECK-FP16-LABEL: define dso_local void @test_vzip_f16(
 103 // CHECK-FP16-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT16X4X2_T:%.*]]) align 8 [[AGG_RESULT:%.*]], <4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] {
 104 // CHECK-FP16-NEXT:  entry:
 105 // CHECK-FP16-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]])
 106 // CHECK-FP16-NEXT:    [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
 107 // CHECK-FP16-NEXT:    [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8>
 108 // CHECK-FP16-NEXT:    [[VZIP_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 0, i32 4, i32 1, i32 5>
 109 // CHECK-FP16-NEXT:    store <4 x half> [[VZIP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META3]]
 110 // CHECK-FP16-NEXT:    [[TMP2:%.*]] = getelementptr inbounds <4 x half>, ptr [[AGG_RESULT]], i32 1
 111 // CHECK-FP16-NEXT:    [[VZIP1_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 2, i32 6, i32 3, i32 7>
 112 // CHECK-FP16-NEXT:    store <4 x half> [[VZIP1_I]], ptr [[TMP2]], align 4, !alias.scope [[META3]]
 113 // CHECK-FP16-NEXT:    ret void
 114 //
 115 float16x4x2_t test_vzip_f16(float16x4_t a, float16x4_t b) {
 116   return vzip_f16(a, b);
 117 }
 118
 119 // CHECK-NOFP16-LABEL: define dso_local void @test_vzipq_f16(
 120 // CHECK-NOFP16-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT16X8X2_T:%.*]]) align 16 [[AGG_RESULT:%.*]], <4 x i32> noundef [[A_COERCE:%.*]], <4 x i32> noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
 121 // CHECK-NOFP16-NEXT:  entry:
 122 // CHECK-NOFP16-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A_COERCE]] to <8 x half>
 123 // CHECK-NOFP16-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B_COERCE]] to <8 x half>
 124 // CHECK-NOFP16-NEXT:    [[TMP2:%.*]] = bitcast <8 x half> [[TMP0]] to <4 x i32>
 125 // CHECK-NOFP16-NEXT:    [[TMP3:%.*]] = bitcast <8 x half> [[TMP1]] to <4 x i32>
 126 // CHECK-NOFP16-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]])
 127 // CHECK-NOFP16-NEXT:    [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to <8 x half>
 128 // CHECK-NOFP16-NEXT:    [[TMP5:%.*]] = bitcast <4 x i32> [[TMP3]] to <8 x half>
 129 // CHECK-NOFP16-NEXT:    [[TMP6:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8>
 130 // CHECK-NOFP16-NEXT:    [[TMP7:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
 131 // CHECK-NOFP16-NEXT:    [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
 132 // CHECK-NOFP16-NEXT:    [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
 133 // CHECK-NOFP16-NEXT:    [[VZIP_I:%.*]] = shufflevector <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
 134 // CHECK-NOFP16-NEXT:    store <8 x i16> [[VZIP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META6]]
 135 // CHECK-NOFP16-NEXT:    [[TMP10:%.*]] = getelementptr inbounds <8 x i16>, ptr [[AGG_RESULT]], i32 1
 136 // CHECK-NOFP16-NEXT:    [[VZIP3_I:%.*]] = shufflevector <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
 137 // CHECK-NOFP16-NEXT:    store <8 x i16> [[VZIP3_I]], ptr [[TMP10]], align 4, !alias.scope [[META6]]
 138 // CHECK-NOFP16-NEXT:    ret void
 139 //
 140 // CHECK-FP16-LABEL: define dso_local void @test_vzipq_f16(
 141 // CHECK-FP16-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT16X8X2_T:%.*]]) align 16 [[AGG_RESULT:%.*]], <8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] {
 142 // CHECK-FP16-NEXT:  entry:
 143 // CHECK-FP16-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]])
 144 // CHECK-FP16-NEXT:    [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
 145 // CHECK-FP16-NEXT:    [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8>
 146 // CHECK-FP16-NEXT:    [[VZIP_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
 147 // CHECK-FP16-NEXT:    store <8 x half> [[VZIP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META6]]
 148 // CHECK-FP16-NEXT:    [[TMP2:%.*]] = getelementptr inbounds <8 x half>, ptr [[AGG_RESULT]], i32 1
 149 // CHECK-FP16-NEXT:    [[VZIP1_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
 150 // CHECK-FP16-NEXT:    store <8 x half> [[VZIP1_I]], ptr [[TMP2]], align 4, !alias.scope [[META6]]
 151 // CHECK-FP16-NEXT:    ret void
 152 //
 153 float16x8x2_t test_vzipq_f16(float16x8_t a, float16x8_t b) {
 154   return vzipq_f16(a, b);
 155 }
 156
 157 // CHECK-NOFP16-LABEL: define dso_local void @test_vuzp_f16(
 158 // CHECK-NOFP16-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT16X4X2_T:%.*]]) align 8 [[AGG_RESULT:%.*]], <2 x i32> noundef [[A_COERCE:%.*]], <2 x i32> noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
 159 // CHECK-NOFP16-NEXT:  entry:
 160 // CHECK-NOFP16-NEXT:    [[TMP0:%.*]] = bitcast <2 x i32> [[A_COERCE]] to <4 x half>
 161 // CHECK-NOFP16-NEXT:    [[TMP1:%.*]] = bitcast <2 x i32> [[B_COERCE]] to <4 x half>
 162 // CHECK-NOFP16-NEXT:    [[TMP2:%.*]] = bitcast <4 x half> [[TMP0]] to <2 x i32>
 163 // CHECK-NOFP16-NEXT:    [[TMP3:%.*]] = bitcast <4 x half> [[TMP1]] to <2 x i32>
 164 // CHECK-NOFP16-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META9:![0-9]+]])
 165 // CHECK-NOFP16-NEXT:    [[TMP4:%.*]] = bitcast <2 x i32> [[TMP2]] to <4 x half>
 166 // CHECK-NOFP16-NEXT:    [[TMP5:%.*]] = bitcast <2 x i32> [[TMP3]] to <4 x half>
 167 // CHECK-NOFP16-NEXT:    [[TMP6:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8>
 168 // CHECK-NOFP16-NEXT:    [[TMP7:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
 169 // CHECK-NOFP16-NEXT:    [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
 170 // CHECK-NOFP16-NEXT:    [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
 171 // CHECK-NOFP16-NEXT:    [[VUZP_I:%.*]] = shufflevector <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
 172 // CHECK-NOFP16-NEXT:    store <4 x i16> [[VUZP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META9]]
 173 // CHECK-NOFP16-NEXT:    [[TMP10:%.*]] = getelementptr inbounds <4 x i16>, ptr [[AGG_RESULT]], i32 1
 174 // CHECK-NOFP16-NEXT:    [[VUZP3_I:%.*]] = shufflevector <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
 175 // CHECK-NOFP16-NEXT:    store <4 x i16> [[VUZP3_I]], ptr [[TMP10]], align 4, !alias.scope [[META9]]
 176 // CHECK-NOFP16-NEXT:    ret void
 177 //
 178 // CHECK-FP16-LABEL: define dso_local void @test_vuzp_f16(
 179 // CHECK-FP16-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT16X4X2_T:%.*]]) align 8 [[AGG_RESULT:%.*]], <4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] {
 180 // CHECK-FP16-NEXT:  entry:
 181 // CHECK-FP16-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META9:![0-9]+]])
 182 // CHECK-FP16-NEXT:    [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
 183 // CHECK-FP16-NEXT:    [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8>
 184 // CHECK-FP16-NEXT:    [[VUZP_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
 185 // CHECK-FP16-NEXT:    store <4 x half> [[VUZP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META9]]
 186 // CHECK-FP16-NEXT:    [[TMP2:%.*]] = getelementptr inbounds <4 x half>, ptr [[AGG_RESULT]], i32 1
 187 // CHECK-FP16-NEXT:    [[VUZP1_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
 188 // CHECK-FP16-NEXT:    store <4 x half> [[VUZP1_I]], ptr [[TMP2]], align 4, !alias.scope [[META9]]
 189 // CHECK-FP16-NEXT:    ret void
 190 //
 191 float16x4x2_t test_vuzp_f16(float16x4_t a, float16x4_t b) {
 192   return vuzp_f16(a, b);
 193 }
 194
 195 // CHECK-NOFP16-LABEL: define dso_local void @test_vuzpq_f16(
 196 // CHECK-NOFP16-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT16X8X2_T:%.*]]) align 16 [[AGG_RESULT:%.*]], <4 x i32> noundef [[A_COERCE:%.*]], <4 x i32> noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
 197 // CHECK-NOFP16-NEXT:  entry:
 198 // CHECK-NOFP16-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A_COERCE]] to <8 x half>
 199 // CHECK-NOFP16-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B_COERCE]] to <8 x half>
 200 // CHECK-NOFP16-NEXT:    [[TMP2:%.*]] = bitcast <8 x half> [[TMP0]] to <4 x i32>
 201 // CHECK-NOFP16-NEXT:    [[TMP3:%.*]] = bitcast <8 x half> [[TMP1]] to <4 x i32>
 202 // CHECK-NOFP16-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]])
 203 // CHECK-NOFP16-NEXT:    [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to <8 x half>
 204 // CHECK-NOFP16-NEXT:    [[TMP5:%.*]] = bitcast <4 x i32> [[TMP3]] to <8 x half>
 205 // CHECK-NOFP16-NEXT:    [[TMP6:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8>
 206 // CHECK-NOFP16-NEXT:    [[TMP7:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
 207 // CHECK-NOFP16-NEXT:    [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
 208 // CHECK-NOFP16-NEXT:    [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
 209 // CHECK-NOFP16-NEXT:    [[VUZP_I:%.*]] = shufflevector <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
 210 // CHECK-NOFP16-NEXT:    store <8 x i16> [[VUZP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META12]]
 211 // CHECK-NOFP16-NEXT:    [[TMP10:%.*]] = getelementptr inbounds <8 x i16>, ptr [[AGG_RESULT]], i32 1
 212 // CHECK-NOFP16-NEXT:    [[VUZP3_I:%.*]] = shufflevector <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
 213 // CHECK-NOFP16-NEXT:    store <8 x i16> [[VUZP3_I]], ptr [[TMP10]], align 4, !alias.scope [[META12]]
 214 // CHECK-NOFP16-NEXT:    ret void
 215 //
 216 // CHECK-FP16-LABEL: define dso_local void @test_vuzpq_f16(
 217 // CHECK-FP16-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT16X8X2_T:%.*]]) align 16 [[AGG_RESULT:%.*]], <8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] {
 218 // CHECK-FP16-NEXT:  entry:
 219 // CHECK-FP16-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]])
 220 // CHECK-FP16-NEXT:    [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
 221 // CHECK-FP16-NEXT:    [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8>
 222 // CHECK-FP16-NEXT:    [[VUZP_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
 223 // CHECK-FP16-NEXT:    store <8 x half> [[VUZP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META12]]
 224 // CHECK-FP16-NEXT:    [[TMP2:%.*]] = getelementptr inbounds <8 x half>, ptr [[AGG_RESULT]], i32 1
 225 // CHECK-FP16-NEXT:    [[VUZP1_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
 226 // CHECK-FP16-NEXT:    store <8 x half> [[VUZP1_I]], ptr [[TMP2]], align 4, !alias.scope [[META12]]
 227 // CHECK-FP16-NEXT:    ret void
 228 //
 229 float16x8x2_t test_vuzpq_f16(float16x8_t a, float16x8_t b) {
 230   return vuzpq_f16(a, b);
 231 }
 232
 233 // CHECK-NOFP16-LABEL: define dso_local void @test_vtrn_f16(
 234 // CHECK-NOFP16-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT16X4X2_T:%.*]]) align 8 [[AGG_RESULT:%.*]], <2 x i32> noundef [[A_COERCE:%.*]], <2 x i32> noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
 235 // CHECK-NOFP16-NEXT:  entry:
 236 // CHECK-NOFP16-NEXT:    [[TMP0:%.*]] = bitcast <2 x i32> [[A_COERCE]] to <4 x half>
 237 // CHECK-NOFP16-NEXT:    [[TMP1:%.*]] = bitcast <2 x i32> [[B_COERCE]] to <4 x half>
 238 // CHECK-NOFP16-NEXT:    [[TMP2:%.*]] = bitcast <4 x half> [[TMP0]] to <2 x i32>
 239 // CHECK-NOFP16-NEXT:    [[TMP3:%.*]] = bitcast <4 x half> [[TMP1]] to <2 x i32>
 240 // CHECK-NOFP16-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META15:![0-9]+]])
 241 // CHECK-NOFP16-NEXT:    [[TMP4:%.*]] = bitcast <2 x i32> [[TMP2]] to <4 x half>
 242 // CHECK-NOFP16-NEXT:    [[TMP5:%.*]] = bitcast <2 x i32> [[TMP3]] to <4 x half>
 243 // CHECK-NOFP16-NEXT:    [[TMP6:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8>
 244 // CHECK-NOFP16-NEXT:    [[TMP7:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
 245 // CHECK-NOFP16-NEXT:    [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
 246 // CHECK-NOFP16-NEXT:    [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
 247 // CHECK-NOFP16-NEXT:    [[VTRN_I:%.*]] = shufflevector <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
 248 // CHECK-NOFP16-NEXT:    store <4 x i16> [[VTRN_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META15]]
 249 // CHECK-NOFP16-NEXT:    [[TMP10:%.*]] = getelementptr inbounds <4 x i16>, ptr [[AGG_RESULT]], i32 1
 250 // CHECK-NOFP16-NEXT:    [[VTRN3_I:%.*]] = shufflevector <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
 251 // CHECK-NOFP16-NEXT:    store <4 x i16> [[VTRN3_I]], ptr [[TMP10]], align 4, !alias.scope [[META15]]
 252 // CHECK-NOFP16-NEXT:    ret void
 253 //
 254 // CHECK-FP16-LABEL: define dso_local void @test_vtrn_f16(
 255 // CHECK-FP16-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT16X4X2_T:%.*]]) align 8 [[AGG_RESULT:%.*]], <4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] {
 256 // CHECK-FP16-NEXT:  entry:
 257 // CHECK-FP16-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META15:![0-9]+]])
 258 // CHECK-FP16-NEXT:    [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
 259 // CHECK-FP16-NEXT:    [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8>
 260 // CHECK-FP16-NEXT:    [[VTRN_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
 261 // CHECK-FP16-NEXT:    store <4 x half> [[VTRN_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META15]]
 262 // CHECK-FP16-NEXT:    [[TMP2:%.*]] = getelementptr inbounds <4 x half>, ptr [[AGG_RESULT]], i32 1
 263 // CHECK-FP16-NEXT:    [[VTRN1_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
 264 // CHECK-FP16-NEXT:    store <4 x half> [[VTRN1_I]], ptr [[TMP2]], align 4, !alias.scope [[META15]]
 265 // CHECK-FP16-NEXT:    ret void
 266 //
 267 float16x4x2_t test_vtrn_f16(float16x4_t a, float16x4_t b) {
 268   return vtrn_f16(a, b);
 269 }
 270
 271 // CHECK-NOFP16-LABEL: define dso_local void @test_vtrnq_f16(
 272 // CHECK-NOFP16-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT16X8X2_T:%.*]]) align 16 [[AGG_RESULT:%.*]], <4 x i32> noundef [[A_COERCE:%.*]], <4 x i32> noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
 273 // CHECK-NOFP16-NEXT:  entry:
 274 // CHECK-NOFP16-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A_COERCE]] to <8 x half>
 275 // CHECK-NOFP16-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B_COERCE]] to <8 x half>
 276 // CHECK-NOFP16-NEXT:    [[TMP2:%.*]] = bitcast <8 x half> [[TMP0]] to <4 x i32>
 277 // CHECK-NOFP16-NEXT:    [[TMP3:%.*]] = bitcast <8 x half> [[TMP1]] to <4 x i32>
 278 // CHECK-NOFP16-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META18:![0-9]+]])
 279 // CHECK-NOFP16-NEXT:    [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to <8 x half>
 280 // CHECK-NOFP16-NEXT:    [[TMP5:%.*]] = bitcast <4 x i32> [[TMP3]] to <8 x half>
 281 // CHECK-NOFP16-NEXT:    [[TMP6:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8>
 282 // CHECK-NOFP16-NEXT:    [[TMP7:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
 283 // CHECK-NOFP16-NEXT:    [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
 284 // CHECK-NOFP16-NEXT:    [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
 285 // CHECK-NOFP16-NEXT:    [[VTRN_I:%.*]] = shufflevector <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
 286 // CHECK-NOFP16-NEXT:    store <8 x i16> [[VTRN_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META18]]
 287 // CHECK-NOFP16-NEXT:    [[TMP10:%.*]] = getelementptr inbounds <8 x i16>, ptr [[AGG_RESULT]], i32 1
 288 // CHECK-NOFP16-NEXT:    [[VTRN3_I:%.*]] = shufflevector <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
 289 // CHECK-NOFP16-NEXT:    store <8 x i16> [[VTRN3_I]], ptr [[TMP10]], align 4, !alias.scope [[META18]]
 290 // CHECK-NOFP16-NEXT:    ret void
 291 //
 292 // CHECK-FP16-LABEL: define dso_local void @test_vtrnq_f16(
 293 // CHECK-FP16-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT16X8X2_T:%.*]]) align 16 [[AGG_RESULT:%.*]], <8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] {
 294 // CHECK-FP16-NEXT:  entry:
 295 // CHECK-FP16-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META18:![0-9]+]])
 296 // CHECK-FP16-NEXT:    [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
 297 // CHECK-FP16-NEXT:    [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8>
 298 // CHECK-FP16-NEXT:    [[VTRN_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
 299 // CHECK-FP16-NEXT:    store <8 x half> [[VTRN_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META18]]
 300 // CHECK-FP16-NEXT:    [[TMP2:%.*]] = getelementptr inbounds <8 x half>, ptr [[AGG_RESULT]], i32 1
 301 // CHECK-FP16-NEXT:    [[VTRN1_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
 302 // CHECK-FP16-NEXT:    store <8 x half> [[VTRN1_I]], ptr [[TMP2]], align 4, !alias.scope [[META18]]
 303 // CHECK-FP16-NEXT:    ret void
 304 //
 305 float16x8x2_t test_vtrnq_f16(float16x8_t a, float16x8_t b) {
 306   return vtrnq_f16(a, b);
 307 }
 308
 309 // CHECK-NOFP16-LABEL: define dso_local <2 x i32> @test_vmov_n_f16(
 310 // CHECK-NOFP16-SAME: half noundef [[A:%.*]]) #[[ATTR0]] {
 311 // CHECK-NOFP16-NEXT:  entry:
 312 // CHECK-NOFP16-NEXT:    [[VECINIT:%.*]] = insertelement <4 x half> poison, half [[A]], i32 0
 313 // CHECK-NOFP16-NEXT:    [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[A]], i32 1
 314 // CHECK-NOFP16-NEXT:    [[VECINIT2:%.*]] = insertelement <4 x half> [[VECINIT1]], half [[A]], i32 2
 315 // CHECK-NOFP16-NEXT:    [[VECINIT3:%.*]] = insertelement <4 x half> [[VECINIT2]], half [[A]], i32 3
 316 // CHECK-NOFP16-NEXT:    [[TMP0:%.*]] = bitcast <4 x half> [[VECINIT3]] to <2 x i32>
 317 // CHECK-NOFP16-NEXT:    ret <2 x i32> [[TMP0]]
 318 //
 319 // CHECK-FP16-LABEL: define dso_local <4 x half> @test_vmov_n_f16(
 320 // CHECK-FP16-SAME: half noundef [[A:%.*]]) #[[ATTR0]] {
 321 // CHECK-FP16-NEXT:  entry:
 322 // CHECK-FP16-NEXT:    [[VECINIT:%.*]] = insertelement <4 x half> poison, half [[A]], i32 0
 323 // CHECK-FP16-NEXT:    [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[A]], i32 1
 324 // CHECK-FP16-NEXT:    [[VECINIT2:%.*]] = insertelement <4 x half> [[VECINIT1]], half [[A]], i32 2
 325 // CHECK-FP16-NEXT:    [[VECINIT3:%.*]] = insertelement <4 x half> [[VECINIT2]], half [[A]], i32 3
 326 // CHECK-FP16-NEXT:    ret <4 x half> [[VECINIT3]]
 327 //
 328 float16x4_t test_vmov_n_f16(float16_t a) {
 329   return vmov_n_f16(a);
 330 }
 331
 332 // CHECK-NOFP16-LABEL: define dso_local <4 x i32> @test_vmovq_n_f16(
 333 // CHECK-NOFP16-SAME: half noundef [[A:%.*]]) #[[ATTR0]] {
 334 // CHECK-NOFP16-NEXT:  entry:
 335 // CHECK-NOFP16-NEXT:    [[VECINIT:%.*]] = insertelement <8 x half> poison, half [[A]], i32 0
 336 // CHECK-NOFP16-NEXT:    [[VECINIT1:%.*]] = insertelement <8 x half> [[VECINIT]], half [[A]], i32 1
 337 // CHECK-NOFP16-NEXT:    [[VECINIT2:%.*]] = insertelement <8 x half> [[VECINIT1]], half [[A]], i32 2
 338 // CHECK-NOFP16-NEXT:    [[VECINIT3:%.*]] = insertelement <8 x half> [[VECINIT2]], half [[A]], i32 3
 339 // CHECK-NOFP16-NEXT:    [[VECINIT4:%.*]] = insertelement <8 x half> [[VECINIT3]], half [[A]], i32 4
 340 // CHECK-NOFP16-NEXT:    [[VECINIT5:%.*]] = insertelement <8 x half> [[VECINIT4]], half [[A]], i32 5
 341 // CHECK-NOFP16-NEXT:    [[VECINIT6:%.*]] = insertelement <8 x half> [[VECINIT5]], half [[A]], i32 6
 342 // CHECK-NOFP16-NEXT:    [[VECINIT7:%.*]] = insertelement <8 x half> [[VECINIT6]], half [[A]], i32 7
 343 // CHECK-NOFP16-NEXT:    [[TMP0:%.*]] = bitcast <8 x half> [[VECINIT7]] to <4 x i32>
 344 // CHECK-NOFP16-NEXT:    ret <4 x i32> [[TMP0]]
 345 //
 346 // CHECK-FP16-LABEL: define dso_local <8 x half> @test_vmovq_n_f16(
 347 // CHECK-FP16-SAME: half noundef [[A:%.*]]) #[[ATTR0]] {
 348 // CHECK-FP16-NEXT:  entry:
 349 // CHECK-FP16-NEXT:    [[VECINIT:%.*]] = insertelement <8 x half> poison, half [[A]], i32 0
 350 // CHECK-FP16-NEXT:    [[VECINIT1:%.*]] = insertelement <8 x half> [[VECINIT]], half [[A]], i32 1
 351 // CHECK-FP16-NEXT:    [[VECINIT2:%.*]] = insertelement <8 x half> [[VECINIT1]], half [[A]], i32 2
 352 // CHECK-FP16-NEXT:    [[VECINIT3:%.*]] = insertelement <8 x half> [[VECINIT2]], half [[A]], i32 3
 353 // CHECK-FP16-NEXT:    [[VECINIT4:%.*]] = insertelement <8 x half> [[VECINIT3]], half [[A]], i32 4
 354 // CHECK-FP16-NEXT:    [[VECINIT5:%.*]] = insertelement <8 x half> [[VECINIT4]], half [[A]], i32 5
 355 // CHECK-FP16-NEXT:    [[VECINIT6:%.*]] = insertelement <8 x half> [[VECINIT5]], half [[A]], i32 6
 356 // CHECK-FP16-NEXT:    [[VECINIT7:%.*]] = insertelement <8 x half> [[VECINIT6]], half [[A]], i32 7
 357 // CHECK-FP16-NEXT:    ret <8 x half> [[VECINIT7]]
 358 //
 359 float16x8_t test_vmovq_n_f16(float16_t a) {
 360   return vmovq_n_f16(a);
 361 }
 362
 363 // CHECK-NOFP16-LABEL: define dso_local <2 x i32> @test_vdup_n_f16(
 364 // CHECK-NOFP16-SAME: half noundef [[A:%.*]]) #[[ATTR0]] {
 365 // CHECK-NOFP16-NEXT:  entry:
 366 // CHECK-NOFP16-NEXT:    [[VECINIT:%.*]] = insertelement <4 x half> poison, half [[A]], i32 0
 367 // CHECK-NOFP16-NEXT:    [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[A]], i32 1
 368 // CHECK-NOFP16-NEXT:    [[VECINIT2:%.*]] = insertelement <4 x half> [[VECINIT1]], half [[A]], i32 2
 369 // CHECK-NOFP16-NEXT:    [[VECINIT3:%.*]] = insertelement <4 x half> [[VECINIT2]], half [[A]], i32 3
 370 // CHECK-NOFP16-NEXT:    [[TMP0:%.*]] = bitcast <4 x half> [[VECINIT3]] to <2 x i32>
 371 // CHECK-NOFP16-NEXT:    ret <2 x i32> [[TMP0]]
 372 //
 373 // CHECK-FP16-LABEL: define dso_local <4 x half> @test_vdup_n_f16(
 374 // CHECK-FP16-SAME: half noundef [[A:%.*]]) #[[ATTR0]] {
 375 // CHECK-FP16-NEXT:  entry:
 376 // CHECK-FP16-NEXT:    [[VECINIT:%.*]] = insertelement <4 x half> poison, half [[A]], i32 0
 377 // CHECK-FP16-NEXT:    [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[A]], i32 1
 378 // CHECK-FP16-NEXT:    [[VECINIT2:%.*]] = insertelement <4 x half> [[VECINIT1]], half [[A]], i32 2
 379 // CHECK-FP16-NEXT:    [[VECINIT3:%.*]] = insertelement <4 x half> [[VECINIT2]], half [[A]], i32 3
 380 // CHECK-FP16-NEXT:    ret <4 x half> [[VECINIT3]]
 381 //
 382 float16x4_t test_vdup_n_f16(float16_t a) {
 383   return vdup_n_f16(a);
 384 }
 385
 386 // CHECK-NOFP16-LABEL: define dso_local <4 x i32> @test_vdupq_n_f16(
 387 // CHECK-NOFP16-SAME: half noundef [[A:%.*]]) #[[ATTR0]] {
 388 // CHECK-NOFP16-NEXT:  entry:
 389 // CHECK-NOFP16-NEXT:    [[VECINIT:%.*]] = insertelement <8 x half> poison, half [[A]], i32 0
 390 // CHECK-NOFP16-NEXT:    [[VECINIT1:%.*]] = insertelement <8 x half> [[VECINIT]], half [[A]], i32 1
 391 // CHECK-NOFP16-NEXT:    [[VECINIT2:%.*]] = insertelement <8 x half> [[VECINIT1]], half [[A]], i32 2
 392 // CHECK-NOFP16-NEXT:    [[VECINIT3:%.*]] = insertelement <8 x half> [[VECINIT2]], half [[A]], i32 3
 393 // CHECK-NOFP16-NEXT:    [[VECINIT4:%.*]] = insertelement <8 x half> [[VECINIT3]], half [[A]], i32 4
 394 // CHECK-NOFP16-NEXT:    [[VECINIT5:%.*]] = insertelement <8 x half> [[VECINIT4]], half [[A]], i32 5
 395 // CHECK-NOFP16-NEXT:    [[VECINIT6:%.*]] = insertelement <8 x half> [[VECINIT5]], half [[A]], i32 6
 396 // CHECK-NOFP16-NEXT:    [[VECINIT7:%.*]] = insertelement <8 x half> [[VECINIT6]], half [[A]], i32 7
 397 // CHECK-NOFP16-NEXT:    [[TMP0:%.*]] = bitcast <8 x half> [[VECINIT7]] to <4 x i32>
 398 // CHECK-NOFP16-NEXT:    ret <4 x i32> [[TMP0]]
 399 //
 400 // CHECK-FP16-LABEL: define dso_local <8 x half> @test_vdupq_n_f16(
 401 // CHECK-FP16-SAME: half noundef [[A:%.*]]) #[[ATTR0]] {
 402 // CHECK-FP16-NEXT:  entry:
 403 // CHECK-FP16-NEXT:    [[VECINIT:%.*]] = insertelement <8 x half> poison, half [[A]], i32 0
 404 // CHECK-FP16-NEXT:    [[VECINIT1:%.*]] = insertelement <8 x half> [[VECINIT]], half [[A]], i32 1
 405 // CHECK-FP16-NEXT:    [[VECINIT2:%.*]] = insertelement <8 x half> [[VECINIT1]], half [[A]], i32 2
 406 // CHECK-FP16-NEXT:    [[VECINIT3:%.*]] = insertelement <8 x half> [[VECINIT2]], half [[A]], i32 3
 407 // CHECK-FP16-NEXT:    [[VECINIT4:%.*]] = insertelement <8 x half> [[VECINIT3]], half [[A]], i32 4
 408 // CHECK-FP16-NEXT:    [[VECINIT5:%.*]] = insertelement <8 x half> [[VECINIT4]], half [[A]], i32 5
 409 // CHECK-FP16-NEXT:    [[VECINIT6:%.*]] = insertelement <8 x half> [[VECINIT5]], half [[A]], i32 6
 410 // CHECK-FP16-NEXT:    [[VECINIT7:%.*]] = insertelement <8 x half> [[VECINIT6]], half [[A]], i32 7
 411 // CHECK-FP16-NEXT:    ret <8 x half> [[VECINIT7]]
 412 //
 413 float16x8_t test_vdupq_n_f16(float16_t a) {
 414   return vdupq_n_f16(a);
 415 }
 416
 417 // CHECK-NOFP16-LABEL: define dso_local <2 x i32> @test_vdup_lane_f16(
 418 // CHECK-NOFP16-SAME: <2 x i32> noundef [[A_COERCE:%.*]]) #[[ATTR0]] {
 419 // CHECK-NOFP16-NEXT:  entry:
 420 // CHECK-NOFP16-NEXT:    [[TMP0:%.*]] = bitcast <2 x i32> [[A_COERCE]] to <4 x half>
 421 // CHECK-NOFP16-NEXT:    [[TMP1:%.*]] = bitcast <4 x half> [[TMP0]] to <8 x i8>
 422 // CHECK-NOFP16-NEXT:    [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
 423 // CHECK-NOFP16-NEXT:    [[LANE:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP2]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
 424 // CHECK-NOFP16-NEXT:    [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <4 x half>
 425 // CHECK-NOFP16-NEXT:    [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <2 x i32>
 426 // CHECK-NOFP16-NEXT:    ret <2 x i32> [[TMP4]]
 427 //
 428 // CHECK-FP16-LABEL: define dso_local <4 x half> @test_vdup_lane_f16(
 429 // CHECK-FP16-SAME: <4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
 430 // CHECK-FP16-NEXT:  entry:
 431 // CHECK-FP16-NEXT:    [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
 432 // CHECK-FP16-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
 433 // CHECK-FP16-NEXT:    [[LANE:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
 434 // CHECK-FP16-NEXT:    ret <4 x half> [[LANE]]
 435 //
 436 float16x4_t test_vdup_lane_f16(float16x4_t a) {
 437   return vdup_lane_f16(a, 3);
 438 }
 439
 440 // CHECK-NOFP16-LABEL: define dso_local <4 x i32> @test_vdupq_lane_f16(
 441 // CHECK-NOFP16-SAME: <2 x i32> noundef [[A_COERCE:%.*]]) #[[ATTR0]] {
 442 // CHECK-NOFP16-NEXT:  entry:
 443 // CHECK-NOFP16-NEXT:    [[TMP0:%.*]] = bitcast <2 x i32> [[A_COERCE]] to <4 x half>
 444 // CHECK-NOFP16-NEXT:    [[TMP1:%.*]] = bitcast <4 x half> [[TMP0]] to <8 x i8>
 445 // CHECK-NOFP16-NEXT:    [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
 446 // CHECK-NOFP16-NEXT:    [[LANE:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP2]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
 447 // CHECK-NOFP16-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[LANE]] to <8 x half>
 448 // CHECK-NOFP16-NEXT:    [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <4 x i32>
 449 // CHECK-NOFP16-NEXT:    ret <4 x i32> [[TMP4]]
 450 //
 451 // CHECK-FP16-LABEL: define dso_local <8 x half> @test_vdupq_lane_f16(
 452 // CHECK-FP16-SAME: <4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
 453 // CHECK-FP16-NEXT:  entry:
 454 // CHECK-FP16-NEXT:    [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
 455 // CHECK-FP16-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
 456 // CHECK-FP16-NEXT:    [[LANE:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
 457 // CHECK-FP16-NEXT:    ret <8 x half> [[LANE]]
 458 //
 459 float16x8_t test_vdupq_lane_f16(float16x4_t a) {
 460   return vdupq_lane_f16(a, 3);
 461 }
 462
 463 // CHECK-NOFP16-LABEL: define dso_local <2 x i32> @test_vext_f16(
 464 // CHECK-NOFP16-SAME: <2 x i32> noundef [[A_COERCE:%.*]], <2 x i32> noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
 465 // CHECK-NOFP16-NEXT:  entry:
 466 // CHECK-NOFP16-NEXT:    [[TMP0:%.*]] = bitcast <2 x i32> [[A_COERCE]] to <4 x half>
 467 // CHECK-NOFP16-NEXT:    [[TMP1:%.*]] = bitcast <2 x i32> [[B_COERCE]] to <4 x half>
 468 // CHECK-NOFP16-NEXT:    [[TMP2:%.*]] = bitcast <4 x half> [[TMP0]] to <8 x i8>
 469 // CHECK-NOFP16-NEXT:    [[TMP3:%.*]] = bitcast <4 x half> [[TMP1]] to <8 x i8>
 470 // CHECK-NOFP16-NEXT:    [[TMP4:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
 471 // CHECK-NOFP16-NEXT:    [[TMP5:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16>
 472 // CHECK-NOFP16-NEXT:    [[VEXT:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 2, i32 3, i32 4, i32 5>
 473 // CHECK-NOFP16-NEXT:    [[TMP6:%.*]] = bitcast <4 x i16> [[VEXT]] to <4 x half>
 474 // CHECK-NOFP16-NEXT:    [[TMP7:%.*]] = bitcast <4 x half> [[TMP6]] to <2 x i32>
 475 // CHECK-NOFP16-NEXT:    ret <2 x i32> [[TMP7]]
 476 //
 477 // CHECK-FP16-LABEL: define dso_local <4 x half> @test_vext_f16(
 478 // CHECK-FP16-SAME: <4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] {
 479 // CHECK-FP16-NEXT:  entry:
 480 // CHECK-FP16-NEXT:    [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
 481 // CHECK-FP16-NEXT:    [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8>
 482 // CHECK-FP16-NEXT:    [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
 483 // CHECK-FP16-NEXT:    [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
 484 // CHECK-FP16-NEXT:    [[VEXT:%.*]] = shufflevector <4 x half> [[TMP2]], <4 x half> [[TMP3]], <4 x i32> <i32 2, i32 3, i32 4, i32 5>
 485 // CHECK-FP16-NEXT:    ret <4 x half> [[VEXT]]
 486 //
 487 float16x4_t test_vext_f16(float16x4_t a, float16x4_t b) {
 488   return vext_f16(a, b, 2);
 489 }
 490
 491 // CHECK-NOFP16-LABEL: define dso_local <4 x i32> @test_vextq_f16(
 492 // CHECK-NOFP16-SAME: <4 x i32> noundef [[A_COERCE:%.*]], <4 x i32> noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
 493 // CHECK-NOFP16-NEXT:  entry:
 494 // CHECK-NOFP16-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A_COERCE]] to <8 x half>
 495 // CHECK-NOFP16-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B_COERCE]] to <8 x half>
 496 // CHECK-NOFP16-NEXT:    [[TMP2:%.*]] = bitcast <8 x half> [[TMP0]] to <16 x i8>
 497 // CHECK-NOFP16-NEXT:    [[TMP3:%.*]] = bitcast <8 x half> [[TMP1]] to <16 x i8>
 498 // CHECK-NOFP16-NEXT:    [[TMP4:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
 499 // CHECK-NOFP16-NEXT:    [[TMP5:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x i16>
 500 // CHECK-NOFP16-NEXT:    [[VEXT:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12>
 501 // CHECK-NOFP16-NEXT:    [[TMP6:%.*]] = bitcast <8 x i16> [[VEXT]] to <8 x half>
 502 // CHECK-NOFP16-NEXT:    [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <4 x i32>
 503 // CHECK-NOFP16-NEXT:    ret <4 x i32> [[TMP7]]
 504 //
 505 // CHECK-FP16-LABEL: define dso_local <8 x half> @test_vextq_f16(
 506 // CHECK-FP16-SAME: <8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] {
 507 // CHECK-FP16-NEXT:  entry:
 508 // CHECK-FP16-NEXT:    [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
 509 // CHECK-FP16-NEXT:    [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8>
 510 // CHECK-FP16-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
 511 // CHECK-FP16-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
 512 // CHECK-FP16-NEXT:    [[VEXT:%.*]] = shufflevector <8 x half> [[TMP2]], <8 x half> [[TMP3]], <8 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12>
 513 // CHECK-FP16-NEXT:    ret <8 x half> [[VEXT]]
 514 //
 515 float16x8_t test_vextq_f16(float16x8_t a, float16x8_t b) {
 516   return vextq_f16(a, b, 5);
 517 }
 518
 519 // CHECK-NOFP16-LABEL: define dso_local <2 x i32> @test_vrev64_f16(
 520 // CHECK-NOFP16-SAME: <2 x i32> noundef [[A_COERCE:%.*]]) #[[ATTR0]] {
 521 // CHECK-NOFP16-NEXT:  entry:
 522 // CHECK-NOFP16-NEXT:    [[TMP0:%.*]] = bitcast <2 x i32> [[A_COERCE]] to <4 x half>
 523 // CHECK-NOFP16-NEXT:    [[TMP1:%.*]] = bitcast <4 x half> [[TMP0]] to <2 x i32>
 524 // CHECK-NOFP16-NEXT:    [[TMP2:%.*]] = bitcast <2 x i32> [[TMP1]] to <4 x half>
 525 // CHECK-NOFP16-NEXT:    [[SHUFFLE_I:%.*]] = shufflevector <4 x half> [[TMP2]], <4 x half> [[TMP2]], <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 526 // CHECK-NOFP16-NEXT:    [[TMP3:%.*]] = bitcast <4 x half> [[SHUFFLE_I]] to <2 x i32>
 527 // CHECK-NOFP16-NEXT:    [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <4 x half>
 528 // CHECK-NOFP16-NEXT:    [[TMP5:%.*]] = bitcast <4 x half> [[TMP4]] to <2 x i32>
 529 // CHECK-NOFP16-NEXT:    ret <2 x i32> [[TMP5]]
 530 //
 531 // CHECK-FP16-LABEL: define dso_local <4 x half> @test_vrev64_f16(
 532 // CHECK-FP16-SAME: <4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
 533 // CHECK-FP16-NEXT:  entry:
 534 // CHECK-FP16-NEXT:    [[SHUFFLE_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[A]], <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 535 // CHECK-FP16-NEXT:    ret <4 x half> [[SHUFFLE_I]]
 536 //
 537 float16x4_t test_vrev64_f16(float16x4_t a) {
 538   return vrev64_f16(a);
 539 }
 540
 541 // CHECK-NOFP16-LABEL: define dso_local <4 x i32> @test_vrev64q_f16(
 542 // CHECK-NOFP16-SAME: <4 x i32> noundef [[A_COERCE:%.*]]) #[[ATTR0]] {
 543 // CHECK-NOFP16-NEXT:  entry:
 544 // CHECK-NOFP16-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A_COERCE]] to <8 x half>
 545 // CHECK-NOFP16-NEXT:    [[TMP1:%.*]] = bitcast <8 x half> [[TMP0]] to <4 x i32>
 546 // CHECK-NOFP16-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to <8 x half>
 547 // CHECK-NOFP16-NEXT:    [[SHUFFLE_I:%.*]] = shufflevector <8 x half> [[TMP2]], <8 x half> [[TMP2]], <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
 548 // CHECK-NOFP16-NEXT:    [[TMP3:%.*]] = bitcast <8 x half> [[SHUFFLE_I]] to <4 x i32>
 549 // CHECK-NOFP16-NEXT:    [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <8 x half>
 550 // CHECK-NOFP16-NEXT:    [[TMP5:%.*]] = bitcast <8 x half> [[TMP4]] to <4 x i32>
 551 // CHECK-NOFP16-NEXT:    ret <4 x i32> [[TMP5]]
 552 //
 553 // CHECK-FP16-LABEL: define dso_local <8 x half> @test_vrev64q_f16(
 554 // CHECK-FP16-SAME: <8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
 555 // CHECK-FP16-NEXT:  entry:
 556 // CHECK-FP16-NEXT:    [[SHUFFLE_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[A]], <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
 557 // CHECK-FP16-NEXT:    ret <8 x half> [[SHUFFLE_I]]
 558 //
 559 float16x8_t test_vrev64q_f16(float16x8_t a) {
 560   return vrev64q_f16(a);
 561 }
 562 //.
 563 // CHECK-NOFP16: [[META3]] = !{[[META4:![0-9]+]]}
 564 // CHECK-NOFP16: [[META4]] = distinct !{[[META4]], [[META5:![0-9]+]], !"vzip_f16: %agg.result"}
 565 // CHECK-NOFP16: [[META5]] = distinct !{[[META5]], !"vzip_f16"}
 566 // CHECK-NOFP16: [[META6]] = !{[[META7:![0-9]+]]}
 567 // CHECK-NOFP16: [[META7]] = distinct !{[[META7]], [[META8:![0-9]+]], !"vzipq_f16: %agg.result"}
 568 // CHECK-NOFP16: [[META8]] = distinct !{[[META8]], !"vzipq_f16"}
 569 // CHECK-NOFP16: [[META9]] = !{[[META10:![0-9]+]]}
 570 // CHECK-NOFP16: [[META10]] = distinct !{[[META10]], [[META11:![0-9]+]], !"vuzp_f16: %agg.result"}
 571 // CHECK-NOFP16: [[META11]] = distinct !{[[META11]], !"vuzp_f16"}
 572 // CHECK-NOFP16: [[META12]] = !{[[META13:![0-9]+]]}
 573 // CHECK-NOFP16: [[META13]] = distinct !{[[META13]], [[META14:![0-9]+]], !"vuzpq_f16: %agg.result"}
 574 // CHECK-NOFP16: [[META14]] = distinct !{[[META14]], !"vuzpq_f16"}
 575 // CHECK-NOFP16: [[META15]] = !{[[META16:![0-9]+]]}
 576 // CHECK-NOFP16: [[META16]] = distinct !{[[META16]], [[META17:![0-9]+]], !"vtrn_f16: %agg.result"}
 577 // CHECK-NOFP16: [[META17]] = distinct !{[[META17]], !"vtrn_f16"}
 578 // CHECK-NOFP16: [[META18]] = !{[[META19:![0-9]+]]}
 579 // CHECK-NOFP16: [[META19]] = distinct !{[[META19]], [[META20:![0-9]+]], !"vtrnq_f16: %agg.result"}
 580 // CHECK-NOFP16: [[META20]] = distinct !{[[META20]], !"vtrnq_f16"}
 581 //.
 582 // CHECK-FP16: [[META3]] = !{[[META4:![0-9]+]]}
 583 // CHECK-FP16: [[META4]] = distinct !{[[META4]], [[META5:![0-9]+]], !"vzip_f16: %agg.result"}
 584 // CHECK-FP16: [[META5]] = distinct !{[[META5]], !"vzip_f16"}
 585 // CHECK-FP16: [[META6]] = !{[[META7:![0-9]+]]}
 586 // CHECK-FP16: [[META7]] = distinct !{[[META7]], [[META8:![0-9]+]], !"vzipq_f16: %agg.result"}
 587 // CHECK-FP16: [[META8]] = distinct !{[[META8]], !"vzipq_f16"}
 588 // CHECK-FP16: [[META9]] = !{[[META10:![0-9]+]]}
 589 // CHECK-FP16: [[META10]] = distinct !{[[META10]], [[META11:![0-9]+]], !"vuzp_f16: %agg.result"}
 590 // CHECK-FP16: [[META11]] = distinct !{[[META11]], !"vuzp_f16"}
 591 // CHECK-FP16: [[META12]] = !{[[META13:![0-9]+]]}
 592 // CHECK-FP16: [[META13]] = distinct !{[[META13]], [[META14:![0-9]+]], !"vuzpq_f16: %agg.result"}
 593 // CHECK-FP16: [[META14]] = distinct !{[[META14]], !"vuzpq_f16"}
 594 // CHECK-FP16: [[META15]] = !{[[META16:![0-9]+]]}
 595 // CHECK-FP16: [[META16]] = distinct !{[[META16]], [[META17:![0-9]+]], !"vtrn_f16: %agg.result"}
 596 // CHECK-FP16: [[META17]] = distinct !{[[META17]], !"vtrn_f16"}
 597 // CHECK-FP16: [[META18]] = !{[[META19:![0-9]+]]}
 598 // CHECK-FP16: [[META19]] = distinct !{[[META19]], [[META20:![0-9]+]], !"vtrnq_f16: %agg.result"}
 599 // CHECK-FP16: [[META20]] = distinct !{[[META20]], !"vtrnq_f16"}
 600 //.