clang/test/CodeGen/arm-poly-add.c

   1 // REQUIRES: arm-registered-target
   2 // RUN: %clang_cc1 -triple armv8.2a-arm-none-eabi \
   3 // RUN:   -target-feature +neon \
   4 // RUN:   -mfloat-abi hard \
   5 // RUN: -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg \
   6 // RUN:  | FileCheck %s
   7
   8 #include <arm_neon.h>
   9
  10 // CHECK-LABEL: @test_vadd_p8(
  11 // CHECK-NEXT:  entry:
  12 // CHECK-NEXT:    [[TMP0:%.*]] = xor <8 x i8> [[A:%.*]], [[B:%.*]]
  13 // CHECK-NEXT:    ret <8 x i8> [[TMP0]]
  14 //
  15 poly8x8_t test_vadd_p8(poly8x8_t a, poly8x8_t b) {
  16   return vadd_p8 (a, b);
  17 }
  18
  19 // CHECK-LABEL: @test_vadd_p16(
  20 // CHECK-NEXT:  entry:
  21 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8>
  22 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
  23 // CHECK-NEXT:    [[TMP2:%.*]] = xor <8 x i8> [[TMP0]], [[TMP1]]
  24 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
  25 // CHECK-NEXT:    ret <4 x i16> [[TMP3]]
  26 //
  27 poly16x4_t test_vadd_p16(poly16x4_t a, poly16x4_t b) {
  28   return vadd_p16 (a, b);
  29 }
  30
  31 // CHECK-LABEL: @test_vadd_p64(
  32 // CHECK-NEXT:  entry:
  33 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[A:%.*]] to <8 x i8>
  34 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[B:%.*]] to <8 x i8>
  35 // CHECK-NEXT:    [[TMP2:%.*]] = xor <8 x i8> [[TMP0]], [[TMP1]]
  36 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64>
  37 // CHECK-NEXT:    ret <1 x i64> [[TMP3]]
  38 //
  39 poly64x1_t test_vadd_p64(poly64x1_t a, poly64x1_t b) {
  40   return vadd_p64(a, b);
  41 }
  42
  43 // CHECK-LABEL: @test_vaddq_p8(
  44 // CHECK-NEXT:  entry:
  45 // CHECK-NEXT:    [[TMP0:%.*]] = xor <16 x i8> [[A:%.*]], [[B:%.*]]
  46 // CHECK-NEXT:    ret <16 x i8> [[TMP0]]
  47 //
  48 poly8x16_t test_vaddq_p8(poly8x16_t a, poly8x16_t b){
  49   return vaddq_p8(a, b);
  50 }
  51
  52 // CHECK-LABEL: @test_vaddq_p16(
  53 // CHECK-NEXT:  entry:
  54 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i16> [[A:%.*]] to <16 x i8>
  55 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i16> [[B:%.*]] to <16 x i8>
  56 // CHECK-NEXT:    [[TMP2:%.*]] = xor <16 x i8> [[TMP0]], [[TMP1]]
  57 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
  58 // CHECK-NEXT:    ret <8 x i16> [[TMP3]]
  59 //
  60 poly16x8_t test_vaddq_p16(poly16x8_t a, poly16x8_t b){
  61   return vaddq_p16(a, b);
  62 }
  63
  64 // CHECK-LABEL: @test_vaddq_p64(
  65 // CHECK-NEXT:  entry:
  66 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i64> [[A:%.*]] to <16 x i8>
  67 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[B:%.*]] to <16 x i8>
  68 // CHECK-NEXT:    [[TMP2:%.*]] = xor <16 x i8> [[TMP0]], [[TMP1]]
  69 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
  70 // CHECK-NEXT:    ret <2 x i64> [[TMP3]]
  71 //
  72 poly64x2_t test_vaddq_p64(poly64x2_t a, poly64x2_t b){
  73   return vaddq_p64(a, b);
  74 }
  75
  76 // TODO: poly128_t not implemented on aarch32
  77 // CHCK-LABEL: @test_vaddq_p128(
  78 // CHCK-NEXT:  entry:
  79 // CHCK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[A:%.*]] to <16 x i8>
  80 // CHCK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[B:%.*]] to <16 x i8>
  81 // CHCK-NEXT:    [[TMP2:%.*]] = xor <16 x i8> [[TMP0]], [[TMP1]]
  82 // CHCK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
  83 // CHCK-NEXT:    ret i128 [[TMP3]]
  84 //
  85 //poly128_t test_vaddq_p128 (poly128_t a, poly128_t b){
  86 //  return vaddq_p128(a, b);