llvm/test/CodeGen/AArch64/sme2-intrinsics-cvt.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
   2 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme-f16f16 -force-streaming -verify-machineinstrs < %s | FileCheck %s
   3
   4 ;
   5 ; FCVT
   6 ;
   7
   8 define <vscale x 8 x half> @multi_vector_cvt_x2_f16(<vscale x 4 x float> %unused, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2) {
   9 ; CHECK-LABEL: multi_vector_cvt_x2_f16:
  10 ; CHECK:       // %bb.0:
  11 ; CHECK-NEXT:    mov z3.d, z2.d
  12 ; CHECK-NEXT:    mov z2.d, z1.d
  13 ; CHECK-NEXT:    fcvt z0.h, { z2.s, z3.s }
  14 ; CHECK-NEXT:    ret
  15   %res = call <vscale x 8 x half> @llvm.aarch64.sve.fcvt.x2.nxv4f32(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2)
  16   ret <vscale x 8 x half> %res
  17 }
  18
  19 ;
  20 ; BFCVT
  21 ;
  22
  23 define <vscale x 8 x bfloat> @multi_vector_cvt_x2_bf16(<vscale x 4 x float> %unused, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2) {
  24 ; CHECK-LABEL: multi_vector_cvt_x2_bf16:
  25 ; CHECK:       // %bb.0:
  26 ; CHECK-NEXT:    mov z3.d, z2.d
  27 ; CHECK-NEXT:    mov z2.d, z1.d
  28 ; CHECK-NEXT:    bfcvt z0.h, { z2.s, z3.s }
  29 ; CHECK-NEXT:    ret
  30   %res = call <vscale x 8 x bfloat> @llvm.aarch64.sve.bfcvt.x2(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2)
  31   ret <vscale x 8 x bfloat> %res
  32 }
  33
  34 ;
  35 ; FCVTZS
  36 ;
  37 define {<vscale x 4 x i32>, <vscale x 4 x i32>}  @multi_vector_cvt_x2_s32_f32(<vscale x 4 x float> %unused, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1) {
  38 ; CHECK-LABEL: multi_vector_cvt_x2_s32_f32:
  39 ; CHECK:       // %bb.0:
  40 ; CHECK-NEXT:    mov z3.d, z2.d
  41 ; CHECK-NEXT:    mov z2.d, z1.d
  42 ; CHECK-NEXT:    fcvtzs { z0.s, z1.s }, { z2.s, z3.s }
  43 ; CHECK-NEXT:    ret
  44   %res = call {<vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.aarch64.sve.fcvtzs.x2.nxv4i32.nxv4f32(<vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1)
  45   ret {<vscale x 4 x i32>, <vscale x 4 x i32>} %res
  46 }
  47
  48 define {<vscale x 4 x i32>, <vscale x 4 x i32>,<vscale x 4 x i32>, <vscale x 4 x i32>}  @multi_vector_cvt_x4_s32_f32(<vscale x 4 x float> %unused, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3) {
  49 ; CHECK-LABEL: multi_vector_cvt_x4_s32_f32:
  50 ; CHECK:       // %bb.0:
  51 ; CHECK-NEXT:    mov z7.d, z4.d
  52 ; CHECK-NEXT:    mov z6.d, z3.d
  53 ; CHECK-NEXT:    mov z5.d, z2.d
  54 ; CHECK-NEXT:    mov z4.d, z1.d
  55 ; CHECK-NEXT:    fcvtzs { z0.s - z3.s }, { z4.s - z7.s }
  56 ; CHECK-NEXT:    ret
  57   %res = call {<vscale x 4 x i32>, <vscale x 4 x i32>,<vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.aarch64.sve.fcvtzs.x4.nxv4i32.nxv4f32(<vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3)
  58   ret {<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>} %res
  59 }
  60
  61 ;
  62 ; FCVTZU
  63 ;
  64 define {<vscale x 4 x i32>, <vscale x 4 x i32>}  @multi_vector_cvt_x2_u32_f32(<vscale x 4 x float> %unused, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1) {
  65 ; CHECK-LABEL: multi_vector_cvt_x2_u32_f32:
  66 ; CHECK:       // %bb.0:
  67 ; CHECK-NEXT:    mov z3.d, z2.d
  68 ; CHECK-NEXT:    mov z2.d, z1.d
  69 ; CHECK-NEXT:    fcvtzu { z0.s, z1.s }, { z2.s, z3.s }
  70 ; CHECK-NEXT:    ret
  71   %res = call {<vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.aarch64.sve.fcvtzu.x2.nxv4i32.nxv4f32(<vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1)
  72   ret {<vscale x 4 x i32>, <vscale x 4 x i32>} %res
  73 }
  74
  75 define {<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>}  @multi_vector_cvt_x4_u32_f32(<vscale x 4 x float> %unused, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3) {
  76 ; CHECK-LABEL: multi_vector_cvt_x4_u32_f32:
  77 ; CHECK:       // %bb.0:
  78 ; CHECK-NEXT:    mov z7.d, z4.d
  79 ; CHECK-NEXT:    mov z6.d, z3.d
  80 ; CHECK-NEXT:    mov z5.d, z2.d
  81 ; CHECK-NEXT:    mov z4.d, z1.d
  82 ; CHECK-NEXT:    fcvtzu { z0.s - z3.s }, { z4.s - z7.s }
  83 ; CHECK-NEXT:    ret
  84   %res = call {<vscale x 4 x i32>, <vscale x 4 x i32>,<vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.aarch64.sve.fcvtzu.x4.nxv4i32.nxv4f32(<vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3)
  85   ret {<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>} %res
  86 }
  87
  88 ;
  89 ; SCVTF
  90 ;
  91 define {<vscale x 4 x float>, <vscale x 4 x float>}  @multi_vector_cvt_x2_f32_s32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1) {
  92 ; CHECK-LABEL: multi_vector_cvt_x2_f32_s32:
  93 ; CHECK:       // %bb.0:
  94 ; CHECK-NEXT:    mov z3.d, z2.d
  95 ; CHECK-NEXT:    mov z2.d, z1.d
  96 ; CHECK-NEXT:    scvtf { z0.s, z1.s }, { z2.s, z3.s }
  97 ; CHECK-NEXT:    ret
  98   %res = call {<vscale x 4 x float>, <vscale x 4 x float>} @llvm.aarch64.sve.scvtf.x2.nxv4f32.nxv4i32(<vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1)
  99   ret {<vscale x 4 x float>, <vscale x 4 x float>} %res
 100 }
 101
 102 define {<vscale x 4 x float>, <vscale x 4 x float>,<vscale x 4 x float>, <vscale x 4 x float>}  @multi_vector_cvt_x4_f32_s32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3) {
 103 ; CHECK-LABEL: multi_vector_cvt_x4_f32_s32:
 104 ; CHECK:       // %bb.0:
 105 ; CHECK-NEXT:    mov z7.d, z4.d
 106 ; CHECK-NEXT:    mov z6.d, z3.d
 107 ; CHECK-NEXT:    mov z5.d, z2.d
 108 ; CHECK-NEXT:    mov z4.d, z1.d
 109 ; CHECK-NEXT:    scvtf { z0.s - z3.s }, { z4.s - z7.s }
 110 ; CHECK-NEXT:    ret
 111   %res = call {<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>} @llvm.aarch64.sve.scvtf.x4.nxv4f32.nxv4i32(<vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3)
 112   ret {<vscale x 4 x float>, <vscale x 4 x float>,<vscale x 4 x float>, <vscale x 4 x float>} %res
 113 }
 114
 115 ;
 116 ; UCVTF
 117 ;
 118 define {<vscale x 4 x float>, <vscale x 4 x float>}  @multi_vector_cvt_x2_f32_u32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1) {
 119 ; CHECK-LABEL: multi_vector_cvt_x2_f32_u32:
 120 ; CHECK:       // %bb.0:
 121 ; CHECK-NEXT:    mov z3.d, z2.d
 122 ; CHECK-NEXT:    mov z2.d, z1.d
 123 ; CHECK-NEXT:    ucvtf { z0.s, z1.s }, { z2.s, z3.s }
 124 ; CHECK-NEXT:    ret
 125   %res = call {<vscale x 4 x float>, <vscale x 4 x float>} @llvm.aarch64.sve.ucvtf.x2.nxv4f32.nxv4i32(<vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1)
 126   ret {<vscale x 4 x float>, <vscale x 4 x float>} %res
 127 }
 128
 129 define {<vscale x 4 x float>, <vscale x 4 x float>,<vscale x 4 x float>, <vscale x 4 x float>}  @multi_vector_cvt_x4_f32_u32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1,<vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3) {
 130 ; CHECK-LABEL: multi_vector_cvt_x4_f32_u32:
 131 ; CHECK:       // %bb.0:
 132 ; CHECK-NEXT:    mov z7.d, z4.d
 133 ; CHECK-NEXT:    mov z6.d, z3.d
 134 ; CHECK-NEXT:    mov z5.d, z2.d
 135 ; CHECK-NEXT:    mov z4.d, z1.d
 136 ; CHECK-NEXT:    ucvtf { z0.s - z3.s }, { z4.s - z7.s }
 137 ; CHECK-NEXT:    ret
 138   %res = call {<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>} @llvm.aarch64.sve.ucvtf.x4.nxv4f32.nxv4i32(<vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3)
 139   ret {<vscale x 4 x float>, <vscale x 4 x float>,<vscale x 4 x float>, <vscale x 4 x float>} %res
 140 }
 141
 142 define {<vscale x 4 x float>, <vscale x 4 x float>}  @multi_vector_cvt_widen_x2_f16(<vscale x 8 x half> %zn0) {
 143 ; CHECK-LABEL: multi_vector_cvt_widen_x2_f16:
 144 ; CHECK:       // %bb.0:
 145 ; CHECK-NEXT:    fcvt { z0.s, z1.s }, z0.h
 146 ; CHECK-NEXT:    ret
 147   %res = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fcvt.widen.x2.nxv4f32(<vscale x 8 x half> %zn0)
 148   ret {<vscale x 4 x float>, <vscale x 4 x float>} %res
 149 }
 150
 151 declare <vscale x 8 x half> @llvm.aarch64.sve.fcvt.x2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
 152 declare <vscale x 8 x bfloat> @llvm.aarch64.sve.bfcvt.x2(<vscale x 4 x float>, <vscale x 4 x float>)
 153 declare {<vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.aarch64.sve.fcvtzs.x2.nxv4i32.nxv4f32(<vscale x 4 x float>,<vscale x 4 x float>)
 154 declare {<vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.aarch64.sve.fcvtzu.x2.nxv4i32.nxv4f32(<vscale x 4 x float>,<vscale x 4 x float>)
 155 declare {<vscale x 4 x float>, <vscale x 4 x float>} @llvm.aarch64.sve.scvtf.x2.nxv4f32.nxv4i32(<vscale x 4 x i32>,<vscale x 4 x i32>)
 156 declare {<vscale x 4 x float>, <vscale x 4 x float>} @llvm.aarch64.sve.ucvtf.x2.nxv4f32.nxv4i32(<vscale x 4 x i32>,<vscale x 4 x i32>)
 157 declare {<vscale x 4 x i32>, <vscale x 4 x i32>,<vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.aarch64.sve.fcvtzs.x4.nxv4i32.nxv4f32(<vscale x 4 x float>,<vscale x 4 x float>,<vscale x 4 x float>,<vscale x 4 x float>)
 158 declare {<vscale x 4 x i32>, <vscale x 4 x i32>,<vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.aarch64.sve.fcvtzu.x4.nxv4i32.nxv4f32(<vscale x 4 x float>,<vscale x 4 x float>,<vscale x 4 x float>,<vscale x 4 x float>)
 159 declare {<vscale x 4 x float>, <vscale x 4 x float>,<vscale x 4 x float>, <vscale x 4 x float>} @llvm.aarch64.sve.scvtf.x4.nxv4f32.nxv4i32(<vscale x 4 x i32>,<vscale x 4 x i32>,<vscale x 4 x i32>,<vscale x 4 x i32>)
 160 declare {<vscale x 4 x float>, <vscale x 4 x float>,<vscale x 4 x float>, <vscale x 4 x float>} @llvm.aarch64.sve.ucvtf.x4.nxv4f32.nxv4i32(<vscale x 4 x i32>,<vscale x 4 x i32>,<vscale x 4 x i32>,<vscale x 4 x i32>)