1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+sve2,+lut,+bf16 | FileCheck %s
; LUTI2 lane lookup on i8 elements: passes %table, %indices and the immediate
; lane index 0 to @llvm.aarch64.sve.luti2.lane.nxv16i8 and returns the result.
; The expected assembly contains `luti2 z0.b, { z0.b }, z1[0]`.
4 define <vscale x 16 x i8> @test_luti2_lane_i8(<vscale x 16 x i8> %table, <vscale x 16 x i8> %indices){
5 ; CHECK-LABEL: test_luti2_lane_i8:
7 ; CHECK-NEXT: luti2 z0.b, { z0.b }, z1[0]
9 %res= tail call <vscale x 16 x i8> @llvm.aarch64.sve.luti2.lane.nxv16i8(<vscale x 16 x i8> %table, <vscale x 16 x i8> %indices, i32 0)
10 ret <vscale x 16 x i8> %res
; LUTI2 lane lookup on i16 elements: the table is <vscale x 8 x i16> while the
; indices stay <vscale x 16 x i8>; the immediate lane index is 0.
; The expected assembly contains `luti2 z0.h, { z0.h }, z1[0]`.
13 define <vscale x 8 x i16> @test_luti2_lane_i16(<vscale x 8 x i16> %table, <vscale x 16 x i8> %indices){
14 ; CHECK-LABEL: test_luti2_lane_i16:
16 ; CHECK-NEXT: luti2 z0.h, { z0.h }, z1[0]
18 %res= tail call <vscale x 8 x i16> @llvm.aarch64.sve.luti2.lane.nxv8i16(<vscale x 8 x i16> %table, <vscale x 16 x i8> %indices, i32 0)
19 ret <vscale x 8 x i16> %res
; LUTI2 lane lookup on half elements: calls the nxv8f16 variant of the
; intrinsic with a <vscale x 16 x i8> index vector and immediate lane index 0.
; The expected assembly uses the same .h form as the i16 test:
; `luti2 z0.h, { z0.h }, z1[0]`.
22 define <vscale x 8 x half> @test_luti2_lane_f16(<vscale x 8 x half> %table, <vscale x 16 x i8> %indices){
23 ; CHECK-LABEL: test_luti2_lane_f16:
25 ; CHECK-NEXT: luti2 z0.h, { z0.h }, z1[0]
27 %res= tail call <vscale x 8 x half> @llvm.aarch64.sve.luti2.lane.nxv8f16(<vscale x 8 x half> %table, <vscale x 16 x i8> %indices, i32 0)
28 ret <vscale x 8 x half> %res
; LUTI2 lane lookup on bfloat elements: calls the nxv8bf16 variant of the
; intrinsic with a <vscale x 16 x i8> index vector and immediate lane index 0.
; The expected assembly contains `luti2 z0.h, { z0.h }, z1[0]`.
31 define <vscale x 8 x bfloat> @test_luti2_lane_bf16(<vscale x 8 x bfloat> %table, <vscale x 16 x i8> %indices){
32 ; CHECK-LABEL: test_luti2_lane_bf16:
34 ; CHECK-NEXT: luti2 z0.h, { z0.h }, z1[0]
36 %res= tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.luti2.lane.nxv8bf16(<vscale x 8 x bfloat> %table, <vscale x 16 x i8> %indices, i32 0)
37 ret <vscale x 8 x bfloat> %res
; LUTI4 lane lookup on i8 elements: passes %table, %indices and the immediate
; lane index 0 to @llvm.aarch64.sve.luti4.lane.nxv16i8 and returns the result.
; The expected assembly contains `luti4 z0.b, { z0.b }, z1[0]`.
40 define <vscale x 16 x i8> @test_luti4_lane_i8(<vscale x 16 x i8> %table, <vscale x 16 x i8> %indices){
41 ; CHECK-LABEL: test_luti4_lane_i8:
43 ; CHECK-NEXT: luti4 z0.b, { z0.b }, z1[0]
45 %res= tail call <vscale x 16 x i8> @llvm.aarch64.sve.luti4.lane.nxv16i8(<vscale x 16 x i8> %table, <vscale x 16 x i8> %indices, i32 0)
46 ret <vscale x 16 x i8> %res
; LUTI4 lane lookup on i16 elements with a single table vector: the table is
; <vscale x 8 x i16>, the indices are <vscale x 16 x i8>, lane index is 0.
; The expected assembly contains `luti4 z0.h, { z0.h }, z1[0]`.
49 define <vscale x 8 x i16> @test_luti4_lane_i16(<vscale x 8 x i16> %table, <vscale x 16 x i8> %indices){
50 ; CHECK-LABEL: test_luti4_lane_i16:
52 ; CHECK-NEXT: luti4 z0.h, { z0.h }, z1[0]
54 %res= tail call <vscale x 8 x i16> @llvm.aarch64.sve.luti4.lane.nxv8i16(<vscale x 8 x i16> %table, <vscale x 16 x i8> %indices, i32 0)
55 ret <vscale x 8 x i16> %res
; LUTI4 lane lookup on half elements with a single table vector: calls the
; nxv8f16 variant of the intrinsic with immediate lane index 0.
; The expected assembly contains `luti4 z0.h, { z0.h }, z1[0]`.
58 define <vscale x 8 x half> @test_luti4_lane_f16(<vscale x 8 x half> %table, <vscale x 16 x i8> %indices){
59 ; CHECK-LABEL: test_luti4_lane_f16:
61 ; CHECK-NEXT: luti4 z0.h, { z0.h }, z1[0]
63 %res= tail call <vscale x 8 x half> @llvm.aarch64.sve.luti4.lane.nxv8f16(<vscale x 8 x half> %table, <vscale x 16 x i8> %indices, i32 0)
64 ret <vscale x 8 x half> %res
; LUTI4 lane lookup on bfloat elements with a single table vector: calls the
; nxv8bf16 variant of the intrinsic with immediate lane index 0.
; The expected assembly contains `luti4 z0.h, { z0.h }, z1[0]`.
67 define <vscale x 8 x bfloat> @test_luti4_lane_bf16(<vscale x 8 x bfloat> %table, <vscale x 16 x i8> %indices){
68 ; CHECK-LABEL: test_luti4_lane_bf16:
70 ; CHECK-NEXT: luti4 z0.h, { z0.h }, z1[0]
72 %res= tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.luti4.lane.nxv8bf16(<vscale x 8 x bfloat> %table, <vscale x 16 x i8> %indices, i32 0)
73 ret <vscale x 8 x bfloat> %res
; Two-table (x2) LUTI4 lane lookup on i16 elements: the same %table value is
; passed as both table operands of @llvm.aarch64.sve.luti4.lane.x2.nxv8i16,
; with immediate lane index 0.
; The expected assembly copies z0 into z2 and then z2 into z3, so the luti4
; reads its table from the register list { z2.h, z3.h }.
76 define <vscale x 8 x i16> @test_luti4_lane_i16_x2(<vscale x 8 x i16> %table, <vscale x 16 x i8> %indices){
77 ; CHECK-LABEL: test_luti4_lane_i16_x2:
79 ; CHECK-NEXT: mov z2.d, z0.d
80 ; CHECK-NEXT: mov z3.d, z2.d
81 ; CHECK-NEXT: luti4 z0.h, { z2.h, z3.h }, z1[0]
83 %res= tail call <vscale x 8 x i16> @llvm.aarch64.sve.luti4.lane.x2.nxv8i16(<vscale x 8 x i16> %table, <vscale x 8 x i16> %table, <vscale x 16 x i8> %indices, i32 0)
84 ret <vscale x 8 x i16> %res
; Two-table (x2) LUTI4 lane lookup on half elements: the same %table value is
; passed as both table operands of @llvm.aarch64.sve.luti4.lane.x2.nxv8f16,
; with immediate lane index 0.
; The expected assembly copies z0 into z2 and then z2 into z3, so the luti4
; reads its table from the register list { z2.h, z3.h }.
87 define <vscale x 8 x half> @test_luti4_lane_f16_x2(<vscale x 8 x half> %table, <vscale x 16 x i8> %indices){
88 ; CHECK-LABEL: test_luti4_lane_f16_x2:
90 ; CHECK-NEXT: mov z2.d, z0.d
91 ; CHECK-NEXT: mov z3.d, z2.d
92 ; CHECK-NEXT: luti4 z0.h, { z2.h, z3.h }, z1[0]
94 %res= tail call <vscale x 8 x half> @llvm.aarch64.sve.luti4.lane.x2.nxv8f16(<vscale x 8 x half> %table, <vscale x 8 x half> %table, <vscale x 16 x i8> %indices, i32 0)
95 ret <vscale x 8 x half> %res
; Two-table (x2) LUTI4 lane lookup on bfloat elements: the same %table value
; is passed as both table operands of
; @llvm.aarch64.sve.luti4.lane.x2.nxv8bf16, with immediate lane index 0.
; The expected assembly copies z0 into z2 and then z2 into z3, so the luti4
; reads its table from the register list { z2.h, z3.h }.
98 define <vscale x 8 x bfloat> @test_luti4_lane_bf16_x2(<vscale x 8 x bfloat> %table, <vscale x 16 x i8> %indices){
99 ; CHECK-LABEL: test_luti4_lane_bf16_x2:
101 ; CHECK-NEXT: mov z2.d, z0.d
102 ; CHECK-NEXT: mov z3.d, z2.d
103 ; CHECK-NEXT: luti4 z0.h, { z2.h, z3.h }, z1[0]
105 %res= tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.luti4.lane.x2.nxv8bf16(<vscale x 8 x bfloat> %table, <vscale x 8 x bfloat> %table, <vscale x 16 x i8> %indices, i32 0)
106 ret <vscale x 8 x bfloat> %res