; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs -mattr=+sve -asm-verbose=0 < %s | FileCheck %s

; Unpredicated dup instruction (which is an alias for mov):
; * register + register,
; * register + immediate
; Splat a scalar GPR byte across all lanes of a scalable i8 vector.
; DUP from a general register is printed as its MOV alias.
define <vscale x 16 x i8> @dup_i8(i8 %b) {
; CHECK-LABEL: dup_i8:
; CHECK: mov z0.b, w0
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8 %b)
  ret <vscale x 16 x i8> %out
}
; Splat an immediate across all lanes of a scalable i8 vector (DUP #imm alias).
define <vscale x 16 x i8> @dup_imm_i8() {
; CHECK-LABEL: dup_imm_i8:
; CHECK: mov z0.b, #16
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8 16)
  ret <vscale x 16 x i8> %out
}
; Splat a scalar GPR halfword across all lanes of a scalable i16 vector.
define <vscale x 8 x i16> @dup_i16(i16 %b) {
; CHECK-LABEL: dup_i16:
; CHECK: mov z0.h, w0
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 %b)
  ret <vscale x 8 x i16> %out
}
; Splat an immediate across all lanes of a scalable i16 vector.
define <vscale x 8 x i16> @dup_imm_i16(i16 %b) {
; CHECK-LABEL: dup_imm_i16:
; CHECK: mov z0.h, #16
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 16)
  ret <vscale x 8 x i16> %out
}
; Splat a scalar GPR word across all lanes of a scalable i32 vector.
define <vscale x 4 x i32> @dup_i32(i32 %b) {
; CHECK-LABEL: dup_i32:
; CHECK: mov z0.s, w0
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 %b)
  ret <vscale x 4 x i32> %out
}
; Splat an immediate across all lanes of a scalable i32 vector.
define <vscale x 4 x i32> @dup_imm_i32(i32 %b) {
; CHECK-LABEL: dup_imm_i32:
; CHECK: mov z0.s, #16
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 16)
  ret <vscale x 4 x i32> %out
}
; Splat a scalar 64-bit GPR across all lanes of a scalable i64 vector.
define <vscale x 2 x i64> @dup_i64(i64 %b) {
; CHECK-LABEL: dup_i64:
; CHECK: mov z0.d, x0
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64 %b)
  ret <vscale x 2 x i64> %out
}
; Splat an immediate across all lanes of a scalable i64 vector.
define <vscale x 2 x i64> @dup_imm_i64(i64 %b) {
; CHECK-LABEL: dup_imm_i64:
; CHECK: mov z0.d, #16
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64 16)
  ret <vscale x 2 x i64> %out
}
; Splat a scalar FP register (h0) across all lanes of a scalable f16 vector.
define <vscale x 8 x half> @dup_f16(half %b) {
; CHECK-LABEL: dup_f16:
; CHECK: mov z0.h, h0
; CHECK-NEXT: ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.dup.x.nxv8f16(half %b)
  ret <vscale x 8 x half> %out
}
; Splat a scalar bfloat register across all lanes; requires +bf16 (attrs #0).
; bfloat shares the h-register file, so the alias is the same as for f16.
define <vscale x 8 x bfloat> @dup_bf16(bfloat %b) #0 {
; CHECK-LABEL: dup_bf16:
; CHECK: mov z0.h, h0
; CHECK-NEXT: ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.x.nxv8bf16(bfloat %b)
  ret <vscale x 8 x bfloat> %out
}
; Splat an FP immediate (FDUP alias) across all lanes of a scalable f16 vector.
define <vscale x 8 x half> @dup_imm_f16(half %b) {
; CHECK-LABEL: dup_imm_f16:
; CHECK: mov z0.h, #16.00000000
; CHECK-NEXT: ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.dup.x.nxv8f16(half 16.)
  ret <vscale x 8 x half> %out
}
; Splat a scalar FP register (s0) across all lanes of a scalable f32 vector.
define <vscale x 4 x float> @dup_f32(float %b) {
; CHECK-LABEL: dup_f32:
; CHECK: mov z0.s, s0
; CHECK-NEXT: ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.dup.x.nxv4f32(float %b)
  ret <vscale x 4 x float> %out
}
; Splat an FP immediate across all lanes of a scalable f32 vector.
define <vscale x 4 x float> @dup_imm_f32(float %b) {
; CHECK-LABEL: dup_imm_f32:
; CHECK: mov z0.s, #16.00000000
; CHECK-NEXT: ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.dup.x.nxv4f32(float 16.)
  ret <vscale x 4 x float> %out
}
; Splat a scalar FP register (d0) across all lanes of a scalable f64 vector.
define <vscale x 2 x double> @dup_f64(double %b) {
; CHECK-LABEL: dup_f64:
; CHECK: mov z0.d, d0
; CHECK-NEXT: ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.dup.x.nxv2f64(double %b)
  ret <vscale x 2 x double> %out
}
; Splat an FP immediate across all lanes of a scalable f64 vector.
define <vscale x 2 x double> @dup_imm_f64(double %b) {
; CHECK-LABEL: dup_imm_f64:
; CHECK: mov z0.d, #16.00000000
; CHECK-NEXT: ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.dup.x.nxv2f64(double 16.)
  ret <vscale x 2 x double> %out
}
; 42.0 is not encodable as an SVE FP immediate, so it must be materialised
; in a GPR (0x42280000 = 1109917696) and splatted from there.
define <vscale x 2 x float> @dup_fmov_imm_f32_2() {
; CHECK-LABEL: dup_fmov_imm_f32_2:
; CHECK: mov w8, #1109917696
; CHECK-NEXT: mov z0.s, w8
  %out = tail call <vscale x 2 x float> @llvm.aarch64.sve.dup.x.nxv2f32(float 4.200000e+01)
  ret <vscale x 2 x float> %out
}
; Same as dup_fmov_imm_f32_2 but for the full-width nxv4f32 type.
define <vscale x 4 x float> @dup_fmov_imm_f32_4() {
; CHECK-LABEL: dup_fmov_imm_f32_4:
; CHECK: mov w8, #1109917696
; CHECK-NEXT: mov z0.s, w8
  %out = tail call <vscale x 4 x float> @llvm.aarch64.sve.dup.x.nxv4f32(float 4.200000e+01)
  ret <vscale x 4 x float> %out
}
; Double-precision 42.0 (0x4045000000000000 = 4631107791820423168) is likewise
; materialised in a 64-bit GPR and splatted.
define <vscale x 2 x double> @dup_fmov_imm_f64_2() {
; CHECK-LABEL: dup_fmov_imm_f64_2:
; CHECK: mov x8, #4631107791820423168
; CHECK-NEXT: mov z0.d, x8
  %out = tail call <vscale x 2 x double> @llvm.aarch64.sve.dup.x.nxv2f64(double 4.200000e+01)
  ret <vscale x 2 x double> %out
}
; Declarations for the llvm.aarch64.sve.dup.x.* intrinsics exercised above.
declare <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8)
declare <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16)
declare <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64)
declare <vscale x 8 x half> @llvm.aarch64.sve.dup.x.nxv8f16(half)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.x.nxv8bf16(bfloat)
declare <vscale x 2 x float> @llvm.aarch64.sve.dup.x.nxv2f32(float)
declare <vscale x 4 x float> @llvm.aarch64.sve.dup.x.nxv4f32(float)
declare <vscale x 2 x double> @llvm.aarch64.sve.dup.x.nxv2f64(double)
; +bf16 is required for the bfloat version.
attributes #0 = { "target-features"="+sve,+bf16" }