; RUN: opt -passes='lower-matrix-intrinsics' -S < %s | FileCheck %s
; A volatile column-major load must be split into one volatile load per
; column (3 x <3 x double>), never merged or turned non-volatile.
define <9 x double> @strided_load_3x3_volatile(ptr %in, i64 %stride) {
; CHECK-LABEL: @strided_load_3x3_volatile(
; CHECK-NEXT:    [[VEC_START:%.*]] = mul i64 0, [[STRIDE:%.*]]
; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr double, ptr %in, i64 [[VEC_START]]
; CHECK-NEXT:    load volatile <3 x double>, ptr [[VEC_GEP]], align 8
; CHECK-NEXT:    [[VEC_START1:%.*]] = mul i64 1, [[STRIDE]]
; CHECK-NEXT:    [[VEC_GEP2:%.*]] = getelementptr double, ptr %in, i64 [[VEC_START1]]
; CHECK-NEXT:    load volatile <3 x double>, ptr [[VEC_GEP2]], align 8
; CHECK-NEXT:    [[VEC_START5:%.*]] = mul i64 2, [[STRIDE]]
; CHECK-NEXT:    [[VEC_GEP6:%.*]] = getelementptr double, ptr %in, i64 [[VEC_START5]]
; CHECK-NEXT:    load volatile <3 x double>, ptr [[VEC_GEP6]], align 8
; CHECK-NOT:     = load
;
  %load = call <9 x double> @llvm.matrix.column.major.load.v9f64(ptr %in, i64 %stride, i1 true, i32 3, i32 3)
  ret <9 x double> %load
}
declare <9 x double> @llvm.matrix.column.major.load.v9f64(ptr, i64, i1, i32, i32)
; When a volatile <4 x double> load feeds a matrix multiply, the lowering
; splits it into per-column volatile <2 x double> loads.
define <4 x double> @load_volatile_multiply(ptr %in) {
; CHECK-LABEL: @load_volatile_multiply(
; CHECK-NEXT:    load volatile <2 x double>, ptr [[IN:%.*]], align 8
; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr double, ptr [[IN]], i64 2
; CHECK-NEXT:    load volatile <2 x double>, ptr [[VEC_GEP]], align 8
; CHECK-NOT:     = load
;
  %in.m = load volatile <4 x double>, ptr %in, align 8
  %res = call <4 x double> @llvm.matrix.multiply(<4 x double> %in.m, <4 x double> %in.m, i32 2, i32 2, i32 2)
  ret <4 x double> %res
}
declare <4 x double> @llvm.matrix.multiply(<4 x double>, <4 x double>, i32, i32, i32)
; `align 32` on the pointer argument applies to the first column only;
; later columns are at stride-dependent offsets and fall back to the
; element alignment (align 8).
define <9 x double> @strided_load_3x3_align32(ptr %in, i64 %stride) {
; CHECK-LABEL: @strided_load_3x3_align32(
; CHECK-NEXT:    [[VEC_START:%.*]] = mul i64 0, [[STRIDE:%.*]]
; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr double, ptr %in, i64 [[VEC_START]]
; CHECK-NEXT:    load <3 x double>, ptr [[VEC_GEP]], align 32
; CHECK-NEXT:    [[VEC_START1:%.*]] = mul i64 1, [[STRIDE]]
; CHECK-NEXT:    [[VEC_GEP2:%.*]] = getelementptr double, ptr %in, i64 [[VEC_START1]]
; CHECK-NEXT:    load <3 x double>, ptr [[VEC_GEP2]], align 8
; CHECK-NEXT:    [[VEC_START5:%.*]] = mul i64 2, [[STRIDE]]
; CHECK-NEXT:    [[VEC_GEP6:%.*]] = getelementptr double, ptr %in, i64 [[VEC_START5]]
; CHECK-NEXT:    load <3 x double>, ptr [[VEC_GEP6]], align 8
; CHECK-NOT:     = load
;
  %load = call <9 x double> @llvm.matrix.column.major.load.v9f64(ptr align 32 %in, i64 %stride, i1 false, i32 3, i32 3)
  ret <9 x double> %load
}
; An alignment smaller than the element size (`align 2`) must be
; propagated to every per-column load, not widened.
define <9 x double> @strided_load_3x3_align2(ptr %in, i64 %stride) {
; CHECK-LABEL: @strided_load_3x3_align2(
; CHECK-NEXT:    [[VEC_START:%.*]] = mul i64 0, [[STRIDE:%.*]]
; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr double, ptr %in, i64 [[VEC_START]]
; CHECK-NEXT:    load <3 x double>, ptr [[VEC_GEP]], align 2
; CHECK-NEXT:    [[VEC_START1:%.*]] = mul i64 1, [[STRIDE]]
; CHECK-NEXT:    [[VEC_GEP2:%.*]] = getelementptr double, ptr %in, i64 [[VEC_START1]]
; CHECK-NEXT:    load <3 x double>, ptr [[VEC_GEP2]], align 2
; CHECK-NEXT:    [[VEC_START5:%.*]] = mul i64 2, [[STRIDE]]
; CHECK-NEXT:    [[VEC_GEP6:%.*]] = getelementptr double, ptr %in, i64 [[VEC_START5]]
; CHECK-NEXT:    load <3 x double>, ptr [[VEC_GEP6]], align 2
; CHECK-NOT:     = load
;
  %load = call <9 x double> @llvm.matrix.column.major.load.v9f64(ptr align 2 %in, i64 %stride, i1 false, i32 3, i32 3)
  ret <9 x double> %load
}
; Under-aligned (`align 2`) matrix operand of a multiply: the split
; per-column loads keep the original align 2.
define <4 x double> @load_align2_multiply(ptr %in) {
; CHECK-LABEL: @load_align2_multiply(
; CHECK-NEXT:    load <2 x double>, ptr [[IN:%.*]], align 2
; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr double, ptr [[IN]], i64 2
; CHECK-NEXT:    load <2 x double>, ptr [[VEC_GEP]], align 2
; CHECK-NOT:     = load
;
  %in.m = load <4 x double>, ptr %in, align 2
  %res = call <4 x double> @llvm.matrix.multiply(<4 x double> %in.m, <4 x double> %in.m, i32 2, i32 2, i32 2)
  ret <4 x double> %res
}
90 define <6 x float> @strided_load_2x3_align16_stride2(ptr %in) {
91 ; CHECK-LABEL: @strided_load_2x3_align16_stride2(
93 ; CHECK-NEXT: [[COL_LOAD:%.*]] = load <2 x float>, ptr %in, align 16
94 ; CHECK-NEXT: [[VEC_GEP:%.*]] = getelementptr float, ptr %in, i64 2
95 ; CHECK-NEXT: [[COL_LOAD2:%.*]] = load <2 x float>, ptr [[VEC_GEP]], align 8
96 ; CHECK-NEXT: [[VEC_GEP3:%.*]] = getelementptr float, ptr %in, i64 4
97 ; CHECK-NEXT: [[COL_LOAD5:%.*]] = load <2 x float>, ptr [[VEC_GEP3]], align 16
98 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x float> [[COL_LOAD]], <2 x float> [[COL_LOAD2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
99 ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[COL_LOAD5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
100 ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP2]], <6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>
101 ; CHECK-NEXT: ret <6 x float> [[TMP3]]
104 %load = call <6 x float> @llvm.matrix.column.major.load.v6f32(ptr align 16 %in, i64 2, i1 false, i32 2, i32 3)
105 ret <6 x float> %load
declare <6 x float> @llvm.matrix.column.major.load.v6f32(ptr, i64, i1, i32, i32)