1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt -S -passes='require<profile-summary>,function(codegenprepare)' < %s | FileCheck %s
; Module-level setup shared by every test function below.
; All functions are built for AArch64 Linux.
4 target triple = "aarch64-unknown-linux-gnu"
; Two-field i32 struct; the struct tests address its second field (index 1).
6 %struct.a = type { i32, i32 }
; External instance of %struct.a, used as a constant base by @global_struct_splat.
7 @c = external dso_local global %struct.a, align 4
; Constant 16-element i32 table, indexed with a vector by @test_global_array.
8 @glob_array = internal unnamed_addr constant [16 x i32] [i32 1, i32 1, i32 2, i32 3, i32 5, i32 8, i32 13, i32 21, i32 34, i32 55, i32 89, i32 144, i32 233, i32 377, i32 610, i32 987], align 16
; Splatted scalar base + vector index.
; The input builds a pointer splat of %base (insertelement + shufflevector) and
; feeds it to a vector GEP indexed by %index. The expected output indexes the
; scalar %base directly with the vector index, so the splat instructions are gone.
10 define <vscale x 4 x i32> @splat_base(ptr %base, <vscale x 4 x i64> %index, <vscale x 4 x i1> %mask) #0 {
11 ; CHECK-LABEL: @splat_base(
12 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[BASE:%.*]], <vscale x 4 x i64> [[INDEX:%.*]]
13 ; CHECK-NEXT: [[RES:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[TMP1]], i32 4, <vscale x 4 x i1> [[MASK:%.*]], <vscale x 4 x i32> undef)
14 ; CHECK-NEXT: ret <vscale x 4 x i32> [[RES]]
; Input IR: splat of %base, vector GEP, masked gather.
16 %broadcast.splatinsert = insertelement <vscale x 4 x ptr> undef, ptr %base, i32 0
17 %broadcast.splat = shufflevector <vscale x 4 x ptr> %broadcast.splatinsert, <vscale x 4 x ptr> undef, <vscale x 4 x i32> zeroinitializer
18 %gep = getelementptr i32, <vscale x 4 x ptr> %broadcast.splat, <vscale x 4 x i64> %index
19 %res = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> %gep, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x i32> undef)
20 ret <vscale x 4 x i32> %res
; Struct field addressed through an all-zero vector index.
; The input GEP has a scalar base, a zeroinitializer vector index and the
; constant field index 1. The expected output first forms the scalar field
; address (i64 0, i32 1) and then a vector GEP over i32 with a zeroinitializer
; index.
23 define <vscale x 4 x i32> @splat_struct(ptr %base, <vscale x 4 x i1> %mask) #0 {
24 ; CHECK-LABEL: @splat_struct(
25 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [[STRUCT_A:%.*]], ptr [[BASE:%.*]], i64 0, i32 1
26 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[TMP1]], <vscale x 4 x i64> zeroinitializer
27 ; CHECK-NEXT: [[RES:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[TMP2]], i32 4, <vscale x 4 x i1> [[MASK:%.*]], <vscale x 4 x i32> undef)
28 ; CHECK-NEXT: ret <vscale x 4 x i32> [[RES]]
; Input IR: single vector GEP into %struct.a, then the masked gather.
30 %gep = getelementptr %struct.a, ptr %base, <vscale x 4 x i64> zeroinitializer, i32 1
31 %res = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> %gep, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x i32> undef)
32 ret <vscale x 4 x i32> %res
; Splatted scalar base + scalar index.
; The input splats %base and applies a vector GEP with a scalar i64 index.
; The expected output is a scalar GEP of %base by %index followed by a vector
; GEP over i32 with a zeroinitializer index.
35 define <vscale x 4 x i32> @scalar_index(ptr %base, i64 %index, <vscale x 4 x i1> %mask) #0 {
36 ; CHECK-LABEL: @scalar_index(
37 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[BASE:%.*]], i64 [[INDEX:%.*]]
38 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[TMP1]], <vscale x 4 x i64> zeroinitializer
39 ; CHECK-NEXT: [[RES:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[TMP2]], i32 4, <vscale x 4 x i1> [[MASK:%.*]], <vscale x 4 x i32> undef)
40 ; CHECK-NEXT: ret <vscale x 4 x i32> [[RES]]
; Input IR: splat of %base, GEP with scalar index, masked gather.
42 %broadcast.splatinsert = insertelement <vscale x 4 x ptr> undef, ptr %base, i32 0
43 %broadcast.splat = shufflevector <vscale x 4 x ptr> %broadcast.splatinsert, <vscale x 4 x ptr> undef, <vscale x 4 x i32> zeroinitializer
44 %gep = getelementptr i32, <vscale x 4 x ptr> %broadcast.splat, i64 %index
45 %res = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> %gep, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x i32> undef)
46 ret <vscale x 4 x i32> %res
; Scalar base + splatted scalar index.
; Here the index, not the base, is splatted (insertelement + shufflevector on
; i64) before the vector GEP. The expected output has the same shape as for
; @scalar_index: a scalar GEP of %base by %index, then a vector GEP over i32
; with a zeroinitializer index.
49 define <vscale x 4 x i32> @splat_index(ptr %base, i64 %index, <vscale x 4 x i1> %mask) #0 {
50 ; CHECK-LABEL: @splat_index(
51 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[BASE:%.*]], i64 [[INDEX:%.*]]
52 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[TMP1]], <vscale x 4 x i64> zeroinitializer
53 ; CHECK-NEXT: [[RES:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[TMP2]], i32 4, <vscale x 4 x i1> [[MASK:%.*]], <vscale x 4 x i32> undef)
54 ; CHECK-NEXT: ret <vscale x 4 x i32> [[RES]]
; Input IR: splat of %index, GEP with scalar base, masked gather.
56 %broadcast.splatinsert = insertelement <vscale x 4 x i64> undef, i64 %index, i32 0
57 %broadcast.splat = shufflevector <vscale x 4 x i64> %broadcast.splatinsert, <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer
58 %gep = getelementptr i32, ptr %base, <vscale x 4 x i64> %broadcast.splat
59 %res = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> %gep, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x i32> undef)
60 ret <vscale x 4 x i32> %res
; Global array indexed by a vector.
; The input is an inbounds GEP typed as [16 x i32] on @glob_array with a scalar
; 0 first index and the vector %indxs as second index. The expected output is a
; single GEP with i32 element type, base @glob_array and only the vector index
; (no leading zero index and no inbounds flag).
63 define <vscale x 4 x i32> @test_global_array(<vscale x 4 x i64> %indxs, <vscale x 4 x i1> %mask) #0 {
64 ; CHECK-LABEL: @test_global_array(
65 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr @glob_array, <vscale x 4 x i64> [[INDXS:%.*]]
66 ; CHECK-NEXT: [[G:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[TMP1]], i32 4, <vscale x 4 x i1> [[MASK:%.*]], <vscale x 4 x i32> undef)
67 ; CHECK-NEXT: ret <vscale x 4 x i32> [[G]]
; Input IR: array-typed GEP on the global, then the masked gather.
69 %p = getelementptr inbounds [16 x i32], ptr @glob_array, i64 0, <vscale x 4 x i64> %indxs
70 %g = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> %p, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x i32> undef)
71 ret <vscale x 4 x i32> %g
; Splat of a global struct address with constant indices only.
; The input splats @c and takes field 1 through a vector GEP whose indices are
; all constant. The expected output contains no separate GEP instruction: the
; gather's pointer operand is a constant expression that splats the field-1
; address of @c (written with poison as the insertelement/shufflevector filler).
74 define <vscale x 4 x i32> @global_struct_splat(<vscale x 4 x i1> %mask) #0 {
75 ; CHECK-LABEL: @global_struct_splat(
76 ; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> shufflevector (<vscale x 4 x ptr> insertelement (<vscale x 4 x ptr> poison, ptr getelementptr ([[STRUCT_A:%.*]], ptr @c, i64 0, i32 1), i64 0), <vscale x 4 x ptr> poison, <vscale x 4 x i32> zeroinitializer), i32 4, <vscale x 4 x i1> [[MASK:%.*]], <vscale x 4 x i32> undef)
77 ; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
; Input IR: splat of @c, constant-index vector GEP to field 1, masked gather.
79 %1 = insertelement <vscale x 4 x ptr> undef, ptr @c, i32 0
80 %2 = shufflevector <vscale x 4 x ptr> %1, <vscale x 4 x ptr> undef, <vscale x 4 x i32> zeroinitializer
81 %3 = getelementptr %struct.a, <vscale x 4 x ptr> %2, <vscale x 4 x i64> zeroinitializer, i32 1
82 %4 = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> %3, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x i32> undef)
83 ret <vscale x 4 x i32> %4
; Gather whose pointer operand is a bare splat (no GEP in the input).
; The input passes the splat of %ptr straight to the gather, together with an
; explicit %passthru vector. The expected output replaces the splat with a
; vector GEP over i32 from the scalar %ptr using a zeroinitializer index; the
; passthru operand is carried through unchanged.
86 define <vscale x 4 x i32> @splat_ptr_gather(ptr %ptr, <vscale x 4 x i1> %mask, <vscale x 4 x i32> %passthru) #0 {
87 ; CHECK-LABEL: @splat_ptr_gather(
88 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[PTR:%.*]], <vscale x 4 x i64> zeroinitializer
89 ; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[TMP1]], i32 4, <vscale x 4 x i1> [[MASK:%.*]], <vscale x 4 x i32> [[PASSTHRU:%.*]])
90 ; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]]
; Input IR: splat of %ptr used directly as the gather address vector.
92 %1 = insertelement <vscale x 4 x ptr> undef, ptr %ptr, i32 0
93 %2 = shufflevector <vscale x 4 x ptr> %1, <vscale x 4 x ptr> undef, <vscale x 4 x i32> zeroinitializer
94 %3 = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> %2, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x i32> %passthru)
95 ret <vscale x 4 x i32> %3
; Scatter whose pointer operand is a bare splat (no GEP in the input).
; The input passes the splat of %ptr straight to the scatter of %val. The
; expected output replaces the splat with a vector GEP over i32 from the scalar
; %ptr using a zeroinitializer index, followed by the scatter and the return.
98 define void @splat_ptr_scatter(ptr %ptr, <vscale x 4 x i1> %mask, <vscale x 4 x i32> %val) #0 {
99 ; CHECK-LABEL: @splat_ptr_scatter(
100 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[PTR:%.*]], <vscale x 4 x i64> zeroinitializer
101 ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[VAL:%.*]], <vscale x 4 x ptr> [[TMP1]], i32 4, <vscale x 4 x i1> [[MASK:%.*]])
102 ; CHECK-NEXT: ret void
; Input IR: splat of %ptr used directly as the scatter address vector.
104 %1 = insertelement <vscale x 4 x ptr> undef, ptr %ptr, i32 0
105 %2 = shufflevector <vscale x 4 x ptr> %1, <vscale x 4 x ptr> undef, <vscale x 4 x i32> zeroinitializer
106 call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> %val, <vscale x 4 x ptr> %2, i32 4, <vscale x 4 x i1> %mask)
; Every basic block must end in a terminator; the expected output above also
; requires this return directly after the scatter call.
107 ret void
; Masked gather intrinsic for <vscale x 4 x i32> data. Operands, in order:
; pointer vector, i32 alignment, lane mask, passthru vector.
110 declare <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr>, i32, <vscale x 4 x i1>, <vscale x 4 x i32>)
; Masked scatter intrinsic for <vscale x 4 x i32> data. Operands, in order:
; value vector, pointer vector, i32 alignment, lane mask.
111 declare void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32>, <vscale x 4 x ptr>, i32, <vscale x 4 x i1>)
; Attribute group attached to every test function: enables the SVE target
; feature for the function.
113 attributes #0 = { "target-features"="+sve" }