1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -passes=amdgpu-promote-alloca < %s | FileCheck %s
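4 ; Checks that non-volatile memsets covering the whole alloca don't block PromoteAlloca, and that volatile, partial-size, or offset-destination memsets leave the alloca in place.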
; Positive case: a non-volatile memset of zero covering the whole [6 x i64]
; alloca (6 * 8 = 48 bytes). The expected output contains no alloca and no
; memset; the two stores appear as insertelements into a <6 x i64>
; zeroinitializer.
6 define amdgpu_kernel void @memset_all_zero(i64 %val) {
7 ; CHECK-LABEL: @memset_all_zero(
9 ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <6 x i64> zeroinitializer, i64 [[VAL:%.*]], i32 0
10 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <6 x i64> [[TMP0]], i64 [[VAL]], i32 1
11 ; CHECK-NEXT: ret void
14 %stack = alloca [6 x i64], align 4, addrspace(5)
; Fill byte 0, length 48 (the full alloca), i1 false = not volatile.
15 call void @llvm.memset.p5.i64(ptr addrspace(5) %stack, i8 0, i64 48, i1 false)
; Overwrite element 0 after the memset.
16 store i64 %val, ptr addrspace(5) %stack
; %reload has no users in the visible IR and is absent from the expected output.
17 %reload = load i64, ptr addrspace(5) %stack
; Address of element 1, then overwrite it as well.
18 %stack.1 = getelementptr [6 x i64], ptr addrspace(5) %stack, i64 0, i64 1
19 store i64 %val, ptr addrspace(5) %stack.1
; Positive case with a non-zero fill byte: a non-volatile memset of 5 covering
; the whole [4 x i64] alloca (4 * 8 = 32 bytes). The expected initial vector is
; a splat of 361700864190383365, which is 0x0505050505050505 (byte 5 repeated
; across an i64). The two stores appear as insertelements into that splat.
23 define amdgpu_kernel void @memset_all_5(i64 %val) {
24 ; CHECK-LABEL: @memset_all_5(
26 ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i64> <i64 361700864190383365, i64 361700864190383365, i64 361700864190383365, i64 361700864190383365>, i64 [[VAL:%.*]], i32 0
27 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i64> [[TMP0]], i64 [[VAL]], i32 1
28 ; CHECK-NEXT: ret void
31 %stack = alloca [4 x i64], align 4, addrspace(5)
; Fill byte 5, length 32 (the full alloca), i1 false = not volatile.
32 call void @llvm.memset.p5.i64(ptr addrspace(5) %stack, i8 5, i64 32, i1 false)
; Overwrite element 0 after the memset.
33 store i64 %val, ptr addrspace(5) %stack
; %reload has no users in the visible IR and is absent from the expected output.
34 %reload = load i64, ptr addrspace(5) %stack
; Address of element 1. The GEP source type matches the [4 x i64] alloca; the
; byte offset is 8 regardless of the array length used here.
35 %stack.1 = getelementptr [4 x i64], ptr addrspace(5) %stack, i64 0, i64 1
36 store i64 %val, ptr addrspace(5) %stack.1
; Negative case: the memset covers the whole [4 x i64] alloca (32 bytes) and
; targets the alloca directly, but it is volatile (last operand is i1 true).
; The expected output keeps the alloca, the memset and the store unchanged.
40 define amdgpu_kernel void @memset_volatile_nopromote(i64 %val) {
41 ; CHECK-LABEL: @memset_volatile_nopromote(
43 ; CHECK-NEXT: [[STACK:%.*]] = alloca [4 x i64], align 4, addrspace(5)
44 ; CHECK-NEXT: call void @llvm.memset.p5.i64(ptr addrspace(5) [[STACK]], i8 0, i64 32, i1 true)
45 ; CHECK-NEXT: store i64 [[VAL:%.*]], ptr addrspace(5) [[STACK]], align 8
46 ; CHECK-NEXT: ret void
49 %stack = alloca [4 x i64], align 4, addrspace(5)
; Volatility is the only property that differs from the promotable tests.
50 call void @llvm.memset.p5.i64(ptr addrspace(5) %stack, i8 0, i64 32, i1 true)
51 store i64 %val, ptr addrspace(5) %stack
; Negative case: the memset length (31) is one byte short of the 32-byte
; [4 x i64] alloca. The memset is deliberately non-volatile so that the
; partial size is the only property distinguishing it from the promotable
; tests; a volatile memset is covered separately by
; @memset_volatile_nopromote. The expected output keeps the alloca, the
; memset and the store unchanged.
55 define amdgpu_kernel void @memset_badsize_nopromote(i64 %val) {
56 ; CHECK-LABEL: @memset_badsize_nopromote(
58 ; CHECK-NEXT: [[STACK:%.*]] = alloca [4 x i64], align 4, addrspace(5)
59 ; CHECK-NEXT: call void @llvm.memset.p5.i64(ptr addrspace(5) [[STACK]], i8 0, i64 31, i1 false)
60 ; CHECK-NEXT: store i64 [[VAL:%.*]], ptr addrspace(5) [[STACK]], align 8
61 ; CHECK-NEXT: ret void
64 %stack = alloca [4 x i64], align 4, addrspace(5)
; Length 31 != 32: the memset does not cover the whole alloca.
65 call void @llvm.memset.p5.i64(ptr addrspace(5) %stack, i8 0, i64 31, i1 false)
66 store i64 %val, ptr addrspace(5) %stack
; Negative case: the memset destination is a GEP to element 1 of the alloca
; rather than the alloca itself, and it fills the remaining 24 bytes
; (3 * 8). The memset is deliberately non-volatile so that the offset
; destination is the property under test; a volatile memset is covered
; separately by @memset_volatile_nopromote. The expected output keeps the
; alloca, the GEP, the memset and the store unchanged.
70 define amdgpu_kernel void @memset_offset_ptr_nopromote(i64 %val) {
71 ; CHECK-LABEL: @memset_offset_ptr_nopromote(
73 ; CHECK-NEXT: [[STACK:%.*]] = alloca [4 x i64], align 4, addrspace(5)
74 ; CHECK-NEXT: [[GEP:%.*]] = getelementptr [4 x i64], ptr addrspace(5) [[STACK]], i64 0, i64 1
75 ; CHECK-NEXT: call void @llvm.memset.p5.i64(ptr addrspace(5) [[GEP]], i8 0, i64 24, i1 false)
76 ; CHECK-NEXT: store i64 [[VAL:%.*]], ptr addrspace(5) [[STACK]], align 8
77 ; CHECK-NEXT: ret void
80 %stack = alloca [4 x i64], align 4, addrspace(5)
; Destination starts 8 bytes into the alloca.
81 %gep = getelementptr [4 x i64], ptr addrspace(5) %stack, i64 0, i64 1
; Fills elements 1..3 only; element 0 is not touched by the memset.
82 call void @llvm.memset.p5.i64(ptr addrspace(5) %gep, i8 0, i64 24, i1 false)
83 store i64 %val, ptr addrspace(5) %stack
; Memset intrinsic overload used by every test above: destination pointer in
; addrspace(5), i8 fill value, i64 length, and an immediate i1 volatile flag.
87 declare void @llvm.memset.p5.i64(ptr addrspace(5) nocapture writeonly, i8, i64, i1 immarg)