llvm/test/Transforms/LowerMatrixIntrinsics/load-align-volatile.ll

   1 ; RUN: opt -passes='lower-matrix-intrinsics' -S < %s | FileCheck %s
   2
   ; Volatile column-major load with a runtime stride.
   ; Calls @llvm.matrix.column.major.load.v9f64 on %in with %stride, the i1
   ; argument set to true and both i32 arguments set to 3. The expected output
   ; is three `load volatile <3 x double>` instructions, each with align 8, at
   ; element offsets 0 * stride, 1 * stride and 2 * stride from %in, with no
   ; `= load` after the third one.
   3 define <9 x double> @strided_load_3x3_volatile(ptr %in, i64 %stride) {
   4 ; CHECK-LABEL: @strided_load_3x3_volatile(
   5 ; CHECK-NEXT:  entry:
   6 ; CHECK-NEXT:    [[VEC_START:%.*]] = mul i64 0, [[STRIDE:%.*]]
   7 ; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr double, ptr %in, i64 [[VEC_START]]
   8 ; CHECK-NEXT:    load volatile <3 x double>, ptr [[VEC_GEP]], align 8
   9 ; CHECK-NEXT:    [[VEC_START1:%.*]] = mul i64 1, [[STRIDE]]
  10 ; CHECK-NEXT:    [[VEC_GEP2:%.*]] = getelementptr double, ptr %in, i64 [[VEC_START1]]
  11 ; CHECK-NEXT:    load volatile <3 x double>, ptr [[VEC_GEP2]], align 8
  12 ; CHECK-NEXT:    [[VEC_START5:%.*]] = mul i64 2, [[STRIDE]]
  13 ; CHECK-NEXT:    [[VEC_GEP6:%.*]] = getelementptr double, ptr %in, i64 [[VEC_START5]]
  14 ; CHECK-NEXT:    load volatile <3 x double>, ptr [[VEC_GEP6]], align 8
  15 ; CHECK-NOT:     = load
  16 ;
  17 entry:
  18   %load = call <9 x double> @llvm.matrix.column.major.load.v9f64(ptr %in, i64 %stride, i1 true, i32 3, i32 3)
  19   ret <9 x double> %load
  20 }
  21
  ; Declaration of the <9 x double> column-major matrix load intrinsic called by
  ; the strided_load_3x3_* functions in this file.
  22 declare <9 x double> @llvm.matrix.column.major.load.v9f64(ptr, i64, i1, i32, i32)
  23
  ; Volatile vector load feeding a matrix multiply.
  ; A `load volatile <4 x double>` with align 8 is used as both operands of
  ; @llvm.matrix.multiply (i32 arguments 2, 2, 2). The expected output is two
  ; `load volatile <2 x double>` instructions with align 8: the first from the
  ; pointer argument, the second from a getelementptr 2 doubles past it. No
  ; further `= load` is expected.
  24 define <4 x double> @load_volatile_multiply(ptr %in) {
  25 ; CHECK-LABEL: @load_volatile_multiply(
  26 ; CHECK-NEXT:    load volatile <2 x double>, ptr [[IN:%.*]], align 8
  27 ; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr double, ptr [[IN]], i64 2
  28 ; CHECK-NEXT:    load volatile <2 x double>, ptr [[VEC_GEP]], align 8
  29 ; CHECK-NOT:     = load
  30 ;
  31   %in.m = load volatile <4 x double>, ptr %in, align 8
  32   %res = call <4 x double> @llvm.matrix.multiply(<4 x double> %in.m, <4 x double> %in.m, i32 2, i32 2, i32 2)
  33   ret <4 x double> %res
  34 }
  35
  ; Declaration of the matrix multiply intrinsic called by load_volatile_multiply
  ; and load_align2_multiply; both operands and the result are <4 x double>.
  36 declare <4 x double> @llvm.matrix.multiply(<4 x double>, <4 x double>, i32, i32, i32)
  37
  38
  ; Non-volatile column-major load from a pointer marked `align 32`, with a
  ; runtime stride.
  ; The expected output keeps align 32 only on the first `load <3 x double>`
  ; (element offset 0 * stride). The loads at 1 * stride and 2 * stride, whose
  ; offsets depend on the runtime %stride, are expected with align 8. No
  ; further `= load` is expected.
  39 define <9 x double> @strided_load_3x3_align32(ptr %in, i64 %stride) {
  40 ; CHECK-LABEL: @strided_load_3x3_align32(
  41 ; CHECK-NEXT:  entry:
  42 ; CHECK-NEXT:    [[VEC_START:%.*]] = mul i64 0, [[STRIDE:%.*]]
  43 ; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr double, ptr %in, i64 [[VEC_START]]
  44 ; CHECK-NEXT:    load <3 x double>, ptr [[VEC_GEP]], align 32
  45 ; CHECK-NEXT:    [[VEC_START1:%.*]] = mul i64 1, [[STRIDE]]
  46 ; CHECK-NEXT:    [[VEC_GEP2:%.*]] = getelementptr double, ptr %in, i64 [[VEC_START1]]
  47 ; CHECK-NEXT:    load <3 x double>, ptr [[VEC_GEP2]], align 8
  48 ; CHECK-NEXT:    [[VEC_START5:%.*]] = mul i64 2, [[STRIDE]]
  49 ; CHECK-NEXT:    [[VEC_GEP6:%.*]] = getelementptr double, ptr %in, i64 [[VEC_START5]]
  50 ; CHECK-NEXT:    load <3 x double>, ptr [[VEC_GEP6]], align 8
  51 ; CHECK-NOT:     = load
  52 ;
  53 entry:
  54   %load = call <9 x double> @llvm.matrix.column.major.load.v9f64(ptr align 32 %in, i64 %stride, i1 false, i32 3, i32 3)
  55   ret <9 x double> %load
  56 }
  57
  ; Non-volatile column-major load from a pointer marked `align 2`, with a
  ; runtime stride.
  ; All three expected `load <3 x double>` instructions (element offsets
  ; 0 * stride, 1 * stride and 2 * stride from %in) carry align 2. No further
  ; `= load` is expected.
  58 define <9 x double> @strided_load_3x3_align2(ptr %in, i64 %stride) {
  59 ; CHECK-LABEL: @strided_load_3x3_align2(
  60 ; CHECK-NEXT:  entry:
  61 ; CHECK-NEXT:    [[VEC_START:%.*]] = mul i64 0, [[STRIDE:%.*]]
  62 ; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr double, ptr %in, i64 [[VEC_START]]
  63 ; CHECK-NEXT:    load <3 x double>, ptr [[VEC_GEP]], align 2
  64 ; CHECK-NEXT:    [[VEC_START1:%.*]] = mul i64 1, [[STRIDE]]
  65 ; CHECK-NEXT:    [[VEC_GEP2:%.*]] = getelementptr double, ptr %in, i64 [[VEC_START1]]
  66 ; CHECK-NEXT:    load <3 x double>, ptr [[VEC_GEP2]], align 2
  67 ; CHECK-NEXT:    [[VEC_START5:%.*]] = mul i64 2, [[STRIDE]]
  68 ; CHECK-NEXT:    [[VEC_GEP6:%.*]] = getelementptr double, ptr %in, i64 [[VEC_START5]]
  69 ; CHECK-NEXT:    load <3 x double>, ptr [[VEC_GEP6]], align 2
  70 ; CHECK-NOT:     = load
  71 ;
  72 entry:
  73   %load = call <9 x double> @llvm.matrix.column.major.load.v9f64(ptr align 2 %in, i64 %stride, i1 false, i32 3, i32 3)
  74   ret <9 x double> %load
  75 }
  76
  77
  ; Under-aligned vector load feeding a matrix multiply.
  ; A `load <4 x double>` with align 2 is used as both operands of
  ; @llvm.matrix.multiply (i32 arguments 2, 2, 2). The expected output is two
  ; `load <2 x double>` instructions, both with align 2: the first from the
  ; pointer argument, the second from a getelementptr 2 doubles past it. No
  ; further `= load` is expected.
  78 define <4 x double> @load_align2_multiply(ptr %in) {
  79 ; CHECK-LABEL: @load_align2_multiply(
  80 ; CHECK-NEXT:    load <2 x double>, ptr [[IN:%.*]], align 2
  81 ; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr double, ptr [[IN]], i64 2
  82 ; CHECK-NEXT:    load <2 x double>, ptr [[VEC_GEP]], align 2
  83 ; CHECK-NOT:     = load
  84 ;
  85   %in.m = load <4 x double>, ptr %in, align 2
  86   %res = call <4 x double> @llvm.matrix.multiply(<4 x double> %in.m, <4 x double> %in.m, i32 2, i32 2, i32 2)
  87   ret <4 x double> %res
  88 }
  89
  ; Column-major float load with a constant stride of 2 from a pointer marked
  ; `align 16`.
  ; Because the stride is the literal 2, the expected column addresses are
  ; constant element offsets 0, 2 and 4 from %in. The expected alignments on
  ; the three `load <2 x float>` instructions are 16, 8 and 16 respectively.
  ; The loaded columns are then combined by three shufflevector instructions
  ; into the returned <6 x float>.
  90 define <6 x float> @strided_load_2x3_align16_stride2(ptr %in) {
  91 ; CHECK-LABEL: @strided_load_2x3_align16_stride2(
  92 ; CHECK-NEXT:  entry:
  93 ; CHECK-NEXT:    [[COL_LOAD:%.*]] = load <2 x float>, ptr %in, align 16
  94 ; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr float, ptr %in, i64 2
  95 ; CHECK-NEXT:    [[COL_LOAD2:%.*]] = load <2 x float>, ptr [[VEC_GEP]], align 8
  96 ; CHECK-NEXT:    [[VEC_GEP3:%.*]] = getelementptr float, ptr %in, i64 4
  97 ; CHECK-NEXT:    [[COL_LOAD5:%.*]] = load <2 x float>, ptr [[VEC_GEP3]], align 16
  98 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x float> [[COL_LOAD]], <2 x float> [[COL_LOAD2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  99 ; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x float> [[COL_LOAD5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
 100 ; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP2]], <6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>
 101 ; CHECK-NEXT:    ret <6 x float> [[TMP3]]
 102 ;
 103 entry:
 104   %load = call <6 x float> @llvm.matrix.column.major.load.v6f32(ptr align 16 %in, i64 2, i1 false, i32 2, i32 3)
 105   ret <6 x float> %load
 106 }
 107
 ; Declaration of the <6 x float> column-major matrix load intrinsic called by
 ; strided_load_2x3_align16_stride2.
 108 declare <6 x float> @llvm.matrix.column.major.load.v6f32(ptr, i64, i1, i32, i32)