llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-hint.ll

   1 ; REQUIRES: asserts
   2 ; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -passes=loop-vectorize -S < %s 2>&1 | FileCheck %s
   3 ; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -passes=loop-vectorize -pass-remarks-analysis=loop-vectorize -debug-only=loop-vectorize -S < %s 2>&1 | FileCheck --check-prefix=CHECK-DBG %s
   4 ; RUN: opt -mtriple=aarch64-none-linux-gnu -passes=loop-vectorize -pass-remarks-analysis=loop-vectorize -debug-only=loop-vectorize -S < %s 2>%t | FileCheck --check-prefix=CHECK-NO-SVE %s
   5 ; RUN: cat %t | FileCheck %s -check-prefix=CHECK-NO-SVE-REMARKS
   6
   7 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
   8
   9 ; These tests validate the behaviour of scalable vectorization factor hints,
  10 ; where the following applies:
  11 ;
  12 ; * If the backend does not support scalable vectors, ignore the hint and let
  13 ;   the vectorizer pick a VF.
  14 ; * If there are no dependencies and assuming the VF is a power of 2 the VF
  15 ;   should be accepted. This applies to both fixed and scalable VFs.
  16 ; * If the dependency is too small to use scalable vectors, change the VF to
  17 ;   fixed, where existing behavior applies (clamping).
  18 ; * If scalable vectorization is feasible given the dependency and the VF is
  19 ;   valid, accept it. Otherwise, clamp to the max scalable VF.
  20
  21 ; test1
  22 ;
  23 ; Scalable vectorization unfeasible, clamp VF from (4, scalable) -> (4, fixed).
  24 ;
  25 ; The pragma applied to this loop implies a scalable vector <vscale x 4 x i32>
  26 ; be used for vectorization. For fixed vectors the MaxVF=8, otherwise there
  27 ; would be a dependence between vector lanes for vectors greater than 256 bits.
  28 ;
  29 ; void test1(int *a, int *b, int N) {
  30 ;   #pragma clang loop vectorize(enable) vectorize_width(4, scalable)
  31 ;   for (int i=0; i<N; ++i) {
  32 ;     a[i + 8] = a[i] + b[i];
  33 ;   }
  34 ; }
  35 ;
   36 ; For scalable vectorization 'vscale' has to be considered: in this example
   37 ; it is unsafe to vectorize unless max(vscale)<=2. For SVE max(vscale)=16, so
   38 ; check that fixed-width vectorization is used instead.
  39
  40 ; CHECK-DBG: LV: Checking a loop in 'test1'
  41 ; CHECK-DBG: LV: Scalable vectorization is available
  42 ; CHECK-DBG: LV: Max legal vector width too small, scalable vectorization unfeasible.
  43 ; CHECK-DBG: remark: <unknown>:0:0: Max legal vector width too small, scalable vectorization unfeasible.
  44 ; CHECK-DBG: LV: The max safe fixed VF is: 8.
  45 ; CHECK-DBG: LV: Selecting VF: 4.
  46 ; CHECK-LABEL: @test1
  47 ; CHECK: <4 x i32>
  48 define void @test1(ptr %a, ptr %b) #0 {
  49 entry:
  50   br label %loop
  51
  52 loop:
  53   %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  54   %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
  55   %0 = load i32, ptr %arrayidx, align 4
  56   %arrayidx2 = getelementptr inbounds i32, ptr %b, i64 %iv
  57   %1 = load i32, ptr %arrayidx2, align 4
  58   %add = add nsw i32 %1, %0
  59   %2 = add nuw nsw i64 %iv, 8
  60   %arrayidx5 = getelementptr inbounds i32, ptr %a, i64 %2
  61   store i32 %add, ptr %arrayidx5, align 4
  62   %iv.next = add nuw nsw i64 %iv, 1
  63   %exitcond.not = icmp eq i64 %iv.next, 1024
  64   br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !0
  65
  66 exit:
  67   ret void
  68 }
  69
  70 !0 = !{!0, !1, !2}
  71 !1 = !{!"llvm.loop.vectorize.width", i32 4}
  72 !2 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}
  73
  74 ; test2
  75 ;
  76 ; Scalable vectorization unfeasible, clamp VF from (8, scalable) -> (4, fixed).
  77 ;
  78 ; void test2(int *a, int *b, int N) {
  79 ;   #pragma clang loop vectorize(enable) vectorize_width(8, scalable)
  80 ;   for (int i=0; i<N; ++i) {
  81 ;     a[i + 4] = a[i] + b[i];
  82 ;   }
  83 ; }
  84
  85 ; CHECK-DBG: LV: Checking a loop in 'test2'
  86 ; CHECK-DBG: LV: Scalable vectorization is available
  87 ; CHECK-DBG: LV: Max legal vector width too small, scalable vectorization unfeasible.
  88 ; CHECK-DBG: LV: The max safe fixed VF is: 4.
  89 ; CHECK-DBG: LV: User VF=vscale x 8 is unsafe. Ignoring scalable UserVF.
  90 ; CHECK-DBG: LV: Selecting VF: 4.
  91 ; CHECK-LABEL: @test2
  92 ; CHECK: <4 x i32>
  93 define void @test2(ptr %a, ptr %b) #0 {
  94 entry:
  95   br label %loop
  96
  97 loop:
  98   %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  99   %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
 100   %0 = load i32, ptr %arrayidx, align 4
 101   %arrayidx2 = getelementptr inbounds i32, ptr %b, i64 %iv
 102   %1 = load i32, ptr %arrayidx2, align 4
 103   %add = add nsw i32 %1, %0
 104   %2 = add nuw nsw i64 %iv, 4
 105   %arrayidx5 = getelementptr inbounds i32, ptr %a, i64 %2
 106   store i32 %add, ptr %arrayidx5, align 4
 107   %iv.next = add nuw nsw i64 %iv, 1
 108   %exitcond.not = icmp eq i64 %iv.next, 1024
 109   br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !3
 110
 111 exit:
 112   ret void
 113 }
 114
 115 !3 = !{!3, !4, !5}
 116 !4 = !{!"llvm.loop.vectorize.width", i32 8}
 117 !5 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}
 118
 119 ; test3
 120 ;
 121 ; Scalable vectorization feasible and the VF is valid.
 122 ;
 123 ; Specifies a vector of <vscale x 2 x i32>, i.e. maximum of 32 x i32 with 2
 124 ; words per 128-bits (unpacked).
 125 ;
 126 ; void test3(int *a, int *b, int N) {
 127 ;   #pragma clang loop vectorize(enable) vectorize_width(2, scalable)
 128 ;   for (int i=0; i<N; ++i) {
 129 ;     a[i + 32] = a[i] + b[i];
 130 ;   }
 131 ; }
 132 ;
 133 ; Max fixed VF=32, Max scalable VF=2, safe to vectorize.
 134
 135 ; CHECK-DBG-LABEL: LV: Checking a loop in 'test3'
 136 ; CHECK-DBG: LV: Scalable vectorization is available
 137 ; CHECK-DBG: LV: The max safe scalable VF is: vscale x 2.
 138 ; CHECK-DBG: LV: Using user VF vscale x 2.
 139 ; CHECK-LABEL: @test3
 140 ; CHECK: <vscale x 2 x i32>
  141 define void @test3(ptr %a, ptr %b) #0 { ; a[i + 32] = a[i] + b[i], i in [0, 1024)
  142 entry:
  143   br label %loop
  144
  145 loop:
  146   %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] ; induction variable i, starts at 0
  147   %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
  148   %0 = load i32, ptr %arrayidx, align 4 ; a[i]
  149   %arrayidx2 = getelementptr inbounds i32, ptr %b, i64 %iv
  150   %1 = load i32, ptr %arrayidx2, align 4 ; b[i]
  151   %add = add nsw i32 %1, %0
  152   %2 = add nuw nsw i64 %iv, 32 ; store index: 32 elements ahead of the load from %a
  153   %arrayidx5 = getelementptr inbounds i32, ptr %a, i64 %2
  154   store i32 %add, ptr %arrayidx5, align 4 ; a[i + 32]
  155   %iv.next = add nuw nsw i64 %iv, 1
  156   %exitcond.not = icmp eq i64 %iv.next, 1024 ; exit after 1024 iterations
  157   br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !6
  158
  159 exit:
  160   ret void
  161 }
  162
  163 !6 = !{!6, !7, !8} ; loop metadata attached to the loop branch in @test3
  164 !7 = !{!"llvm.loop.vectorize.width", i32 2} ; width hint of 2
  165 !8 = !{!"llvm.loop.vectorize.scalable.enable", i1 true} ; hint: treat the width as scalable
 166
 167 ; test4
 168 ;
 169 ; Scalable vectorization feasible, but the given VF is unsafe. Should ignore
 170 ; the hint and leave it to the vectorizer to pick a more suitable VF.
 171 ;
 172 ; Specifies a vector of <vscale x 4 x i32>, i.e. maximum of 64 x i32 with 4
 173 ; words per 128-bits (packed).
 174 ;
 175 ; void test4(int *a, int *b, int N) {
 176 ;   #pragma clang loop vectorize(enable) vectorize_width(4, scalable)
 177 ;   for (int i=0; i<N; ++i) {
 178 ;     a[i + 32] = a[i] + b[i];
 179 ;   }
 180 ; }
 181 ;
  182 ; Max fixed VF=32, Max scalable VF=2, so the requested vscale x 4 is unsafe.
 183
 184 ; CHECK-DBG-LABEL: LV: Checking a loop in 'test4'
 185 ; CHECK-DBG: LV: Scalable vectorization is available
 186 ; CHECK-DBG: LV: The max safe scalable VF is: vscale x 2.
 187 ; CHECK-DBG: LV: User VF=vscale x 4 is unsafe. Ignoring scalable UserVF.
 188 ; CHECK-DBG: remark: <unknown>:0:0: User-specified vectorization factor vscale x 4 is unsafe. Ignoring the hint to let the compiler pick a more suitable value.
 189 ; CHECK-DBG: Found feasible scalable VF = vscale x 2
 190 ; CHECK-DBG: LV: Selecting VF: 4.
 191 ; CHECK-LABEL: @test4
 192 ; CHECK-NOT: <vscale x 4 x i32>
  193 define void @test4(ptr %a, ptr %b) #0 { ; a[i + 32] = a[i] + b[i], i in [0, 1024)
  194 entry:
  195   br label %loop
  196
  197 loop:
  198   %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] ; induction variable i, starts at 0
  199   %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
  200   %0 = load i32, ptr %arrayidx, align 4 ; a[i]
  201   %arrayidx2 = getelementptr inbounds i32, ptr %b, i64 %iv
  202   %1 = load i32, ptr %arrayidx2, align 4 ; b[i]
  203   %add = add nsw i32 %1, %0
  204   %2 = add nuw nsw i64 %iv, 32 ; store index: 32 elements ahead of the load from %a
  205   %arrayidx5 = getelementptr inbounds i32, ptr %a, i64 %2
  206   store i32 %add, ptr %arrayidx5, align 4 ; a[i + 32]
  207   %iv.next = add nuw nsw i64 %iv, 1
  208   %exitcond.not = icmp eq i64 %iv.next, 1024 ; exit after 1024 iterations
  209   br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !9
  210
  211 exit:
  212   ret void
  213 }
  214
  215 !9 = !{!9, !10, !11} ; loop metadata attached to the loop branch in @test4
  216 !10 = !{!"llvm.loop.vectorize.width", i32 4} ; width hint of 4
  217 !11 = !{!"llvm.loop.vectorize.scalable.enable", i1 true} ; hint: treat the width as scalable
 218
 219 ; test5
 220 ;
 221 ; Scalable vectorization feasible and the VF is valid.
 222 ;
 223 ; Specifies a vector of <vscale x 4 x i32>, i.e. maximum of 64 x i32 with 4
 224 ; words per 128-bits (packed).
 225 ;
 226 ; void test5(int *a, int *b, int N) {
 227 ;   #pragma clang loop vectorize(enable) vectorize_width(4, scalable)
 228 ;   for (int i=0; i<N; ++i) {
 229 ;     a[i + 128] = a[i] + b[i];
 230 ;   }
 231 ; }
 232 ;
 233 ; Max fixed VF=128, Max scalable VF=8, safe to vectorize.
 234
 235 ; CHECK-DBG-LABEL: LV: Checking a loop in 'test5'
 236 ; CHECK-DBG: LV: Scalable vectorization is available
 237 ; CHECK-DBG: LV: The max safe scalable VF is: vscale x 8.
 238 ; CHECK-DBG: LV: Using user VF vscale x 4
 239 ; CHECK-LABEL: @test5
 240 ; CHECK: <vscale x 4 x i32>
  241 define void @test5(ptr %a, ptr %b) #0 { ; a[i + 128] = a[i] + b[i], i in [0, 1024)
  242 entry:
  243   br label %loop
  244
  245 loop:
  246   %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] ; induction variable i, starts at 0
  247   %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
  248   %0 = load i32, ptr %arrayidx, align 4 ; a[i]
  249   %arrayidx2 = getelementptr inbounds i32, ptr %b, i64 %iv
  250   %1 = load i32, ptr %arrayidx2, align 4 ; b[i]
  251   %add = add nsw i32 %1, %0
  252   %2 = add nuw nsw i64 %iv, 128 ; store index: 128 elements ahead of the load from %a
  253   %arrayidx5 = getelementptr inbounds i32, ptr %a, i64 %2
  254   store i32 %add, ptr %arrayidx5, align 4 ; a[i + 128]
  255   %iv.next = add nuw nsw i64 %iv, 1
  256   %exitcond.not = icmp eq i64 %iv.next, 1024 ; exit after 1024 iterations
  257   br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !12
  258
  259 exit:
  260   ret void
  261 }
  262
  263 !12 = !{!12, !13, !14} ; loop metadata attached to the loop branch in @test5
  264 !13 = !{!"llvm.loop.vectorize.width", i32 4} ; width hint of 4
  265 !14 = !{!"llvm.loop.vectorize.scalable.enable", i1 true} ; hint: treat the width as scalable
 266
 267 ; test6
 268 ;
 269 ; Scalable vectorization feasible, but the VF is unsafe. Should ignore
 270 ; the hint and leave it to the vectorizer to pick a more suitable VF.
 271 ;
 272 ; Specifies a vector of <vscale x 16 x i32>, i.e. maximum of 256 x i32.
 273 ;
 274 ; void test6(int *a, int *b, int N) {
 275 ;   #pragma clang loop vectorize(enable) vectorize_width(16, scalable)
 276 ;   for (int i=0; i<N; ++i) {
 277 ;     a[i + 128] = a[i] + b[i];
 278 ;   }
 279 ; }
 280 ;
  281 ; Max fixed VF=128, Max scalable VF=8, so the requested vscale x 16 is unsafe.
 282
 283 ; CHECK-DBG-LABEL: LV: Checking a loop in 'test6'
 284 ; CHECK-DBG: LV: Scalable vectorization is available
 285 ; CHECK-DBG: LV: The max safe scalable VF is: vscale x 8.
 286 ; CHECK-DBG: LV: User VF=vscale x 16 is unsafe. Ignoring scalable UserVF.
 287 ; CHECK-DBG: remark: <unknown>:0:0: User-specified vectorization factor vscale x 16 is unsafe. Ignoring the hint to let the compiler pick a more suitable value.
 288 ; CHECK-DBG: LV: Found feasible scalable VF = vscale x 4
 289 ; CHECK-DBG: Selecting VF: vscale x 4.
 290 ; CHECK-LABEL: @test6
 291 ; CHECK: <vscale x 4 x i32>
  292 define void @test6(ptr %a, ptr %b) #0 { ; a[i + 128] = a[i] + b[i], i in [0, 1024)
  293 entry:
  294   br label %loop
  295
  296 loop:
  297   %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] ; induction variable i, starts at 0
  298   %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
  299   %0 = load i32, ptr %arrayidx, align 4 ; a[i]
  300   %arrayidx2 = getelementptr inbounds i32, ptr %b, i64 %iv
  301   %1 = load i32, ptr %arrayidx2, align 4 ; b[i]
  302   %add = add nsw i32 %1, %0
  303   %2 = add nuw nsw i64 %iv, 128 ; store index: 128 elements ahead of the load from %a
  304   %arrayidx5 = getelementptr inbounds i32, ptr %a, i64 %2
  305   store i32 %add, ptr %arrayidx5, align 4 ; a[i + 128]
  306   %iv.next = add nuw nsw i64 %iv, 1
  307   %exitcond.not = icmp eq i64 %iv.next, 1024 ; exit after 1024 iterations
  308   br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !15
  309
  310 exit:
  311   ret void
  312 }
  313
  314 !15 = !{!15, !16, !17} ; loop metadata attached to the loop branch in @test6
  315 !16 = !{!"llvm.loop.vectorize.width", i32 16} ; width hint of 16
  316 !17 = !{!"llvm.loop.vectorize.scalable.enable", i1 true} ; hint: treat the width as scalable
 317
 318 ; CHECK-NO-SVE-REMARKS-LABEL: LV: Checking a loop in 'test_no_sve'
 319 ; CHECK-NO-SVE-REMARKS: LV: User VF=vscale x 4 is ignored because scalable vectors are not available.
 320 ; CHECK-NO-SVE-REMARKS: remark: <unknown>:0:0: User-specified vectorization factor vscale x 4 is ignored because the target does not support scalable vectors. The compiler will pick a more suitable value.
 321 ; CHECK-NO-SVE-REMARKS: LV: Selecting VF: 4.
 322 ; CHECK-NO-SVE-LABEL: @test_no_sve
 323 ; CHECK-NO-SVE: <4 x i32>
 324 ; CHECK-NO-SVE-NOT: <vscale x 4 x i32>
  325 define void @test_no_sve(ptr %a, ptr %b) #0 { ; a[i] = a[i] + b[i], i in [0, 1024)
  326 entry:
  327   br label %loop
  328
  329 loop:
  330   %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] ; induction variable i, starts at 0
  331   %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
  332   %0 = load i32, ptr %arrayidx, align 4 ; a[i]
  333   %arrayidx2 = getelementptr inbounds i32, ptr %b, i64 %iv
  334   %1 = load i32, ptr %arrayidx2, align 4 ; b[i]
  335   %add = add nsw i32 %1, %0
  336   store i32 %add, ptr %arrayidx, align 4 ; written back to the same a[i] slot that was loaded
  337   %iv.next = add nuw nsw i64 %iv, 1
  338   %exitcond.not = icmp eq i64 %iv.next, 1024 ; exit after 1024 iterations
  339   br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !18
  340
  341 exit:
  342   ret void
  343 }
  344
  345 !18 = !{!18, !19, !20} ; loop metadata attached to the loop branch in @test_no_sve
  346 !19 = !{!"llvm.loop.vectorize.width", i32 4} ; width hint of 4
  347 !20 = !{!"llvm.loop.vectorize.scalable.enable", i1 true} ; hint: treat the width as scalable
 348
  349 ; Test the LV falls back to fixed-width vectorization if scalable vectors are
  350 ; supported but max vscale is undefined.
  351 ; NOTE(review): the function below uses #0 (vscale_range(1, 16)) -- confirm.
 352 ; CHECK-DBG-LABEL: LV: Checking a loop in 'test_no_max_vscale'
 353 ; CHECK-DBG: LV: Scalable vectorization is available
 354 ; CHECK-DBG: The max safe fixed VF is: 4.
 355 ; CHECK-DBG: LV: User VF=vscale x 4 is unsafe. Ignoring scalable UserVF.
 356 ; CHECK-DBG: LV: Selecting VF: 4.
 357 ; CHECK-LABEL: @test_no_max_vscale
 358 ; CHECK: <4 x i32>
  359 define void @test_no_max_vscale(ptr %a, ptr %b) #0 { ; a[i + 4] = a[i] + b[i], i in [0, 1024)
  360 entry:
  361   br label %loop
  362
  363 loop:
  364   %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] ; induction variable i, starts at 0
  365   %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
  366   %0 = load i32, ptr %arrayidx, align 4 ; a[i]
  367   %arrayidx2 = getelementptr inbounds i32, ptr %b, i64 %iv
  368   %1 = load i32, ptr %arrayidx2, align 4 ; b[i]
  369   %add = add nsw i32 %1, %0
  370   %2 = add nuw nsw i64 %iv, 4 ; store index: 4 elements ahead of the load from %a
  371   %arrayidx5 = getelementptr inbounds i32, ptr %a, i64 %2
  372   store i32 %add, ptr %arrayidx5, align 4 ; a[i + 4]
  373   %iv.next = add nuw nsw i64 %iv, 1
  374   %exitcond.not = icmp eq i64 %iv.next, 1024 ; exit after 1024 iterations
  375   br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !21
  376
  377 exit:
  378   ret void
  379 }
  380
  381 attributes #0 = { vscale_range(1, 16) } ; attribute group referenced by every function above
  382 !21 = !{!21, !22, !23} ; loop metadata attached to the loop branch in @test_no_max_vscale
  383 !22 = !{!"llvm.loop.vectorize.width", i32 4} ; width hint of 4
  384 !23 = !{!"llvm.loop.vectorize.scalable.enable", i1 true} ; hint: treat the width as scalable