mlir/test/Target/LLVMIR/openmp-reduction.mlir

   1 // RUN: mlir-translate -mlir-to-llvmir -split-input-file %s | FileCheck %s
   2
   3 // Only check the overall shape of the code and the presence of relevant
   4 // runtime calls. Actual IR checking is done at the OpenMPIRBuilder level.
   5
   6 omp.declare_reduction @add_f32 : f32  // Floating-point sum reduction over f32.
   7 init {  // Yields the initial value 0.0.
   8 ^bb0(%arg: f32):
   9   %0 = llvm.mlir.constant(0.0 : f32) : f32
  10   omp.yield (%0 : f32)
  11 }
  12 combiner {  // Yields the fadd of its two f32 arguments.
  13 ^bb1(%arg0: f32, %arg1: f32):
  14   %1 = llvm.fadd %arg0, %arg1 : f32
  15   omp.yield (%1 : f32)
  16 }
  17 atomic {  // Loads an f32 through %arg3 and atomically fadds it into the location %arg2.
  18 ^bb2(%arg2: !llvm.ptr, %arg3: !llvm.ptr):
  19   %2 = llvm.load %arg3 : !llvm.ptr -> f32
  20   llvm.atomicrmw fadd %arg2, %2 monotonic : !llvm.ptr, f32
  21   omp.yield
  22 }
  23
  24 // CHECK-LABEL: @simple_reduction
  25 llvm.func @simple_reduction(%lb : i64, %ub : i64, %step : i64) {  // One @add_f32 reduction on a wsloop nested in omp.parallel.
  26   %c1 = llvm.mlir.constant(1 : i32) : i32
  27   %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr  // Reduction target. NOTE(review): element type is i32 while the reduction is f32 - confirm intentional.
  28   omp.parallel {
  29     omp.wsloop reduction(@add_f32 %0 -> %prv : !llvm.ptr) {  // The loop body accesses the reduction variable through %prv.
  30       omp.loop_nest (%iv) : i64 = (%lb) to (%ub) step (%step) {
  31         %1 = llvm.mlir.constant(2.0 : f32) : f32
  32         %2 = llvm.load %prv : !llvm.ptr -> f32
  33         %3 = llvm.fadd %1, %2 : f32  // Constant is the left operand here; the expected fadd line for this body uses the same order.
  34         llvm.store %3, %prv : f32, !llvm.ptr
  35         omp.yield
  36       }
  37     }
  38     omp.terminator
  39   }
  40   llvm.return
  41 }
  42
  43 // Call to the outlined function.
  44 // CHECK: call void {{.*}} @__kmpc_fork_call
  45 // CHECK-SAME: @[[OUTLINED:[A-Za-z_.][A-Za-z0-9_.]*]]
  46
  47 // Outlined function.
  48 // CHECK: define internal void @[[OUTLINED]]
  49
  50 // Private reduction variable and its initialization.
  51 // CHECK: %[[PRIVATE:.+]] = alloca float
  52 // CHECK: store float 0.000000e+00, ptr %[[PRIVATE]]
  53
  54 // Call to the reduction function.
  55 // CHECK: call i32 @__kmpc_reduce
  56 // CHECK-SAME: @[[REDFUNC:[A-Za-z_.][A-Za-z0-9_.]*]]
  57
  58 // Atomic reduction.
  59 // CHECK: %[[PARTIAL:.+]] = load float, ptr %[[PRIVATE]]
  60 // CHECK: atomicrmw fadd ptr %{{.*}}, float %[[PARTIAL]]
  61
  62 // Non-atomic reduction:
  63 // CHECK: fadd float
  64 // CHECK: call void @__kmpc_end_reduce
  65 // CHECK: br label %[[FINALIZE:.+]]
  66
  67 // CHECK: [[FINALIZE]]:
  68 // CHECK: call void @__kmpc_barrier
  69
  70 // Update of the private variable using the reduction region
  71 // (the body block currently comes after all the other blocks).
  72 // CHECK: %[[PARTIAL:.+]] = load float, ptr %[[PRIVATE]]
  73 // CHECK: %[[UPDATED:.+]] = fadd float 2.000000e+00, %[[PARTIAL]]
  74 // CHECK: store float %[[UPDATED]], ptr %[[PRIVATE]]
  75
  76 // Reduction function.
  77 // CHECK: define internal void @[[REDFUNC]]
  78 // CHECK: fadd float
  79
  80 // -----
  81
  82 omp.declare_reduction @add_f32 : f32  // f32 sum reduction, declared again for this split-input-file chunk.
  83 init {  // Yields the initial value 0.0.
  84 ^bb0(%arg: f32):
  85   %0 = llvm.mlir.constant(0.0 : f32) : f32
  86   omp.yield (%0 : f32)
  87 }
  88 combiner {  // Yields the fadd of its two f32 arguments.
  89 ^bb1(%arg0: f32, %arg1: f32):
  90   %1 = llvm.fadd %arg0, %arg1 : f32
  91   omp.yield (%1 : f32)
  92 }
  93 atomic {  // Loads an f32 through %arg3 and atomically fadds it into the location %arg2.
  94 ^bb2(%arg2: !llvm.ptr, %arg3: !llvm.ptr):
  95   %2 = llvm.load %arg3 : !llvm.ptr -> f32
  96   llvm.atomicrmw fadd %arg2, %2 monotonic : !llvm.ptr, f32
  97   omp.yield
  98 }
  99
 100 // When the same reduction declaration is used several times, its regions
 101 // are translated several times, which shouldn't lead to value/block
 102 // remapping assertions.
 103 // CHECK-LABEL: @reuse_declaration
 104 llvm.func @reuse_declaration(%lb : i64, %ub : i64, %step : i64) {  // Two reduction variables sharing the single @add_f32 declaration.
 105   %c1 = llvm.mlir.constant(1 : i32) : i32
 106   %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr  // First reduction target.
 107   %2 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr  // Second reduction target.
 108   omp.parallel {
 109     omp.wsloop reduction(@add_f32 %0 -> %prv0, @add_f32 %2 -> %prv1 : !llvm.ptr, !llvm.ptr) {  // Same symbol referenced twice.
 110       omp.loop_nest (%iv) : i64 = (%lb) to (%ub) step (%step) {
 111         %1 = llvm.mlir.constant(2.0 : f32) : f32
 112         %3 = llvm.load %prv0 : !llvm.ptr -> f32  // Add 2.0 to the first variable.
 113         %4 = llvm.fadd %3, %1 : f32
 114         llvm.store %4, %prv0 : f32, !llvm.ptr
 115         %5 = llvm.load %prv1 : !llvm.ptr -> f32  // Add 2.0 to the second variable.
 116         %6 = llvm.fadd %5, %1 : f32
 117         llvm.store %6, %prv1 : f32, !llvm.ptr
 118         omp.yield
 119       }
 120     }
 121     omp.terminator
 122   }
 123   llvm.return
 124 }
 125
 126 // Call to the outlined function.
 127 // CHECK: call void {{.*}} @__kmpc_fork_call
 128 // CHECK-SAME: @[[OUTLINED:[A-Za-z_.][A-Za-z0-9_.]*]]
 129
 130 // Outlined function.
 131 // CHECK: define internal void @[[OUTLINED]]
 132
 133 // Private reduction variables and their initialization.
 134 // CHECK: %[[PRIVATE1:.+]] = alloca float
 135 // CHECK: %[[PRIVATE2:.+]] = alloca float
 136 // CHECK: store float 0.000000e+00, ptr %[[PRIVATE1]]
 137 // CHECK: store float 0.000000e+00, ptr %[[PRIVATE2]]
 138
 139 // Call to the reduction function.
 140 // CHECK: call i32 @__kmpc_reduce
 141 // CHECK-SAME: @[[REDFUNC:[A-Za-z_.][A-Za-z0-9_.]*]]
 142
 143 // Atomic reduction.
 144 // CHECK: %[[PARTIAL1:.+]] = load float, ptr %[[PRIVATE1]]
 145 // CHECK: atomicrmw fadd ptr %{{.*}}, float %[[PARTIAL1]]
 146 // CHECK: %[[PARTIAL2:.+]] = load float, ptr %[[PRIVATE2]]
 147 // CHECK: atomicrmw fadd ptr %{{.*}}, float %[[PARTIAL2]]
 148
 149 // Non-atomic reduction:
 150 // CHECK: fadd float
 151 // CHECK: fadd float
 152 // CHECK: call void @__kmpc_end_reduce
 153 // CHECK: br label %[[FINALIZE:.+]]
 154
 155 // CHECK: [[FINALIZE]]:
 156 // CHECK: call void @__kmpc_barrier
 157
 158 // Update of the private variable using the reduction region
 159 // (the body block currently comes after all the other blocks).
 160 // CHECK: %[[PARTIAL1:.+]] = load float, ptr %[[PRIVATE1]]
 161 // CHECK: %[[UPDATED1:.+]] = fadd float %[[PARTIAL1]], 2.000000e+00
 162 // CHECK: store float %[[UPDATED1]], ptr %[[PRIVATE1]]
 163 // CHECK: %[[PARTIAL2:.+]] = load float, ptr %[[PRIVATE2]]
 164 // CHECK: %[[UPDATED2:.+]] = fadd float %[[PARTIAL2]], 2.000000e+00
 165 // CHECK: store float %[[UPDATED2]], ptr %[[PRIVATE2]]
 166
 167 // Reduction function.
 168 // CHECK: define internal void @[[REDFUNC]]
 169 // CHECK: fadd float
 170 // CHECK: fadd float
 171
 172
 173 // -----
 174
 175 omp.declare_reduction @add_f32 : f32  // f32 sum reduction, declared again for this split-input-file chunk.
 176 init {  // Yields the initial value 0.0.
 177 ^bb0(%arg: f32):
 178   %0 = llvm.mlir.constant(0.0 : f32) : f32
 179   omp.yield (%0 : f32)
 180 }
 181 combiner {  // Yields the fadd of its two f32 arguments.
 182 ^bb1(%arg0: f32, %arg1: f32):
 183   %1 = llvm.fadd %arg0, %arg1 : f32
 184   omp.yield (%1 : f32)
 185 }
 186 atomic {  // Loads an f32 through %arg3 and atomically fadds it into the location %arg2.
 187 ^bb2(%arg2: !llvm.ptr, %arg3: !llvm.ptr):
 188   %2 = llvm.load %arg3 : !llvm.ptr -> f32
 189   llvm.atomicrmw fadd %arg2, %2 monotonic : !llvm.ptr, f32
 190   omp.yield
 191 }
 192
 193 // It's okay not to reference the reduction variable in the body.
 194 // CHECK-LABEL: @missing_omp_reduction
 195 llvm.func @missing_omp_reduction(%lb : i64, %ub : i64, %step : i64) {  // Two reductions declared, only the first is touched in the body.
 196   %c1 = llvm.mlir.constant(1 : i32) : i32
 197   %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr  // First reduction target (updated in the loop).
 198   %2 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr  // Second reduction target (never updated in the loop).
 199   omp.parallel {
 200     omp.wsloop reduction(@add_f32 %0 -> %prv0, @add_f32 %2 -> %prv1 : !llvm.ptr, !llvm.ptr) {  // %prv1 is deliberately left unused below.
 201       omp.loop_nest (%iv) : i64 = (%lb) to (%ub) step (%step) {
 202         %1 = llvm.mlir.constant(2.0 : f32) : f32
 203         %3 = llvm.load %prv0 : !llvm.ptr -> f32  // Only %prv0 is read, updated and written back.
 204         %4 = llvm.fadd %3, %1 : f32
 205         llvm.store %4, %prv0 : f32, !llvm.ptr
 206         omp.yield
 207       }
 208     }
 209     omp.terminator
 210   }
 211   llvm.return
 212 }
 213
 214 // Call to the outlined function.
 215 // CHECK: call void {{.*}} @__kmpc_fork_call
 216 // CHECK-SAME: @[[OUTLINED:[A-Za-z_.][A-Za-z0-9_.]*]]
 217
 218 // Outlined function.
 219 // CHECK: define internal void @[[OUTLINED]]
 220
 221 // Private reduction variables and their initialization.
 222 // CHECK: %[[PRIVATE1:.+]] = alloca float
 223 // CHECK: %[[PRIVATE2:.+]] = alloca float
 224 // CHECK: store float 0.000000e+00, ptr %[[PRIVATE1]]
 225 // CHECK: store float 0.000000e+00, ptr %[[PRIVATE2]]
 226
 227 // Call to the reduction function.
 228 // CHECK: call i32 @__kmpc_reduce
 229 // CHECK-SAME: @[[REDFUNC:[A-Za-z_.][A-Za-z0-9_.]*]]
 230
 231 // Atomic reduction.
 232 // CHECK: %[[PARTIAL1:.+]] = load float, ptr %[[PRIVATE1]]
 233 // CHECK: atomicrmw fadd ptr %{{.*}}, float %[[PARTIAL1]]
 234 // CHECK: %[[PARTIAL2:.+]] = load float, ptr %[[PRIVATE2]]
 235 // CHECK: atomicrmw fadd ptr %{{.*}}, float %[[PARTIAL2]]
 236
 237 // Non-atomic reduction:
 238 // CHECK: fadd float
 239 // CHECK: fadd float
 240 // CHECK: call void @__kmpc_end_reduce
 241 // CHECK: br label %[[FINALIZE:.+]]
 242
 243 // CHECK: [[FINALIZE]]:
 244 // CHECK: call void @__kmpc_barrier
 245
 246 // Update of the private variable using the reduction region
 247 // (the body block currently comes after all the other blocks).
 248 // CHECK: %[[PARTIAL1:.+]] = load float, ptr %[[PRIVATE1]]
 249 // CHECK: %[[UPDATED1:.+]] = fadd float %[[PARTIAL1]], 2.000000e+00
 250 // CHECK: store float %[[UPDATED1]], ptr %[[PRIVATE1]]
 251 // CHECK-NOT: %{{.*}} = load float, ptr %[[PRIVATE2]]
 252 // CHECK-NOT: %{{.*}} = fadd float %[[PARTIAL2]], 2.000000e+00
 253
 254 // Reduction function.
 255 // CHECK: define internal void @[[REDFUNC]]
 256 // CHECK: fadd float
 257 // CHECK: fadd float
 258
 259 // -----
 260
 261 omp.declare_reduction @add_f32 : f32  // f32 sum reduction, declared again for this split-input-file chunk.
 262 init {  // Yields the initial value 0.0.
 263 ^bb0(%arg: f32):
 264   %0 = llvm.mlir.constant(0.0 : f32) : f32
 265   omp.yield (%0 : f32)
 266 }
 267 combiner {  // Yields the fadd of its two f32 arguments.
 268 ^bb1(%arg0: f32, %arg1: f32):
 269   %1 = llvm.fadd %arg0, %arg1 : f32
 270   omp.yield (%1 : f32)
 271 }
 272 atomic {  // Loads an f32 through %arg3 and atomically fadds it into the location %arg2.
 273 ^bb2(%arg2: !llvm.ptr, %arg3: !llvm.ptr):
 274   %2 = llvm.load %arg3 : !llvm.ptr -> f32
 275   llvm.atomicrmw fadd %arg2, %2 monotonic : !llvm.ptr, f32
 276   omp.yield
 277 }
 278
 279 // It's okay to refer to the same reduction variable more than once in the
 280 // body.
 281 // CHECK-LABEL: @double_reference
 282 llvm.func @double_reference(%lb : i64, %ub : i64, %step : i64) {  // One reduction variable updated twice per iteration.
 283   %c1 = llvm.mlir.constant(1 : i32) : i32
 284   %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr  // Reduction target.
 285   omp.parallel {
 286     omp.wsloop reduction(@add_f32 %0 -> %prv : !llvm.ptr) {
 287       omp.loop_nest (%iv) : i64 = (%lb) to (%ub) step (%step) {
 288         %1 = llvm.mlir.constant(2.0 : f32) : f32
 289         %2 = llvm.load %prv : !llvm.ptr -> f32  // First load/fadd/store sequence on %prv.
 290         %3 = llvm.fadd %2, %1 : f32
 291         llvm.store %3, %prv : f32, !llvm.ptr
 292         %4 = llvm.load %prv : !llvm.ptr -> f32  // Second, identical sequence on the same %prv.
 293         %5 = llvm.fadd %4, %1 : f32
 294         llvm.store %5, %prv : f32, !llvm.ptr
 295         omp.yield
 296       }
 297     }
 298     omp.terminator
 299   }
 300   llvm.return
 301 }
 302
 303 // Call to the outlined function.
 304 // CHECK: call void {{.*}} @__kmpc_fork_call
 305 // CHECK-SAME: @[[OUTLINED:[A-Za-z_.][A-Za-z0-9_.]*]]
 306
 307 // Outlined function.
 308 // CHECK: define internal void @[[OUTLINED]]
 309
 310 // Private reduction variable and its initialization.
 311 // CHECK: %[[PRIVATE:.+]] = alloca float
 312 // CHECK: store float 0.000000e+00, ptr %[[PRIVATE]]
 313
 314 // Call to the reduction function.
 315 // CHECK: call i32 @__kmpc_reduce
 316 // CHECK-SAME: @[[REDFUNC:[A-Za-z_.][A-Za-z0-9_.]*]]
 317
 318 // Atomic reduction.
 319 // CHECK: %[[PARTIAL:.+]] = load float, ptr %[[PRIVATE]]
 320 // CHECK: atomicrmw fadd ptr %{{.*}}, float %[[PARTIAL]]
 321
 322 // Non-atomic reduction:
 323 // CHECK: fadd float
 324 // CHECK: call void @__kmpc_end_reduce
 325 // CHECK: br label %[[FINALIZE:.+]]
 326
 327 // CHECK: [[FINALIZE]]:
 328 // CHECK: call void @__kmpc_barrier
 329
 330 // Update of the private variable using the reduction region
 331 // (the body block currently comes after all the other blocks).
 332 // CHECK: %[[PARTIAL:.+]] = load float, ptr %[[PRIVATE]]
 333 // CHECK: %[[UPDATED:.+]] = fadd float %[[PARTIAL]], 2.000000e+00
 334 // CHECK: store float %[[UPDATED]], ptr %[[PRIVATE]]
 335 // CHECK: %[[PARTIAL:.+]] = load float, ptr %[[PRIVATE]]
 336 // CHECK: %[[UPDATED:.+]] = fadd float %[[PARTIAL]], 2.000000e+00
 337 // CHECK: store float %[[UPDATED]], ptr %[[PRIVATE]]
 338
 339 // Reduction function.
 340 // CHECK: define internal void @[[REDFUNC]]
 341 // CHECK: fadd float
 342
 343 // -----
 344
 345 omp.declare_reduction @add_f32 : f32  // f32 sum reduction; this one provides an atomic region.
 346 init {  // Yields the initial value 0.0.
 347 ^bb0(%arg: f32):
 348   %0 = llvm.mlir.constant(0.0 : f32) : f32
 349   omp.yield (%0 : f32)
 350 }
 351 combiner {  // Yields the fadd of its two f32 arguments.
 352 ^bb1(%arg0: f32, %arg1: f32):
 353   %1 = llvm.fadd %arg0, %arg1 : f32
 354   omp.yield (%1 : f32)
 355 }
 356 atomic {  // Loads an f32 through %arg3 and atomically fadds it into the location %arg2.
 357 ^bb2(%arg2: !llvm.ptr, %arg3: !llvm.ptr):
 358   %2 = llvm.load %arg3 : !llvm.ptr -> f32
 359   llvm.atomicrmw fadd %arg2, %2 monotonic : !llvm.ptr, f32
 360   omp.yield
 361 }
 362
 363 omp.declare_reduction @mul_f32 : f32  // f32 product reduction; declared without an atomic region.
 364 init {  // Yields the initial value 1.0.
 365 ^bb0(%arg: f32):
 366   %0 = llvm.mlir.constant(1.0 : f32) : f32
 367   omp.yield (%0 : f32)
 368 }
 369 combiner {  // Yields the fmul of its two f32 arguments.
 370 ^bb1(%arg0: f32, %arg1: f32):
 371   %1 = llvm.fmul %arg0, %arg1 : f32
 372   omp.yield (%1 : f32)
 373 }
 374
 375 // CHECK-LABEL: @no_atomic
 376 llvm.func @no_atomic(%lb : i64, %ub : i64, %step : i64) {  // Mixes @add_f32 (has an atomic region) with @mul_f32 (has none).
 377   %c1 = llvm.mlir.constant(1 : i32) : i32
 378   %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr  // Target of the sum reduction.
 379   %2 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr  // Target of the product reduction.
 380   omp.parallel {
 381     omp.wsloop reduction(@add_f32 %0 -> %prv0, @mul_f32 %2 -> %prv1 : !llvm.ptr, !llvm.ptr) {
 382       omp.loop_nest (%iv) : i64 = (%lb) to (%ub) step (%step) {
 383         %1 = llvm.mlir.constant(2.0 : f32) : f32
 384         %3 = llvm.load %prv0 : !llvm.ptr -> f32  // Add 2.0 to the sum variable.
 385         %4 = llvm.fadd %3, %1 : f32
 386         llvm.store %4, %prv0 : f32, !llvm.ptr
 387         %5 = llvm.load %prv1 : !llvm.ptr -> f32  // Multiply the product variable by 2.0.
 388         %6 = llvm.fmul %5, %1 : f32
 389         llvm.store %6, %prv1 : f32, !llvm.ptr
 390         omp.yield
 391       }
 392     }
 393     omp.terminator
 394   }
 395   llvm.return
 396 }
 397
 398 // Call to the outlined function.
 399 // CHECK: call void {{.*}} @__kmpc_fork_call
 400 // CHECK-SAME: @[[OUTLINED:[A-Za-z_.][A-Za-z0-9_.]*]]
 401
 402 // Outlined function.
 403 // CHECK: define internal void @[[OUTLINED]]
 404
 405 // Private reduction variables and their initialization.
 406 // CHECK: %[[PRIVATE1:.+]] = alloca float
 407 // CHECK: %[[PRIVATE2:.+]] = alloca float
 408 // CHECK: store float 0.000000e+00, ptr %[[PRIVATE1]]
 409 // CHECK: store float 1.000000e+00, ptr %[[PRIVATE2]]
 410
 411 // Call to the reduction function.
 412 // CHECK: call i32 @__kmpc_reduce
 413 // CHECK-SAME: @[[REDFUNC:[A-Za-z_.][A-Za-z0-9_.]*]]
 414
 415 // Atomic reduction not provided.
 416 // CHECK: unreachable
 417
 418 // Non-atomic reduction:
 419 // CHECK: fadd float
 420 // CHECK: fmul float
 421 // CHECK: call void @__kmpc_end_reduce
 422 // CHECK: br label %[[FINALIZE:.+]]
 423
 424 // CHECK: [[FINALIZE]]:
 425 // CHECK: call void @__kmpc_barrier
 426
 427 // Update of the private variable using the reduction region
 428 // (the body block currently comes after all the other blocks).
 429 // CHECK: %[[PARTIAL1:.+]] = load float, ptr %[[PRIVATE1]]
 430 // CHECK: %[[UPDATED1:.+]] = fadd float %[[PARTIAL1]], 2.000000e+00
 431 // CHECK: store float %[[UPDATED1]], ptr %[[PRIVATE1]]
 432 // CHECK: %[[PARTIAL2:.+]] = load float, ptr %[[PRIVATE2]]
 433 // CHECK: %[[UPDATED2:.+]] = fmul float %[[PARTIAL2]], 2.000000e+00
 434 // CHECK: store float %[[UPDATED2]], ptr %[[PRIVATE2]]
 435
 436 // Reduction function.
 437 // CHECK: define internal void @[[REDFUNC]]
 438 // CHECK: fadd float
 439 // CHECK: fmul float
 440
 441 // -----
 442
 443 omp.declare_reduction @add_f32 : f32  // f32 sum reduction, declared again for this split-input-file chunk.
 444 init {  // Yields the initial value 0.0.
 445 ^bb0(%arg: f32):
 446   %0 = llvm.mlir.constant(0.0 : f32) : f32
 447   omp.yield (%0 : f32)
 448 }
 449 combiner {  // Yields the fadd of its two f32 arguments.
 450 ^bb1(%arg0: f32, %arg1: f32):
 451   %1 = llvm.fadd %arg0, %arg1 : f32
 452   omp.yield (%1 : f32)
 453 }
 454 atomic {  // Loads an f32 through %arg3 and atomically fadds it into the location %arg2.
 455 ^bb2(%arg2: !llvm.ptr, %arg3: !llvm.ptr):
 456   %2 = llvm.load %arg3 : !llvm.ptr -> f32
 457   llvm.atomicrmw fadd %arg2, %2 monotonic : !llvm.ptr, f32
 458   omp.yield
 459 }
 460
 461 // CHECK-LABEL: @simple_reduction_parallel
 462 llvm.func @simple_reduction_parallel() {  // Reduction clause attached directly to omp.parallel; no loop involved.
 463   %c1 = llvm.mlir.constant(1 : i32) : i32
 464   %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr  // Reduction target.
 465   omp.parallel reduction(@add_f32 %0 -> %prv : !llvm.ptr) {
 466     %1 = llvm.mlir.constant(2.0 : f32) : f32
 467     %2 = llvm.load %prv : !llvm.ptr -> f32  // Region body adds 2.0 to the variable reached through %prv.
 468     %3 = llvm.fadd %2, %1 : f32
 469     llvm.store %3, %prv : f32, !llvm.ptr
 470     omp.terminator
 471   }
 472   llvm.return
 473 }
 474
 475 // Call to the outlined function.
 476 // CHECK: call void {{.*}} @__kmpc_fork_call
 477 // CHECK-SAME: @[[OUTLINED:[A-Za-z_.][A-Za-z0-9_.]*]]
 478
 479 // Outlined function.
 480 // CHECK: define internal void @[[OUTLINED]]
 481
 482 // Private reduction variable and its initialization.
 483 // CHECK: %[[PRIVATE:.+]] = alloca float
 484 // CHECK: store float 0.000000e+00, ptr %[[PRIVATE]]
 485
 486 // Update of the private variable
 487 // CHECK: %[[PARTIAL:.+]] = load float, ptr %[[PRIVATE]]
 488 // CHECK: %[[UPDATED:.+]] = fadd float %[[PARTIAL]], 2.000000e+00
 489 // CHECK: store float %[[UPDATED]], ptr %[[PRIVATE]]
 490
 491 // Call to the reduction function.
 492 // CHECK: call i32 @__kmpc_reduce
 493 // CHECK-SAME: @[[REDFUNC:[A-Za-z_.][A-Za-z0-9_.]*]]
 494
 495 // Atomic reduction.
 496 // CHECK: %[[PARTIAL:.+]] = load float, ptr %[[PRIVATE]]
 497 // CHECK: atomicrmw fadd ptr %{{.*}}, float %[[PARTIAL]]
 498
 499 // Non-atomic reduction:
 500 // CHECK: fadd float
 501 // CHECK: call void @__kmpc_end_reduce
 502 // CHECK: br label %[[FINALIZE:.+]]
 503
 504 // CHECK: [[FINALIZE]]:
 505
 506 // Reduction function.
 507 // CHECK: define internal void @[[REDFUNC]]
 508 // CHECK: fadd float
 509
 510 // -----
 511
 512 omp.declare_reduction @add_i32 : i32  // Integer sum reduction over i32.
 513 init {  // Yields the initial value 0.
 514 ^bb0(%arg: i32):
 515   %0 = llvm.mlir.constant(0 : i32) : i32
 516   omp.yield (%0 : i32)
 517 }
 518 combiner {  // Yields the integer add of its two i32 arguments.
 519 ^bb1(%arg0: i32, %arg1: i32):
 520   %1 = llvm.add %arg0, %arg1 : i32
 521   omp.yield (%1 : i32)
 522 }
 523 atomic {  // Loads an i32 through %arg3 and atomically adds it into the location %arg2.
 524 ^bb2(%arg2: !llvm.ptr, %arg3: !llvm.ptr):
 525   %2 = llvm.load %arg3 : !llvm.ptr -> i32
 526   llvm.atomicrmw add %arg2, %2 monotonic : !llvm.ptr, i32
 527   omp.yield
 528 }
 529
 530 // CHECK-LABEL: @parallel_nested_workshare_reduction
 531 llvm.func @parallel_nested_workshare_reduction(%ub : i64) {  // i32 sum reduction on a wsloop nested in omp.parallel; only the upper bound is an argument.
 532   %c1 = llvm.mlir.constant(1 : i32) : i32
 533   %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr  // Reduction target; i32 matches @add_i32.
 534
 535   %lb = llvm.mlir.constant(1 : i64) : i64  // Lower bound and step are the constant 1, defined outside the parallel region.
 536   %step = llvm.mlir.constant(1 : i64) : i64
 537
 538   omp.parallel {
 539     omp.wsloop reduction(@add_i32 %0 -> %prv : !llvm.ptr) {
 540       omp.loop_nest (%iv) : i64 = (%lb) to (%ub) step (%step) {
 541         %ival = llvm.trunc %iv : i64 to i32  // Induction variable narrowed to the reduction type.
 542         %lprv = llvm.load %prv : !llvm.ptr -> i32
 543         %add = llvm.add %lprv, %ival : i32  // Accumulates the truncated induction variable.
 544         llvm.store %add, %prv : i32, !llvm.ptr
 545         omp.yield
 546       }
 547     }
 548     omp.terminator
 549   }
 550
 551   llvm.return
 552 }
 553
 554 // Call to the outlined function.
 555 // CHECK: call void {{.*}} @__kmpc_fork_call
 556 // CHECK-SAME: @[[OUTLINED:[A-Za-z_.][A-Za-z0-9_.]*]]
 557
 558 // Outlined function.
 559 // CHECK: define internal void @[[OUTLINED]]
 560
 561 // Private reduction variable and its initialization.
 562 // CHECK: %[[PRIVATE:[0-9]+]] = alloca i32
 563 // CHECK: store i32 0, ptr %[[PRIVATE]]
 564
 565 // Loop exit:
 566 // CHECK: call void @__kmpc_barrier
 567
 568 // Call to the reduction function.
 569 // CHECK: call i32 @__kmpc_reduce
 570 // CHECK-SAME: @[[REDFUNC:[A-Za-z_.][A-Za-z0-9_.]*]]
 571
 572 // Atomic reduction:
 573 // CHECK: %[[PARTIAL:.+]] = load i32, ptr %[[PRIVATE]]
 574 // CHECK: atomicrmw add ptr %{{.*}}, i32 %[[PARTIAL]]
 575
 576 // Non-atomic reduction:
 577 // CHECK: add i32
 578 // CHECK: call void @__kmpc_end_reduce
 579
 580 // Update of the private variable using the reduction region
 581 // (the body block currently comes after all the other blocks).
 582 // CHECK: %[[PARTIAL:.+]] = load i32, ptr %[[PRIVATE]]
 583 // CHECK: %[[UPDATED:.+]] = add i32 %[[PARTIAL]], {{.*}}
 584 // CHECK: store i32 %[[UPDATED]], ptr %[[PRIVATE]]
 585
 586 // Reduction function.
 587 // CHECK: define internal void @[[REDFUNC]]
 588 // CHECK: add i32
 589
 590 // -----
 591
 592 omp.declare_reduction @add_f32 : f32  // f32 sum reduction, declared again for this split-input-file chunk.
 593 init {  // Yields the initial value 0.0.
 594 ^bb0(%arg: f32):
 595   %0 = llvm.mlir.constant(0.0 : f32) : f32
 596   omp.yield (%0 : f32)
 597 }
 598 combiner {  // Yields the fadd of its two f32 arguments.
 599 ^bb1(%arg0: f32, %arg1: f32):
 600   %1 = llvm.fadd %arg0, %arg1 : f32
 601   omp.yield (%1 : f32)
 602 }
 603 atomic {  // Loads an f32 through %arg3 and atomically fadds it into the location %arg2.
 604 ^bb2(%arg2: !llvm.ptr, %arg3: !llvm.ptr):
 605   %2 = llvm.load %arg3 : !llvm.ptr -> f32
 606   llvm.atomicrmw fadd %arg2, %2 monotonic : !llvm.ptr, f32
 607   omp.yield
 608 }
 609
 610 // CHECK-LABEL: @wsloop_simd_reduction
 611 llvm.func @wsloop_simd_reduction(%lb : i64, %ub : i64, %step : i64) {  // Composite wsloop + simd, each carrying an @add_f32 reduction clause.
 612   %c1 = llvm.mlir.constant(1 : i32) : i32
 613   %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr  // Reduction target.
 614   omp.parallel {
 615     omp.wsloop reduction(@add_f32 %0 -> %prv1 : !llvm.ptr) {  // Outer wrapper: %0 is exposed as %prv1.
 616       omp.simd reduction(@add_f32 %prv1 -> %prv2 : !llvm.ptr) {  // Inner wrapper: %prv1 is in turn exposed as %prv2.
 617         omp.loop_nest (%iv) : i64 = (%lb) to (%ub) step (%step) {
 618           %1 = llvm.mlir.constant(2.0 : f32) : f32
 619           %2 = llvm.load %prv2 : !llvm.ptr -> f32  // The body only uses the innermost name %prv2.
 620           %3 = llvm.fadd %1, %2 : f32  // Constant is the left operand, as in the expected fadd line for this body.
 621           llvm.store %3, %prv2 : f32, !llvm.ptr
 622           omp.yield
 623         }
 624       } {omp.composite}
 625     } {omp.composite}
 626     omp.terminator
 627   }
 628   llvm.return
 629 }
 630
 631 // Same checks as for wsloop reduction, because currently omp.simd is ignored in
 632 // a composite 'do/for simd' construct.
 633 // Call to the outlined function.
 634 // CHECK: call void {{.*}} @__kmpc_fork_call
 635 // CHECK-SAME: @[[OUTLINED:[A-Za-z_.][A-Za-z0-9_.]*]]
 636
 637 // Outlined function.
 638 // CHECK: define internal void @[[OUTLINED]]
 639
 640 // Private reduction variable and its initialization.
 641 // CHECK: %[[PRIVATE:.+]] = alloca float
 642 // CHECK: store float 0.000000e+00, ptr %[[PRIVATE]]
 643
 644 // Call to the reduction function.
 645 // CHECK: call i32 @__kmpc_reduce
 646 // CHECK-SAME: @[[REDFUNC:[A-Za-z_.][A-Za-z0-9_.]*]]
 647
 648 // Atomic reduction.
 649 // CHECK: %[[PARTIAL:.+]] = load float, ptr %[[PRIVATE]]
 650 // CHECK: atomicrmw fadd ptr %{{.*}}, float %[[PARTIAL]]
 651
 652 // Non-atomic reduction:
 653 // CHECK: fadd float
 654 // CHECK: call void @__kmpc_end_reduce
 655 // CHECK: br label %[[FINALIZE:.+]]
 656
 657 // CHECK: [[FINALIZE]]:
 658 // CHECK: call void @__kmpc_barrier
 659
 660 // Update of the private variable using the reduction region
 661 // (the body block currently comes after all the other blocks).
 662 // CHECK: %[[PARTIAL:.+]] = load float, ptr %[[PRIVATE]]
 663 // CHECK: %[[UPDATED:.+]] = fadd float 2.000000e+00, %[[PARTIAL]]
 664 // CHECK: store float %[[UPDATED]], ptr %[[PRIVATE]]
 665
 666 // Reduction function.
 667 // CHECK: define internal void @[[REDFUNC]]
 668 // CHECK: fadd float