test/Transforms/InstCombine/X86/x86-sse41.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
   2 ; RUN: opt < %s -instcombine -S | FileCheck %s
   3 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
   4
   5 define <2 x double> @test_round_sd(<2 x double> %a, <2 x double> %b) { ; returns the whole result vector; both inserts below are expected to be dropped
   6 ; CHECK-LABEL: @test_round_sd(
   7 ; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], i32 10)
   8 ; CHECK-NEXT:    ret <2 x double> [[TMP1]]
   9 ;
  10   %1 = insertelement <2 x double> %a, double 1.000000e+00, i32 0 ; overwrites lane 0 of %a; the expected call takes %a directly instead
  11   %2 = insertelement <2 x double> %b, double 2.000000e+00, i32 1 ; overwrites lane 1 of %b; the expected call takes %b directly instead
  12   %3 = tail call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %1, <2 x double> %2, i32 10) ; the call itself must survive, with the i32 10 immediate unchanged
  13   ret <2 x double> %3
  14 }
  15
  16 define double @test_round_sd_0(double %a, double %b) { ; only lane 0 of the result is used; expected output keeps just the %b insert
  17 ; CHECK-LABEL: @test_round_sd_0(
  18 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double [[B:%.*]], i32 0
  19 ; CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> undef, <2 x double> [[TMP1]], i32 10)
  20 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x double> [[TMP2]], i32 0
  21 ; CHECK-NEXT:    ret double [[TMP3]]
  22 ;
  23   %1 = insertelement <2 x double> undef, double %a, i32 0 ; first operand, lane 0 = %a; not present in the expected output
  24   %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1 ; first operand, lane 1 = 1.0; the expected call passes undef for this operand
  25   %3 = insertelement <2 x double> undef, double %b, i32 0 ; second operand, lane 0 = %b; the only insert the expected output keeps
  26   %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1 ; second operand, lane 1 = 2.0; not present in the expected output
  27   %5 = tail call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %2, <2 x double> %4, i32 10)
  28   %6 = extractelement <2 x double> %5, i32 0 ; reads lane 0 of the intrinsic result
  29   ret double %6
  30 }
  31
  32 define double @test_round_sd_1(double %a, double %b) { ; only lane 1 of the result is used; the whole body is expected to fold to a constant
  33 ; CHECK-LABEL: @test_round_sd_1(
  34 ; CHECK-NEXT:    ret double 1.000000e+00
  35 ;
  36   %1 = insertelement <2 x double> undef, double %a, i32 0 ; first operand, lane 0 = %a
  37   %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1 ; first operand, lane 1 = 1.0, which is the constant the expected output returns
  38   %3 = insertelement <2 x double> undef, double %b, i32 0 ; second operand, lane 0 = %b
  39   %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1 ; second operand, lane 1 = 2.0; differs from the expected 1.0 result
  40   %5 = tail call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %2, <2 x double> %4, i32 10) ; expected output contains no call at all
  41   %6 = extractelement <2 x double> %5, i32 1 ; reads lane 1 of the intrinsic result
  42   ret double %6
  43 }
  44
  45 define <4 x float> @test_round_ss(<4 x float> %a, <4 x float> %b) { ; returns the whole result vector; expected call takes a constant first operand and plain %b
  46 ; CHECK-LABEL: @test_round_ss(
  47 ; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> <float undef, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, <4 x float> [[B:%.*]], i32 10)
  48 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
  49 ;
  50   %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1 ; first operand, lane 1 = 1.0
  51   %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 ; first operand, lane 2 = 2.0
  52   %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3 ; first operand, lane 3 = 3.0; lane 0 still comes from %a here but is undef in the expected constant
  53   %4 = insertelement <4 x float> %b, float 1.000000e+00, i32 1 ; second operand, lane 1 overwritten; expected call takes %b directly instead
  54   %5 = insertelement <4 x float> %4, float 2.000000e+00, i32 2 ; second operand, lane 2 overwritten; likewise dropped
  55   %6 = insertelement <4 x float> %5, float 3.000000e+00, i32 3 ; second operand, lane 3 overwritten; likewise dropped
  56   %7 = tail call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %3, <4 x float> %6, i32 10) ; the call itself must survive, with the i32 10 immediate unchanged
  57   ret <4 x float> %7
  58 }
  59
  60 define float @test_round_ss_0(float %a, float %b) { ; only lane 0 of the result is used; expected output keeps just the %b insert
  61 ; CHECK-LABEL: @test_round_ss_0(
  62 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float [[B:%.*]], i32 0
  63 ; CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> undef, <4 x float> [[TMP1]], i32 10)
  64 ; CHECK-NEXT:    [[R:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
  65 ; CHECK-NEXT:    ret float [[R]]
  66 ;
  67   %1 = insertelement <4 x float> undef, float %a, i32 0 ; first operand, lane 0 = %a; not present in the expected output
  68   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1 ; first operand, lane 1 = 1.0
  69   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2 ; first operand, lane 2 = 2.0
  70   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3 ; first operand, lane 3 = 3.0; the expected call passes undef for this whole operand
  71   %5 = insertelement <4 x float> undef, float %b, i32 0 ; second operand, lane 0 = %b; the only insert the expected output keeps
  72   %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1 ; second operand, lane 1 = 4.0; not present in the expected output
  73   %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2 ; second operand, lane 2 = 5.0; not present in the expected output
  74   %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3 ; second operand, lane 3 = 6.0; not present in the expected output
  75   %9 = tail call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %4, <4 x float> %8, i32 10)
  76   %r = extractelement <4 x float> %9, i32 0 ; reads lane 0 of the intrinsic result
  77   ret float %r
  78 }
  79
  80 define float @test_round_ss_2(float %a, float %b) { ; only lane 2 of the result is used; the whole body is expected to fold to a constant
  81 ; CHECK-LABEL: @test_round_ss_2(
  82 ; CHECK-NEXT:    ret float 2.000000e+00
  83 ;
  84   %1 = insertelement <4 x float> undef, float %a, i32 0 ; first operand, lane 0 = %a
  85   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1 ; first operand, lane 1 = 1.0
  86   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2 ; first operand, lane 2 = 2.0, which is the constant the expected output returns
  87   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3 ; first operand, lane 3 = 3.0
  88   %5 = insertelement <4 x float> undef, float %b, i32 0 ; second operand, lane 0 = %b
  89   %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1 ; second operand, lane 1 = 4.0
  90   %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2 ; second operand, lane 2 = 5.0; differs from the expected 2.0 result
  91   %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3 ; second operand, lane 3 = 6.0
  92   %9 = tail call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %4, <4 x float> %8, i32 10) ; expected output contains no call at all
  93   %r = extractelement <4 x float> %9, i32 2 ; reads lane 2 of the intrinsic result
  94   ret float %r
  95 }
  96
  97 declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone ; intrinsic called by the test_round_sd* functions in this file
  98 declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone ; intrinsic called by the test_round_ss* functions in this file