test/CodeGen/ARM/urem-opt-size.ll

   1 ; When optimising for minimum size, we don't want to expand a div into a mul
   2 ; and shift sequence. As a result, a urem instruction, for example, will not
   3 ; be expanded into a sequence of umull, lsrs, muls and sub instructions, but
   4 ; into just a call to __aeabi_uidivmod.
   5 ;
   6 ; When the processor features hardware division, UDIV + UREM can be turned
   7 ; into UDIV + MLS. This prevents the library function __aeabi_uidivmod from
   8 ; being pulled into the binary. The V7M checks cover this case on ARMv7-M.
   9 ;
  10 ; RUN: llc -mtriple=armv7a-eabi -mattr=-neon -verify-machineinstrs %s -o - | FileCheck %s
  11 ; RUN: llc -mtriple=thumbv7m-eabi -verify-machineinstrs %s -o - | FileCheck %s -check-prefix=V7M
  12
  13 target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
  14 target triple = "thumbv7m-arm-none-eabi"
  15
  16 define i32 @foo1() local_unnamed_addr #0 {
  17 entry:
  18 ; CHECK-LABEL: foo1:
  19 ; CHECK:__aeabi_idiv
  20 ; CHECK-NOT: smmul
  21   %call = tail call i32 bitcast (i32 (...)* @GetValue to i32 ()*)()
  22   %div = sdiv i32 %call, 1000000
  23   ret i32 %div
  24 }
  25
  26 define i32 @foo2() local_unnamed_addr #0 {
  27 entry:
  28 ; CHECK-LABEL: foo2:
  29 ; CHECK: __aeabi_uidiv
  30 ; CHECK-NOT: umull
  31   %call = tail call i32 bitcast (i32 (...)* @GetValue to i32 ()*)()
  32   %div = udiv i32 %call, 1000000
  33   ret i32 %div
  34 }
  35
  36 ; Test for unsigned remainder
  37 define i32 @foo3() local_unnamed_addr #0 {
  38 entry:
  39 ; CHECK-LABEL: foo3:
  40 ; CHECK: __aeabi_uidivmod
  41 ; CHECK-NOT: umull
  42 ; V7M-LABEL: foo3:
  43 ; V7M: udiv [[R2:r[0-9]+]], [[R0:r[0-9]+]], [[R1:r[0-9]+]]
  44 ; V7M: mls {{r[0-9]+}}, [[R2]], [[R1]], [[R0]]
  45 ; V7M-NOT: __aeabi_uidivmod
  46   %call = tail call i32 bitcast (i32 (...)* @GetValue to i32 ()*)()
  47   %rem = urem i32 %call, 1000000
  48   %cmp = icmp eq i32 %rem, 0
  49   %conv = zext i1 %cmp to i32
  50   ret i32 %conv
  51 }
  52
  53 ; Test for signed remainder
  54 define i32 @foo4() local_unnamed_addr #0 {
  55 entry:
  56 ; CHECK-LABEL: foo4:
  57 ; CHECK:__aeabi_idivmod
  58 ; V7M-LABEL: foo4:
  59 ; V7M: sdiv [[R2:r[0-9]+]], [[R0:r[0-9]+]], [[R1:r[0-9]+]]
  60 ; V7M: mls {{r[0-9]+}}, [[R2]], [[R1]], [[R0]]
  61 ; V7M-NOT: __aeabi_idivmod
  62   %call = tail call i32 bitcast (i32 (...)* @GetValue to i32 ()*)()
  63   %rem = srem i32 %call, 1000000
  64   ret i32 %rem
  65 }
  66
  67 ; Check that doing a sdiv+srem has the same effect as only the srem,
  68 ; as the division needs to be computed anyway in order to calculate
  69 ; the remainder (i.e. make sure we don't end up with two divisions).
  70 define i32 @foo5() local_unnamed_addr #0 {
  71 entry:
  72 ; CHECK-LABEL: foo5:
  73 ; CHECK:__aeabi_idivmod
  74 ; V7M-LABEL: foo5:
  75 ; V7M: sdiv [[R2:r[0-9]+]], [[R0:r[0-9]+]], [[R1:r[0-9]+]]
  76 ; V7M-NOT: sdiv
  77 ; V7M: mls {{r[0-9]+}}, [[R2]], [[R1]], [[R0]]
  78 ; V7M-NOT: __aeabi_idivmod
  79   %call = tail call i32 bitcast (i32 (...)* @GetValue to i32 ()*)()
  80   %div = sdiv i32 %call, 1000000
  81   %rem = srem i32 %call, 1000000
  82   %add = add i32 %div, %rem
  83   ret i32 %add
  84 }
  85
  86 ; An early version of this patch caused isel to hang. The reason
  87 ; was that it shouldn't do the rewrite for i64 because that's not
  88 ; supported by hardware. Isel was stuck in a loop with type
  89 ; legalization and this optimisation.
  90 ; Function Attrs: norecurse nounwind
  91 define i64 @isel_dont_hang(i32 %bar) local_unnamed_addr #4 {
  92 entry:
  93 ; CHECK-LABEL: isel_dont_hang:
  94 ; CHECK: __aeabi_uldivmod
  95   %temp.0 = sext i32 %bar to i64
  96   %mul83 = shl i64 %temp.0, 1
  97   %add84 = add i64 %temp.0, 2
  98   %div85 = udiv i64 %mul83, %add84
  99   ret i64 %div85
 100 }
 101
 102 ; i16 types are promoted to i32, and we expect a normal udiv here:
 103 define i16 @isel_dont_hang_2(i16 %bar) local_unnamed_addr #4 {
 104 entry:
 105 ; CHECK-LABEL: isel_dont_hang_2:
 106 ; CHECK: udiv
 107 ; CHECK-NOT: __aeabi_
 108   %mul83 = shl i16 %bar, 1
 109   %add84 = add i16 %bar, 2
 110   %div85 = udiv i16 %mul83, %add84
 111   ret i16 %div85
 112 }
 113 declare i32 @GetValue(...) local_unnamed_addr
 114
 115 attributes #0 = { minsize nounwind optsize }
 116 attributes #4 = { norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-jump-tables"="false" "stack-protector-buffer-size"="8" "target-cpu"="cortex-a15" "target-features"="+dsp,+hwdiv,+hwdiv-arm,+neon,+vfp4" "use-soft-float"="false" }
 117