test/CodeGen/WebAssembly/simd-arith.ll

   1 ; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -wasm-enable-unimplemented-simd -mattr=+simd128 | FileCheck %s --check-prefixes CHECK,SIMD128,SIMD128-SLOW
   2 ; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -wasm-enable-unimplemented-simd -mattr=+simd128 -fast-isel | FileCheck %s --check-prefixes CHECK,SIMD128,SIMD128-FAST
   3 ; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 | FileCheck %s --check-prefixes CHECK,SIMD128-VM
   4 ; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 -fast-isel | FileCheck %s --check-prefixes CHECK,SIMD128-VM
   5 ; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=-simd128 | FileCheck %s --check-prefixes CHECK,NO-SIMD128
   6 ; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=-simd128 -fast-isel | FileCheck %s --check-prefixes CHECK,NO-SIMD128
   7
; Check that a non-test run (including the explicit-locals pass) at least finishes.
   9 ; RUN: llc < %s -O0 -wasm-enable-unimplemented-simd -mattr=+simd128,+sign-ext
  10 ; RUN: llc < %s -O2 -wasm-enable-unimplemented-simd -mattr=+simd128,+sign-ext
  11
  12 ; Test that basic SIMD128 arithmetic operations assemble as expected.
  13
  14 target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
  15 target triple = "wasm32-unknown-unknown"
  16
  17 ; ==============================================================================
  18 ; 16 x i8
  19 ; ==============================================================================
  20 ; CHECK-LABEL: add_v16i8:
  21 ; NO-SIMD128-NOT: i8x16
  22 ; SIMD128-NEXT: .functype add_v16i8 (v128, v128) -> (v128){{$}}
  23 ; SIMD128-NEXT: i8x16.add $push[[R:[0-9]+]]=, $0, $1{{$}}
  24 ; SIMD128-NEXT: return $pop[[R]]{{$}}
  25 define <16 x i8> @add_v16i8(<16 x i8> %x, <16 x i8> %y) {
  26   %a = add <16 x i8> %x, %y
  27   ret <16 x i8> %a
  28 }
  29
  30 ; CHECK-LABEL: sub_v16i8:
  31 ; NO-SIMD128-NOT: i8x16
  32 ; SIMD128-NEXT: .functype sub_v16i8 (v128, v128) -> (v128){{$}}
  33 ; SIMD128-NEXT: i8x16.sub $push[[R:[0-9]+]]=, $0, $1{{$}}
  34 ; SIMD128-NEXT: return $pop[[R]]{{$}}
  35 define <16 x i8> @sub_v16i8(<16 x i8> %x, <16 x i8> %y) {
  36   %a = sub <16 x i8> %x, %y
  37   ret <16 x i8> %a
  38 }
  39
  40 ; CHECK-LABEL: mul_v16i8:
  41 ; NO-SIMD128-NOT: i8x16
  42 ; SIMD128-NEXT: .functype mul_v16i8 (v128, v128) -> (v128){{$}}
  43 ; SIMD128-NEXT: i8x16.mul $push[[R:[0-9]+]]=, $0, $1{{$}}
  44 ; SIMD128-NEXT: return $pop[[R]]{{$}}
  45 define <16 x i8> @mul_v16i8(<16 x i8> %x, <16 x i8> %y) {
  46   %a = mul <16 x i8> %x, %y
  47   ret <16 x i8> %a
  48 }
  49
  50 ; CHECK-LABEL: neg_v16i8:
  51 ; NO-SIMD128-NOT: i8x16
  52 ; SIMD128-NEXT: .functype neg_v16i8 (v128) -> (v128){{$}}
  53 ; SIMD128-NEXT: i8x16.neg $push[[R:[0-9]+]]=, $0{{$}}
  54 ; SIMD128-NEXT: return $pop[[R]]{{$}}
  55 define <16 x i8> @neg_v16i8(<16 x i8> %x) {
  56   %a = sub <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0,
  57                       i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>,
  58                      %x
  59   ret <16 x i8> %a
  60 }
  61
  62 ; CHECK-LABEL: shl_v16i8:
  63 ; NO-SIMD128-NOT: i8x16
  64 ; SIMD128-NEXT: .functype shl_v16i8 (v128, i32) -> (v128){{$}}
  65 ; SIMD128-NEXT: i8x16.shl $push[[R:[0-9]+]]=, $0, $1{{$}}
  66 ; SIMD128-NEXT: return $pop[[R]]{{$}}
  67 define <16 x i8> @shl_v16i8(<16 x i8> %v, i8 %x) {
  68   %t = insertelement <16 x i8> undef, i8 %x, i32 0
  69   %s = shufflevector <16 x i8> %t, <16 x i8> undef,
  70     <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0,
  71                 i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  72   %a = shl <16 x i8> %v, %s
  73   ret <16 x i8> %a
  74 }
  75
  76 ; CHECK-LABEL: shl_const_v16i8:
  77 ; NO-SIMD128-NOT: i8x16
  78 ; SIMD128-NEXT: .functype shl_const_v16i8 (v128) -> (v128){{$}}
  79 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 5
  80 ; SIMD128-NEXT: i8x16.shl $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
  81 ; SIMD128-NEXT: return $pop[[R]]{{$}}
  82 define <16 x i8> @shl_const_v16i8(<16 x i8> %v) {
  83   %a = shl <16 x i8> %v,
  84     <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5,
  85      i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5>
  86   ret <16 x i8> %a
  87 }
  88
  89 ; CHECK-LABEL: shl_vec_v16i8:
  90 ; NO-SIMD128-NOT: i8x16
  91 ; SIMD128-NEXT: .functype shl_vec_v16i8 (v128, v128) -> (v128){{$}}
  92 ; SIMD128-NEXT: i8x16.extract_lane_s $push[[L0:[0-9]+]]=, $0, 0{{$}}
  93 ; SIMD128-NEXT: i8x16.extract_lane_u $push[[L1:[0-9]+]]=, $1, 0{{$}}
  94 ; SIMD128-NEXT: i32.shl $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
  95 ; SIMD128-NEXT: i8x16.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
  96 ; Skip 14 lanes
  97 ; SIMD128:      i8x16.extract_lane_s $push[[L4:[0-9]+]]=, $0, 15{{$}}
  98 ; SIMD128-NEXT: i8x16.extract_lane_u $push[[L5:[0-9]+]]=, $1, 15{{$}}
  99 ; SIMD128-NEXT: i32.shl $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
 100 ; SIMD128-NEXT: i8x16.replace_lane $push[[R:[0-9]+]]=, $pop[[L7:[0-9]+]], 15, $pop[[L6]]{{$}}
 101 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 102 define <16 x i8> @shl_vec_v16i8(<16 x i8> %v, <16 x i8> %x) {
 103   %a = shl <16 x i8> %v, %x
 104   ret <16 x i8> %a
 105 }
 106
 107 ; CHECK-LABEL: shr_s_v16i8:
 108 ; NO-SIMD128-NOT: i8x16
 109 ; SIMD128-NEXT: .functype shr_s_v16i8 (v128, i32) -> (v128){{$}}
 110 ; SIMD128-NEXT: i8x16.shr_s $push[[R:[0-9]+]]=, $0, $1{{$}}
 111 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 112 define <16 x i8> @shr_s_v16i8(<16 x i8> %v, i8 %x) {
 113   %t = insertelement <16 x i8> undef, i8 %x, i32 0
 114   %s = shufflevector <16 x i8> %t, <16 x i8> undef,
 115     <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0,
 116                 i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
 117   %a = ashr <16 x i8> %v, %s
 118   ret <16 x i8> %a
 119 }
 120
 121 ; CHECK-LABEL: shr_s_vec_v16i8:
 122 ; NO-SIMD128-NOT: i8x16
 123 ; SIMD128-NEXT: .functype shr_s_vec_v16i8 (v128, v128) -> (v128){{$}}
 124 ; SIMD128-NEXT: i8x16.extract_lane_s $push[[L0:[0-9]+]]=, $0, 0{{$}}
 125 ; SIMD128-NEXT: i32.const $push[[L1:[0-9]+]]=, 24{{$}}
 126 ; SIMD128-NEXT: i32.shl $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
 127 ; SIMD128-NEXT: i32.const $push[[L3:[0-9]+]]=, 24{{$}}
 128 ; SIMD128-NEXT: i32.shr_s $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]]{{$}}
 129 ; SIMD128-NEXT: i8x16.extract_lane_u $push[[L5:[0-9]+]]=, $1, 0{{$}}
 130 ; SIMD128-NEXT: i32.shr_s $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
 131 ; SIMD128-NEXT: i8x16.splat $push[[L7:[0-9]+]]=, $pop[[L6]]{{$}}
 132 ; Skip 14 lanes
 133 ; SIMD128:      i8x16.extract_lane_s $push[[L7:[0-9]+]]=, $0, 15{{$}}
 134 ; SIMD128-NEXT: i32.const $push[[L8:[0-9]+]]=, 24{{$}}
 135 ; SIMD128-NEXT: i32.shl $push[[L9:[0-9]+]]=, $pop[[L7]], $pop[[L8]]{{$}}
 136 ; SIMD128-NEXT: i32.const $push[[L10:[0-9]+]]=, 24{{$}}
 137 ; SIMD128-NEXT: i32.shr_s $push[[L11:[0-9]+]]=, $pop[[L9]], $pop[[L10]]{{$}}
 138 ; SIMD128-NEXT: i8x16.extract_lane_u $push[[L12:[0-9]+]]=, $1, 15{{$}}
 139 ; SIMD128-NEXT: i32.shr_s $push[[L13:[0-9]+]]=, $pop[[L11]], $pop[[L12]]{{$}}
 140 ; SIMD128-NEXT: i8x16.replace_lane $push[[R:[0-9]+]]=, $pop[[L14:[0-9]+]], 15, $pop[[L13]]{{$}}
 141 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 142 define <16 x i8> @shr_s_vec_v16i8(<16 x i8> %v, <16 x i8> %x) {
 143   %a = ashr <16 x i8> %v, %x
 144   ret <16 x i8> %a
 145 }
 146
 147 ; CHECK-LABEL: shr_u_v16i8:
 148 ; NO-SIMD128-NOT: i8x16
 149 ; SIMD128-NEXT: .functype shr_u_v16i8 (v128, i32) -> (v128){{$}}
 150 ; SIMD128-NEXT: i8x16.shr_u $push[[R:[0-9]+]]=, $0, $1{{$}}
 151 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 152 define <16 x i8> @shr_u_v16i8(<16 x i8> %v, i8 %x) {
 153   %t = insertelement <16 x i8> undef, i8 %x, i32 0
 154   %s = shufflevector <16 x i8> %t, <16 x i8> undef,
 155     <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0,
 156                 i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
 157   %a = lshr <16 x i8> %v, %s
 158   ret <16 x i8> %a
 159 }
 160
 161 ; CHECK-LABEL: shr_u_vec_v16i8:
 162 ; NO-SIMD128-NOT: i8x16
 163 ; SIMD128-NEXT: .functype shr_u_vec_v16i8 (v128, v128) -> (v128){{$}}
 164 ; SIMD128-NEXT: i8x16.extract_lane_u $push[[L0:[0-9]+]]=, $0, 0{{$}}
 165 ; SIMD128-NEXT: i8x16.extract_lane_u $push[[L1:[0-9]+]]=, $1, 0{{$}}
 166 ; SIMD128-NEXT: i32.shr_u $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
 167 ; SIMD128-NEXT: i8x16.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
 168 ; Skip 14 lanes
 169 ; SIMD128:      i8x16.extract_lane_u $push[[L4:[0-9]+]]=, $0, 15{{$}}
 170 ; SIMD128-NEXT: i8x16.extract_lane_u $push[[L5:[0-9]+]]=, $1, 15{{$}}
 171 ; SIMD128-NEXT: i32.shr_u $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
 172 ; SIMD128-NEXT: i8x16.replace_lane $push[[R:[0-9]+]]=, $pop[[L7:[0-9]+]], 15, $pop[[L6]]{{$}}
 173 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 174 define <16 x i8> @shr_u_vec_v16i8(<16 x i8> %v, <16 x i8> %x) {
 175   %a = lshr <16 x i8> %v, %x
 176   ret <16 x i8> %a
 177 }
 178
 179 ; CHECK-LABEL: and_v16i8:
 180 ; NO-SIMD128-NOT: v128
 181 ; SIMD128-NEXT: .functype and_v16i8 (v128, v128) -> (v128){{$}}
 182 ; SIMD128-NEXT: v128.and $push[[R:[0-9]+]]=, $0, $1{{$}}
 183 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 184 define <16 x i8> @and_v16i8(<16 x i8> %x, <16 x i8> %y) {
 185   %a = and <16 x i8> %x, %y
 186   ret <16 x i8> %a
 187 }
 188
 189 ; CHECK-LABEL: or_v16i8:
 190 ; NO-SIMD128-NOT: v128
 191 ; SIMD128-NEXT: .functype or_v16i8 (v128, v128) -> (v128){{$}}
 192 ; SIMD128-NEXT: v128.or $push[[R:[0-9]+]]=, $0, $1{{$}}
 193 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 194 define <16 x i8> @or_v16i8(<16 x i8> %x, <16 x i8> %y) {
 195   %a = or <16 x i8> %x, %y
 196   ret <16 x i8> %a
 197 }
 198
 199 ; CHECK-LABEL: xor_v16i8:
 200 ; NO-SIMD128-NOT: v128
 201 ; SIMD128-NEXT: .functype xor_v16i8 (v128, v128) -> (v128){{$}}
 202 ; SIMD128-NEXT: v128.xor $push[[R:[0-9]+]]=, $0, $1{{$}}
 203 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 204 define <16 x i8> @xor_v16i8(<16 x i8> %x, <16 x i8> %y) {
 205   %a = xor <16 x i8> %x, %y
 206   ret <16 x i8> %a
 207 }
 208
 209 ; CHECK-LABEL: not_v16i8:
 210 ; NO-SIMD128-NOT: v128
 211 ; SIMD128-NEXT: .functype not_v16i8 (v128) -> (v128){{$}}
 212 ; SIMD128-NEXT: v128.not $push[[R:[0-9]+]]=, $0{{$}}
 213 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 214 define <16 x i8> @not_v16i8(<16 x i8> %x) {
 215   %a = xor <16 x i8> %x, <i8 -1, i8 -1, i8 -1, i8 -1,
 216                           i8 -1, i8 -1, i8 -1, i8 -1,
 217                           i8 -1, i8 -1, i8 -1, i8 -1,
 218                           i8 -1, i8 -1, i8 -1, i8 -1>
 219   ret <16 x i8> %a
 220 }
 221
 222 ; CHECK-LABEL: bitselect_v16i8:
 223 ; NO-SIMD128-NOT: v128
 224 ; SIMD128-NEXT: .functype bitselect_v16i8 (v128, v128, v128) -> (v128){{$}}
 225 ; SIMD128-SLOW-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $0{{$}}
 226 ; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}}
 227 ; SIMD128-FAST-NEXT: v128.and
 228 ; SIMD128-FAST-NEXT: v128.not
 229 ; SIMD128-FAST-NEXT: v128.and
 230 ; SIMD128-FAST-NEXT: v128.or
 231 ; SIMD128-FAST-NEXT: return
 232 define <16 x i8> @bitselect_v16i8(<16 x i8> %c, <16 x i8> %v1, <16 x i8> %v2) {
 233   %masked_v1 = and <16 x i8> %c, %v1
 234   %inv_mask = xor <16 x i8> %c,
 235     <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
 236      i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
 237   %masked_v2 = and <16 x i8> %inv_mask, %v2
 238   %a = or <16 x i8> %masked_v1, %masked_v2
 239   ret <16 x i8> %a
 240 }
 241
 242 ; ==============================================================================
 243 ; 8 x i16
 244 ; ==============================================================================
 245 ; CHECK-LABEL: add_v8i16:
 246 ; NO-SIMD128-NOT: i16x8
 247 ; SIMD128-NEXT: .functype add_v8i16 (v128, v128) -> (v128){{$}}
 248 ; SIMD128-NEXT: i16x8.add $push[[R:[0-9]+]]=, $0, $1{{$}}
 249 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 250 define <8 x i16> @add_v8i16(<8 x i16> %x, <8 x i16> %y) {
 251   %a = add <8 x i16> %x, %y
 252   ret <8 x i16> %a
 253 }
 254
 255 ; CHECK-LABEL: sub_v8i16:
 256 ; NO-SIMD128-NOT: i16x8
 257 ; SIMD128-NEXT: .functype sub_v8i16 (v128, v128) -> (v128){{$}}
 258 ; SIMD128-NEXT: i16x8.sub $push[[R:[0-9]+]]=, $0, $1{{$}}
 259 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 260 define <8 x i16> @sub_v8i16(<8 x i16> %x, <8 x i16> %y) {
 261   %a = sub <8 x i16> %x, %y
 262   ret <8 x i16> %a
 263 }
 264
 265 ; CHECK-LABEL: mul_v8i16:
 266 ; NO-SIMD128-NOT: i16x8
 267 ; SIMD128-NEXT: .functype mul_v8i16 (v128, v128) -> (v128){{$}}
 268 ; SIMD128-NEXT: i16x8.mul $push[[R:[0-9]+]]=, $0, $1{{$}}
 269 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 270 define <8 x i16> @mul_v8i16(<8 x i16> %x, <8 x i16> %y) {
 271   %a = mul <8 x i16> %x, %y
 272   ret <8 x i16> %a
 273 }
 274
 275 ; CHECK-LABEL: neg_v8i16:
 276 ; NO-SIMD128-NOT: i16x8
 277 ; SIMD128-NEXT: .functype neg_v8i16 (v128) -> (v128){{$}}
 278 ; SIMD128-NEXT: i16x8.neg $push[[R:[0-9]+]]=, $0{{$}}
 279 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 280 define <8 x i16> @neg_v8i16(<8 x i16> %x) {
 281   %a = sub <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>,
 282                      %x
 283   ret <8 x i16> %a
 284 }
 285
 286 ; CHECK-LABEL: shl_v8i16:
 287 ; NO-SIMD128-NOT: i16x8
 288 ; SIMD128-NEXT: .functype shl_v8i16 (v128, i32) -> (v128){{$}}
 289 ; SIMD128-NEXT: i16x8.shl $push[[R:[0-9]+]]=, $0, $1{{$}}
 290 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 291 define <8 x i16> @shl_v8i16(<8 x i16> %v, i16 %x) {
 292   %t = insertelement <8 x i16> undef, i16 %x, i32 0
 293   %s = shufflevector <8 x i16> %t, <8 x i16> undef,
 294     <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
 295   %a = shl <8 x i16> %v, %s
 296   ret <8 x i16> %a
 297 }
 298
 299 ; CHECK-LABEL: shl_const_v8i16:
 300 ; NO-SIMD128-NOT: i16x8
 301 ; SIMD128-NEXT: .functype shl_const_v8i16 (v128) -> (v128){{$}}
 302 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 5
 303 ; SIMD128-NEXT: i16x8.shl $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
 304 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 305 define <8 x i16> @shl_const_v8i16(<8 x i16> %v) {
 306   %a = shl <8 x i16> %v,
 307     <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
 308   ret <8 x i16> %a
 309 }
 310
 311 ; CHECK-LABEL: shl_vec_v8i16:
 312 ; NO-SIMD128-NOT: i16x8
 313 ; SIMD128-NEXT: .functype shl_vec_v8i16 (v128, v128) -> (v128){{$}}
 314 ; SIMD128-NEXT: i16x8.extract_lane_s $push[[L0:[0-9]+]]=, $0, 0{{$}}
 315 ; SIMD128-NEXT: i16x8.extract_lane_u $push[[L1:[0-9]+]]=, $1, 0{{$}}
 316 ; SIMD128-NEXT: i32.shl $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
 317 ; SIMD128-NEXT: i16x8.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
 318 ; Skip 6 lanes
 319 ; SIMD128:      i16x8.extract_lane_s $push[[L4:[0-9]+]]=, $0, 7{{$}}
 320 ; SIMD128-NEXT: i16x8.extract_lane_u $push[[L5:[0-9]+]]=, $1, 7{{$}}
 321 ; SIMD128-NEXT: i32.shl $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
 322 ; SIMD128-NEXT: i16x8.replace_lane $push[[R:[0-9]+]]=, $pop[[L7:[0-9]+]], 7, $pop[[L6]]{{$}}
 323 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 324 define <8 x i16> @shl_vec_v8i16(<8 x i16> %v, <8 x i16> %x) {
 325   %a = shl <8 x i16> %v, %x
 326   ret <8 x i16> %a
 327 }
 328
 329 ; CHECK-LABEL: shr_s_v8i16:
 330 ; NO-SIMD128-NOT: i16x8
 331 ; SIMD128-NEXT: .functype shr_s_v8i16 (v128, i32) -> (v128){{$}}
 332 ; SIMD128-NEXT: i16x8.shr_s $push[[R:[0-9]+]]=, $0, $1{{$}}
 333 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 334 define <8 x i16> @shr_s_v8i16(<8 x i16> %v, i16 %x) {
 335   %t = insertelement <8 x i16> undef, i16 %x, i32 0
 336   %s = shufflevector <8 x i16> %t, <8 x i16> undef,
 337     <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
 338   %a = ashr <8 x i16> %v, %s
 339   ret <8 x i16> %a
 340 }
 341
 342 ; CHECK-LABEL: shr_s_vec_v8i16:
 343 ; NO-SIMD128-NOT: i16x8
 344 ; SIMD128-NEXT: .functype shr_s_vec_v8i16 (v128, v128) -> (v128){{$}}
 345 ; SIMD128-NEXT: i16x8.extract_lane_s $push[[L0:[0-9]+]]=, $0, 0{{$}}
 346 ; SIMD128-NEXT: i32.const $push[[L1:[0-9]+]]=, 16{{$}}
 347 ; SIMD128-NEXT: i32.shl $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
 348 ; SIMD128-NEXT: i32.const $push[[L3:[0-9]+]]=, 16{{$}}
 349 ; SIMD128-NEXT: i32.shr_s $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]]{{$}}
 350 ; SIMD128-NEXT: i16x8.extract_lane_u $push[[L5:[0-9]+]]=, $1, 0{{$}}
 351 ; SIMD128-NEXT: i32.shr_s $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
 352 ; SIMD128-NEXT: i16x8.splat $push[[L7:[0-9]+]]=, $pop[[L6]]{{$}}
 353 ; Skip 6 lanes
 354 ; SIMD128:      i16x8.extract_lane_s $push[[L7:[0-9]+]]=, $0, 7{{$}}
 355 ; SIMD128-NEXT: i32.const $push[[L8:[0-9]+]]=, 16{{$}}
 356 ; SIMD128-NEXT: i32.shl $push[[L9:[0-9]+]]=, $pop[[L7]], $pop[[L8]]{{$}}
 357 ; SIMD128-NEXT: i32.const $push[[L10:[0-9]+]]=, 16{{$}}
 358 ; SIMD128-NEXT: i32.shr_s $push[[L11:[0-9]+]]=, $pop[[L9]], $pop[[L10]]{{$}}
 359 ; SIMD128-NEXT: i16x8.extract_lane_u $push[[L12:[0-9]+]]=, $1, 7{{$}}
 360 ; SIMD128-NEXT: i32.shr_s $push[[L13:[0-9]+]]=, $pop[[L11]], $pop[[L12]]{{$}}
 361 ; SIMD128-NEXT: i16x8.replace_lane $push[[R:[0-9]+]]=, $pop[[L14:[0-9]+]], 7, $pop[[L13]]{{$}}
 362 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 363 define <8 x i16> @shr_s_vec_v8i16(<8 x i16> %v, <8 x i16> %x) {
 364   %a = ashr <8 x i16> %v, %x
 365   ret <8 x i16> %a
 366 }
 367
 368 ; CHECK-LABEL: shr_u_v8i16:
 369 ; NO-SIMD128-NOT: i16x8
 370 ; SIMD128-NEXT: .functype shr_u_v8i16 (v128, i32) -> (v128){{$}}
 371 ; SIMD128-NEXT: i16x8.shr_u $push[[R:[0-9]+]]=, $0, $1{{$}}
 372 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 373 define <8 x i16> @shr_u_v8i16(<8 x i16> %v, i16 %x) {
 374   %t = insertelement <8 x i16> undef, i16 %x, i32 0
 375   %s = shufflevector <8 x i16> %t, <8 x i16> undef,
 376     <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
 377   %a = lshr <8 x i16> %v, %s
 378   ret <8 x i16> %a
 379 }
 380
 381 ; CHECK-LABEL: shr_u_vec_v8i16:
 382 ; NO-SIMD128-NOT: i16x8
 383 ; SIMD128-NEXT: .functype shr_u_vec_v8i16 (v128, v128) -> (v128){{$}}
 384 ; SIMD128-NEXT: i16x8.extract_lane_u $push[[L0:[0-9]+]]=, $0, 0{{$}}
 385 ; SIMD128-NEXT: i16x8.extract_lane_u $push[[L1:[0-9]+]]=, $1, 0{{$}}
 386 ; SIMD128-NEXT: i32.shr_u $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
 387 ; SIMD128-NEXT: i16x8.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
 388 ; Skip 6 lanes
 389 ; SIMD128:      i16x8.extract_lane_u $push[[L4:[0-9]+]]=, $0, 7{{$}}
 390 ; SIMD128-NEXT: i16x8.extract_lane_u $push[[L5:[0-9]+]]=, $1, 7{{$}}
 391 ; SIMD128-NEXT: i32.shr_u $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
 392 ; SIMD128-NEXT: i16x8.replace_lane $push[[R:[0-9]+]]=, $pop[[L7:[0-9]+]], 7, $pop[[L6]]{{$}}
 393 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 394 define <8 x i16> @shr_u_vec_v8i16(<8 x i16> %v, <8 x i16> %x) {
 395   %a = lshr <8 x i16> %v, %x
 396   ret <8 x i16> %a
 397 }
 398
 399 ; CHECK-LABEL: and_v8i16:
 400 ; NO-SIMD128-NOT: v128
 401 ; SIMD128-NEXT: .functype and_v8i16 (v128, v128) -> (v128){{$}}
 402 ; SIMD128-NEXT: v128.and $push[[R:[0-9]+]]=, $0, $1{{$}}
 403 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 404 define <8 x i16> @and_v8i16(<8 x i16> %x, <8 x i16> %y) {
 405   %a = and <8 x i16> %x, %y
 406   ret <8 x i16> %a
 407 }
 408
 409 ; CHECK-LABEL: or_v8i16:
 410 ; NO-SIMD128-NOT: v128
 411 ; SIMD128-NEXT: .functype or_v8i16 (v128, v128) -> (v128){{$}}
 412 ; SIMD128-NEXT: v128.or $push[[R:[0-9]+]]=, $0, $1{{$}}
 413 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 414 define <8 x i16> @or_v8i16(<8 x i16> %x, <8 x i16> %y) {
 415   %a = or <8 x i16> %x, %y
 416   ret <8 x i16> %a
 417 }
 418
 419 ; CHECK-LABEL: xor_v8i16:
 420 ; NO-SIMD128-NOT: v128
 421 ; SIMD128-NEXT: .functype xor_v8i16 (v128, v128) -> (v128){{$}}
 422 ; SIMD128-NEXT: v128.xor $push[[R:[0-9]+]]=, $0, $1{{$}}
 423 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 424 define <8 x i16> @xor_v8i16(<8 x i16> %x, <8 x i16> %y) {
 425   %a = xor <8 x i16> %x, %y
 426   ret <8 x i16> %a
 427 }
 428
 429 ; CHECK-LABEL: not_v8i16:
 430 ; NO-SIMD128-NOT: v128
 431 ; SIMD128-NEXT: .functype not_v8i16 (v128) -> (v128){{$}}
 432 ; SIMD128-NEXT: v128.not $push[[R:[0-9]+]]=, $0{{$}}
 433 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 434 define <8 x i16> @not_v8i16(<8 x i16> %x) {
 435   %a = xor <8 x i16> %x, <i16 -1, i16 -1, i16 -1, i16 -1,
 436                           i16 -1, i16 -1, i16 -1, i16 -1>
 437   ret <8 x i16> %a
 438 }
 439
 440 ; CHECK-LABEL: bitselect_v8i16:
 441 ; NO-SIMD128-NOT: v128
 442 ; SIMD128-NEXT: .functype bitselect_v8i16 (v128, v128, v128) -> (v128){{$}}
 443 ; SIMD128-SLOW-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $0{{$}}
 444 ; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}}
 445 ; SIMD128-FAST-NEXT: v128.and
 446 ; SIMD128-FAST-NEXT: v128.not
 447 ; SIMD128-FAST-NEXT: v128.and
 448 ; SIMD128-FAST-NEXT: v128.or
 449 ; SIMD128-FAST-NEXT: return
 450 define <8 x i16> @bitselect_v8i16(<8 x i16> %c, <8 x i16> %v1, <8 x i16> %v2) {
 451   %masked_v1 = and <8 x i16> %v1, %c
 452   %inv_mask = xor <8 x i16>
 453     <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>,
 454     %c
 455   %masked_v2 = and <8 x i16> %v2, %inv_mask
 456   %a = or <8 x i16> %masked_v1, %masked_v2
 457   ret <8 x i16> %a
 458 }
 459
 460 ; ==============================================================================
 461 ; 4 x i32
 462 ; ==============================================================================
 463 ; CHECK-LABEL: add_v4i32:
 464 ; NO-SIMD128-NOT: i32x4
 465 ; SIMD128-NEXT: .functype add_v4i32 (v128, v128) -> (v128){{$}}
 466 ; SIMD128-NEXT: i32x4.add $push[[R:[0-9]+]]=, $0, $1{{$}}
 467 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 468 define <4 x i32> @add_v4i32(<4 x i32> %x, <4 x i32> %y) {
 469   %a = add <4 x i32> %x, %y
 470   ret <4 x i32> %a
 471 }
 472
 473 ; CHECK-LABEL: sub_v4i32:
 474 ; NO-SIMD128-NOT: i32x4
 475 ; SIMD128-NEXT: .functype sub_v4i32 (v128, v128) -> (v128){{$}}
 476 ; SIMD128-NEXT: i32x4.sub $push[[R:[0-9]+]]=, $0, $1{{$}}
 477 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 478 define <4 x i32> @sub_v4i32(<4 x i32> %x, <4 x i32> %y) {
 479   %a = sub <4 x i32> %x, %y
 480   ret <4 x i32> %a
 481 }
 482
 483 ; CHECK-LABEL: mul_v4i32:
 484 ; NO-SIMD128-NOT: i32x4
 485 ; SIMD128-NEXT: .functype mul_v4i32 (v128, v128) -> (v128){{$}}
 486 ; SIMD128-NEXT: i32x4.mul $push[[R:[0-9]+]]=, $0, $1{{$}}
 487 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 488 define <4 x i32> @mul_v4i32(<4 x i32> %x, <4 x i32> %y) {
 489   %a = mul <4 x i32> %x, %y
 490   ret <4 x i32> %a
 491 }
 492
 493 ; CHECK-LABEL: neg_v4i32:
 494 ; NO-SIMD128-NOT: i32x4
 495 ; SIMD128-NEXT: .functype neg_v4i32 (v128) -> (v128){{$}}
 496 ; SIMD128-NEXT: i32x4.neg $push[[R:[0-9]+]]=, $0{{$}}
 497 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 498 define <4 x i32> @neg_v4i32(<4 x i32> %x) {
 499   %a = sub <4 x i32> <i32 0, i32 0, i32 0, i32 0>, %x
 500   ret <4 x i32> %a
 501 }
 502
 503 ; CHECK-LABEL: shl_v4i32:
 504 ; NO-SIMD128-NOT: i32x4
 505 ; SIMD128-NEXT: .functype shl_v4i32 (v128, i32) -> (v128){{$}}
 506 ; SIMD128-NEXT: i32x4.shl $push[[R:[0-9]+]]=, $0, $1{{$}}
 507 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 508 define <4 x i32> @shl_v4i32(<4 x i32> %v, i32 %x) {
 509   %t = insertelement <4 x i32> undef, i32 %x, i32 0
 510   %s = shufflevector <4 x i32> %t, <4 x i32> undef,
 511     <4 x i32> <i32 0, i32 0, i32 0, i32 0>
 512   %a = shl <4 x i32> %v, %s
 513   ret <4 x i32> %a
 514 }
 515
 516 ; CHECK-LABEL: shl_const_v4i32:
 517 ; NO-SIMD128-NOT: i32x4
 518 ; SIMD128-NEXT: .functype shl_const_v4i32 (v128) -> (v128){{$}}
 519 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 5
 520 ; SIMD128-NEXT: i32x4.shl $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
 521 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 522 define <4 x i32> @shl_const_v4i32(<4 x i32> %v) {
 523   %a = shl <4 x i32> %v, <i32 5, i32 5, i32 5, i32 5>
 524   ret <4 x i32> %a
 525 }
 526
 527 ; CHECK-LABEL: shl_vec_v4i32:
 528 ; NO-SIMD128-NOT: i32x4
 529 ; SIMD128-NEXT: .functype shl_vec_v4i32 (v128, v128) -> (v128){{$}}
 530 ; SIMD128-NEXT: i32x4.extract_lane $push[[L0:[0-9]+]]=, $0, 0{{$}}
 531 ; SIMD128-NEXT: i32x4.extract_lane $push[[L1:[0-9]+]]=, $1, 0{{$}}
 532 ; SIMD128-NEXT: i32.shl $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
 533 ; SIMD128-NEXT: i32x4.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
 534 ; Skip 2 lanes
 535 ; SIMD128:      i32x4.extract_lane $push[[L4:[0-9]+]]=, $0, 3{{$}}
 536 ; SIMD128-NEXT: i32x4.extract_lane $push[[L5:[0-9]+]]=, $1, 3{{$}}
 537 ; SIMD128-NEXT: i32.shl $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
 538 ; SIMD128-NEXT: i32x4.replace_lane $push[[R:[0-9]+]]=, $pop[[L7:[0-9]+]], 3, $pop[[L6]]{{$}}
 539 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 540 define <4 x i32> @shl_vec_v4i32(<4 x i32> %v, <4 x i32> %x) {
 541   %a = shl <4 x i32> %v, %x
 542   ret <4 x i32> %a
 543 }
 544
 545 ; CHECK-LABEL: shr_s_v4i32:
 546 ; NO-SIMD128-NOT: i32x4
 547 ; SIMD128-NEXT: .functype shr_s_v4i32 (v128, i32) -> (v128){{$}}
 548 ; SIMD128-NEXT: i32x4.shr_s $push[[R:[0-9]+]]=, $0, $1{{$}}
 549 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 550 define <4 x i32> @shr_s_v4i32(<4 x i32> %v, i32 %x) {
 551   %t = insertelement <4 x i32> undef, i32 %x, i32 0
 552   %s = shufflevector <4 x i32> %t, <4 x i32> undef,
 553     <4 x i32> <i32 0, i32 0, i32 0, i32 0>
 554   %a = ashr <4 x i32> %v, %s
 555   ret <4 x i32> %a
 556 }
 557
 558 ; CHECK-LABEL: shr_s_vec_v4i32:
 559 ; NO-SIMD128-NOT: i32x4
 560 ; SIMD128-NEXT: .functype shr_s_vec_v4i32 (v128, v128) -> (v128){{$}}
 561 ; SIMD128-NEXT: i32x4.extract_lane $push[[L0:[0-9]+]]=, $0, 0{{$}}
 562 ; SIMD128-NEXT: i32x4.extract_lane $push[[L1:[0-9]+]]=, $1, 0{{$}}
 563 ; SIMD128-NEXT: i32.shr_s $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
 564 ; SIMD128-NEXT: i32x4.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
 565 ; Skip 2 lanes
 566 ; SIMD128:      i32x4.extract_lane $push[[L4:[0-9]+]]=, $0, 3{{$}}
 567 ; SIMD128-NEXT: i32x4.extract_lane $push[[L5:[0-9]+]]=, $1, 3{{$}}
 568 ; SIMD128-NEXT: i32.shr_s $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
 569 ; SIMD128-NEXT: i32x4.replace_lane $push[[R:[0-9]+]]=, $pop[[L7:[0-9]+]], 3, $pop[[L6]]{{$}}
 570 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 571 define <4 x i32> @shr_s_vec_v4i32(<4 x i32> %v, <4 x i32> %x) {
 572   %a = ashr <4 x i32> %v, %x
 573   ret <4 x i32> %a
 574 }
 575
 576 ; CHECK-LABEL: shr_u_v4i32:
 577 ; NO-SIMD128-NOT: i32x4
 578 ; SIMD128-NEXT: .functype shr_u_v4i32 (v128, i32) -> (v128){{$}}
 579 ; SIMD128-NEXT: i32x4.shr_u $push[[R:[0-9]+]]=, $0, $1{{$}}
 580 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 581 define <4 x i32> @shr_u_v4i32(<4 x i32> %v, i32 %x) {
 582   %t = insertelement <4 x i32> undef, i32 %x, i32 0
 583   %s = shufflevector <4 x i32> %t, <4 x i32> undef,
 584     <4 x i32> <i32 0, i32 0, i32 0, i32 0>
 585   %a = lshr <4 x i32> %v, %s
 586   ret <4 x i32> %a
 587 }
 588
 589 ; CHECK-LABEL: shr_u_vec_v4i32:
 590 ; NO-SIMD128-NOT: i32x4
 591 ; SIMD128-NEXT: .functype shr_u_vec_v4i32 (v128, v128) -> (v128){{$}}
 592 ; SIMD128-NEXT: i32x4.extract_lane $push[[L0:[0-9]+]]=, $0, 0{{$}}
 593 ; SIMD128-NEXT: i32x4.extract_lane $push[[L1:[0-9]+]]=, $1, 0{{$}}
 594 ; SIMD128-NEXT: i32.shr_u $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
 595 ; SIMD128-NEXT: i32x4.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
 596 ; Skip 2 lanes
 597 ; SIMD128:      i32x4.extract_lane $push[[L4:[0-9]+]]=, $0, 3{{$}}
 598 ; SIMD128-NEXT: i32x4.extract_lane $push[[L5:[0-9]+]]=, $1, 3{{$}}
 599 ; SIMD128-NEXT: i32.shr_u $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
 600 ; SIMD128-NEXT: i32x4.replace_lane $push[[R:[0-9]+]]=, $pop[[L7:[0-9]+]], 3, $pop[[L6]]{{$}}
 601 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 602 define <4 x i32> @shr_u_vec_v4i32(<4 x i32> %v, <4 x i32> %x) {
 603   %a = lshr <4 x i32> %v, %x
 604   ret <4 x i32> %a
 605 }
 606
 607 ; CHECK-LABEL: and_v4i32:
 608 ; NO-SIMD128-NOT: v128
 609 ; SIMD128-NEXT: .functype and_v4i32 (v128, v128) -> (v128){{$}}
 610 ; SIMD128-NEXT: v128.and $push[[R:[0-9]+]]=, $0, $1{{$}}
 611 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 612 define <4 x i32> @and_v4i32(<4 x i32> %x, <4 x i32> %y) {
 613   %a = and <4 x i32> %x, %y
 614   ret <4 x i32> %a
 615 }
 616
 617 ; CHECK-LABEL: or_v4i32:
 618 ; NO-SIMD128-NOT: v128
 619 ; SIMD128-NEXT: .functype or_v4i32 (v128, v128) -> (v128){{$}}
 620 ; SIMD128-NEXT: v128.or $push[[R:[0-9]+]]=, $0, $1{{$}}
 621 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 622 define <4 x i32> @or_v4i32(<4 x i32> %x, <4 x i32> %y) {
 623   %a = or <4 x i32> %x, %y
 624   ret <4 x i32> %a
 625 }
 626
 627 ; CHECK-LABEL: xor_v4i32:
 628 ; NO-SIMD128-NOT: v128
 629 ; SIMD128-NEXT: .functype xor_v4i32 (v128, v128) -> (v128){{$}}
 630 ; SIMD128-NEXT: v128.xor $push[[R:[0-9]+]]=, $0, $1{{$}}
 631 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 632 define <4 x i32> @xor_v4i32(<4 x i32> %x, <4 x i32> %y) {
 633   %a = xor <4 x i32> %x, %y
 634   ret <4 x i32> %a
 635 }
 636
 637 ; CHECK-LABEL: not_v4i32:
 638 ; NO-SIMD128-NOT: v128
 639 ; SIMD128-NEXT: .functype not_v4i32 (v128) -> (v128){{$}}
 640 ; SIMD128-NEXT: v128.not $push[[R:[0-9]+]]=, $0{{$}}
 641 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 642 define <4 x i32> @not_v4i32(<4 x i32> %x) {
 643   %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
 644   ret <4 x i32> %a
 645 }
 646
 647 ; CHECK-LABEL: bitselect_v4i32:
 648 ; NO-SIMD128-NOT: v128
 649 ; SIMD128-NEXT: .functype bitselect_v4i32 (v128, v128, v128) -> (v128){{$}}
 650 ; SIMD128-SLOW-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $0{{$}}
 651 ; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}}
 652 ; SIMD128-FAST-NEXT: v128.not
 653 ; SIMD128-FAST-NEXT: v128.and
 654 ; SIMD128-FAST-NEXT: v128.and
 655 ; SIMD128-FAST-NEXT: v128.or
 656 ; SIMD128-FAST-NEXT: return
 657 define <4 x i32> @bitselect_v4i32(<4 x i32> %c, <4 x i32> %v1, <4 x i32> %v2) {
 658   %masked_v1 = and <4 x i32> %c, %v1
 659   %inv_mask = xor <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, %c
 660   %masked_v2 = and <4 x i32> %inv_mask, %v2
 661   %a = or <4 x i32> %masked_v2, %masked_v1
 662   ret <4 x i32> %a
 663 }
 664
 665 ; ==============================================================================
 666 ; 2 x i64
 667 ; ==============================================================================
 668 ; CHECK-LABEL: add_v2i64:
 669 ; NO-SIMD128-NOT: i64x2
 670 ; SIMD128-VM-NOT: i64x2
 671 ; SIMD128-NEXT: .functype add_v2i64 (v128, v128) -> (v128){{$}}
 672 ; SIMD128-NEXT: i64x2.add $push[[R:[0-9]+]]=, $0, $1{{$}}
 673 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 674 define <2 x i64> @add_v2i64(<2 x i64> %x, <2 x i64> %y) {
 675   %a = add <2 x i64> %x, %y
 676   ret <2 x i64> %a
 677 }
 678
 679 ; CHECK-LABEL: sub_v2i64:
 680 ; NO-SIMD128-NOT: i64x2
 681 ; SIMD128-VM-NOT: i64x2
 682 ; SIMD128-NEXT: .functype sub_v2i64 (v128, v128) -> (v128){{$}}
 683 ; SIMD128-NEXT: i64x2.sub $push[[R:[0-9]+]]=, $0, $1{{$}}
 684 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 685 define <2 x i64> @sub_v2i64(<2 x i64> %x, <2 x i64> %y) {
 686   %a = sub <2 x i64> %x, %y
 687   ret <2 x i64> %a
 688 }
 689
 690 ; v2i64.mul is not in spec
 691 ; CHECK-LABEL: mul_v2i64:
 692 ; NO-SIMD128-NOT: i64x2
 693 ; SIMD128-VM-NOT: i64x2
 694 ; SIMD128-NOT: i64x2.mul
 695 ; SIMD128: i64x2.extract_lane
 696 ; SIMD128: i64.mul
 697 define <2 x i64> @mul_v2i64(<2 x i64> %x, <2 x i64> %y) {
 698   %a = mul <2 x i64> %x, %y
 699   ret <2 x i64> %a
 700 }
 701
 702 ; CHECK-LABEL: neg_v2i64:
 703 ; NO-SIMD128-NOT: i64x2
 704 ; SIMD128-NEXT: .functype neg_v2i64 (v128) -> (v128){{$}}
 705 ; SIMD128-NEXT: i64x2.neg $push[[R:[0-9]+]]=, $0{{$}}
 706 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 707 define <2 x i64> @neg_v2i64(<2 x i64> %x) {
 708   %a = sub <2 x i64> <i64 0, i64 0>, %x  ; 0 - %x per lane; checks above expect i64x2.neg
 709   ret <2 x i64> %a
 710 }
 711
 712 ; CHECK-LABEL: shl_v2i64:
 713 ; NO-SIMD128-NOT: i64x2
 714 ; SIMD128-NEXT: .functype shl_v2i64 (v128, i32) -> (v128){{$}}
 715 ; SIMD128-NEXT: i64x2.shl $push[[R:[0-9]+]]=, $0, $1{{$}}
 716 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 717 define <2 x i64> @shl_v2i64(<2 x i64> %v, i32 %x) {
 718   %x2 = zext i32 %x to i64  ; widen the i32 shift count to the lane type
 719   %t = insertelement <2 x i64> undef, i64 %x2, i32 0  ; put the count in lane 0
 720   %s = shufflevector <2 x i64> %t, <2 x i64> undef, <2 x i32> <i32 0, i32 0>  ; broadcast lane 0 to both lanes
 721   %a = shl <2 x i64> %v, %s  ; shift by a splat; checks above expect i64x2.shl taking $1 directly
 722   ret <2 x i64> %a
 723 }
 724
 725 ; CHECK-LABEL: shl_nozext_v2i64:
 726 ; NO-SIMD128-NOT: i64x2
 727 ; SIMD128-NEXT: .functype shl_nozext_v2i64 (v128, i64) -> (v128){{$}}
 728 ; SIMD128-NEXT: i32.wrap/i64 $push[[L0:[0-9]+]]=, $1{{$}}
 729 ; SIMD128-NEXT: i64x2.shl $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
 730 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 731 define <2 x i64> @shl_nozext_v2i64(<2 x i64> %v, i64 %x) {
 732   %t = insertelement <2 x i64> undef, i64 %x, i32 0  ; count is already i64, so no zext here
 733   %s = shufflevector <2 x i64> %t, <2 x i64> undef, <2 x i32> <i32 0, i32 0>  ; broadcast lane 0 to both lanes
 734   %a = shl <2 x i64> %v, %s  ; checks above expect an i32.wrap/i64 of $1 feeding i64x2.shl
 735   ret <2 x i64> %a
 736 }
 737
 738 ; CHECK-LABEL: shl_const_v2i64:
 739 ; NO-SIMD128-NOT: i64x2
 740 ; SIMD128-NEXT: .functype shl_const_v2i64 (v128) -> (v128){{$}}
 741 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 5{{$}}
 742 ; SIMD128-NEXT: i64x2.shl $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
 743 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 744 define <2 x i64> @shl_const_v2i64(<2 x i64> %v) {
 745   %a = shl <2 x i64> %v, <i64 5, i64 5>  ; constant splat count; checks above expect i32.const 5 feeding i64x2.shl
 746   ret <2 x i64> %a
 747 }
 748
 749 ; CHECK-LABEL: shl_vec_v2i64:
 750 ; NO-SIMD128-NOT: i64x2
 751 ; SIMD128-NEXT: .functype shl_vec_v2i64 (v128, v128) -> (v128){{$}}
 752 ; SIMD128-NEXT: i64x2.extract_lane $push[[L0:[0-9]+]]=, $0, 0{{$}}
 753 ; SIMD128-NEXT: i64x2.extract_lane $push[[L1:[0-9]+]]=, $1, 0{{$}}
 754 ; SIMD128-NEXT: i64.shl $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
 755 ; SIMD128-NEXT: i64x2.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
 756 ; SIMD128-NEXT: i64x2.extract_lane $push[[L4:[0-9]+]]=, $0, 1{{$}}
 757 ; SIMD128-NEXT: i64x2.extract_lane $push[[L5:[0-9]+]]=, $1, 1{{$}}
 758 ; SIMD128-NEXT: i64.shl $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
 759 ; SIMD128-NEXT: i64x2.replace_lane $push[[R:[0-9]+]]=, $pop[[L3]], 1, $pop[[L6]]{{$}}
 760 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 761 define <2 x i64> @shl_vec_v2i64(<2 x i64> %v, <2 x i64> %x) {
 762   %a = shl <2 x i64> %v, %x  ; per-lane (non-splat) counts; checks above expect scalar i64.shl per lane, rebuilt with splat/replace_lane
 763   ret <2 x i64> %a
 764 }
 765
 766 ; CHECK-LABEL: shr_s_v2i64:
 767 ; NO-SIMD128-NOT: i64x2
 768 ; SIMD128-NEXT: .functype shr_s_v2i64 (v128, i32) -> (v128){{$}}
 769 ; SIMD128-NEXT: i64x2.shr_s $push[[R:[0-9]+]]=, $0, $1{{$}}
 770 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 771 define <2 x i64> @shr_s_v2i64(<2 x i64> %v, i32 %x) {
 772   %x2 = zext i32 %x to i64  ; widen the i32 shift count to the lane type
 773   %t = insertelement <2 x i64> undef, i64 %x2, i32 0  ; put the count in lane 0
 774   %s = shufflevector <2 x i64> %t, <2 x i64> undef, <2 x i32> <i32 0, i32 0>  ; broadcast lane 0 to both lanes
 775   %a = ashr <2 x i64> %v, %s  ; arithmetic shift by a splat; checks above expect i64x2.shr_s taking $1 directly
 776   ret <2 x i64> %a
 777 }
 778
 779 ; CHECK-LABEL: shr_s_nozext_v2i64:
 780 ; NO-SIMD128-NOT: i64x2
 781 ; SIMD128-NEXT: .functype shr_s_nozext_v2i64 (v128, i64) -> (v128){{$}}
 782 ; SIMD128-NEXT: i32.wrap/i64 $push[[L0:[0-9]+]]=, $1{{$}}
 783 ; SIMD128-NEXT: i64x2.shr_s $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
 784 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 785 define <2 x i64> @shr_s_nozext_v2i64(<2 x i64> %v, i64 %x) {
 786   %t = insertelement <2 x i64> undef, i64 %x, i32 0  ; count is already i64, so no zext here
 787   %s = shufflevector <2 x i64> %t, <2 x i64> undef, <2 x i32> <i32 0, i32 0>  ; broadcast lane 0 to both lanes
 788   %a = ashr <2 x i64> %v, %s  ; checks above expect an i32.wrap/i64 of $1 feeding i64x2.shr_s
 789   ret <2 x i64> %a
 790 }
 791
 792 ; CHECK-LABEL: shr_s_const_v2i64:
 793 ; NO-SIMD128-NOT: i64x2
 794 ; SIMD128-NEXT: .functype shr_s_const_v2i64 (v128) -> (v128){{$}}
 795 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 5{{$}}
 796 ; SIMD128-NEXT: i64x2.shr_s $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
 797 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 798 define <2 x i64> @shr_s_const_v2i64(<2 x i64> %v) {
 799   %a = ashr <2 x i64> %v, <i64 5, i64 5>  ; constant splat count; checks above expect i32.const 5 feeding i64x2.shr_s
 800   ret <2 x i64> %a
 801 }
 802
 803 ; CHECK-LABEL: shr_s_vec_v2i64:
 804 ; NO-SIMD128-NOT: i64x2
 805 ; SIMD128-NEXT: .functype shr_s_vec_v2i64 (v128, v128) -> (v128){{$}}
 806 ; SIMD128-NEXT: i64x2.extract_lane $push[[L0:[0-9]+]]=, $0, 0{{$}}
 807 ; SIMD128-NEXT: i64x2.extract_lane $push[[L1:[0-9]+]]=, $1, 0{{$}}
 808 ; SIMD128-NEXT: i64.shr_s $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
 809 ; SIMD128-NEXT: i64x2.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
 810 ; SIMD128-NEXT: i64x2.extract_lane $push[[L4:[0-9]+]]=, $0, 1{{$}}
 811 ; SIMD128-NEXT: i64x2.extract_lane $push[[L5:[0-9]+]]=, $1, 1{{$}}
 812 ; SIMD128-NEXT: i64.shr_s $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
 813 ; SIMD128-NEXT: i64x2.replace_lane $push[[R:[0-9]+]]=, $pop[[L3]], 1, $pop[[L6]]{{$}}
 814 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 815 define <2 x i64> @shr_s_vec_v2i64(<2 x i64> %v, <2 x i64> %x) {
 816   %a = ashr <2 x i64> %v, %x  ; per-lane (non-splat) counts; checks above expect scalar i64.shr_s per lane, rebuilt with splat/replace_lane
 817   ret <2 x i64> %a
 818 }
 819
 820 ; CHECK-LABEL: shr_u_v2i64:
 821 ; NO-SIMD128-NOT: i64x2
 822 ; SIMD128-NEXT: .functype shr_u_v2i64 (v128, i32) -> (v128){{$}}
 823 ; SIMD128-NEXT: i64x2.shr_u $push[[R:[0-9]+]]=, $0, $1{{$}}
 824 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 825 define <2 x i64> @shr_u_v2i64(<2 x i64> %v, i32 %x) {
 826   %x2 = zext i32 %x to i64  ; widen the i32 shift count to the lane type
 827   %t = insertelement <2 x i64> undef, i64 %x2, i32 0  ; put the count in lane 0
 828   %s = shufflevector <2 x i64> %t, <2 x i64> undef, <2 x i32> <i32 0, i32 0>  ; broadcast lane 0 to both lanes
 829   %a = lshr <2 x i64> %v, %s  ; logical shift by a splat; checks above expect i64x2.shr_u taking $1 directly
 830   ret <2 x i64> %a
 831 }
 832
 833 ; CHECK-LABEL: shr_u_nozext_v2i64:
 834 ; NO-SIMD128-NOT: i64x2
 835 ; SIMD128-NEXT: .functype shr_u_nozext_v2i64 (v128, i64) -> (v128){{$}}
 836 ; SIMD128-NEXT: i32.wrap/i64 $push[[L0:[0-9]+]]=, $1{{$}}
 837 ; SIMD128-NEXT: i64x2.shr_u $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
 838 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 839 define <2 x i64> @shr_u_nozext_v2i64(<2 x i64> %v, i64 %x) {
 840   %t = insertelement <2 x i64> undef, i64 %x, i32 0  ; count is already i64, so no zext here
 841   %s = shufflevector <2 x i64> %t, <2 x i64> undef, <2 x i32> <i32 0, i32 0>  ; broadcast lane 0 to both lanes
 842   %a = lshr <2 x i64> %v, %s  ; checks above expect an i32.wrap/i64 of $1 feeding i64x2.shr_u
 843   ret <2 x i64> %a
 844 }
 845
 846 ; CHECK-LABEL: shr_u_const_v2i64:
 847 ; NO-SIMD128-NOT: i64x2
 848 ; SIMD128-NEXT: .functype shr_u_const_v2i64 (v128) -> (v128){{$}}
 849 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 5{{$}}
 850 ; SIMD128-NEXT: i64x2.shr_u $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
 851 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 852 define <2 x i64> @shr_u_const_v2i64(<2 x i64> %v) {
 853   %a = lshr <2 x i64> %v, <i64 5, i64 5>  ; constant splat count; checks above expect i32.const 5 feeding i64x2.shr_u
 854   ret <2 x i64> %a
 855 }
 856
 857 ; CHECK-LABEL: shr_u_vec_v2i64:
 858 ; NO-SIMD128-NOT: i64x2
 859 ; SIMD128-NEXT: .functype shr_u_vec_v2i64 (v128, v128) -> (v128){{$}}
 860 ; SIMD128-NEXT: i64x2.extract_lane $push[[L0:[0-9]+]]=, $0, 0{{$}}
 861 ; SIMD128-NEXT: i64x2.extract_lane $push[[L1:[0-9]+]]=, $1, 0{{$}}
 862 ; SIMD128-NEXT: i64.shr_u $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
 863 ; SIMD128-NEXT: i64x2.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
 864 ; SIMD128-NEXT: i64x2.extract_lane $push[[L4:[0-9]+]]=, $0, 1{{$}}
 865 ; SIMD128-NEXT: i64x2.extract_lane $push[[L5:[0-9]+]]=, $1, 1{{$}}
 866 ; SIMD128-NEXT: i64.shr_u $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
 867 ; SIMD128-NEXT: i64x2.replace_lane $push[[R:[0-9]+]]=, $pop[[L3]], 1, $pop[[L6]]{{$}}
 868 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 869 define <2 x i64> @shr_u_vec_v2i64(<2 x i64> %v, <2 x i64> %x) {
 870   %a = lshr <2 x i64> %v, %x  ; per-lane (non-splat) counts; checks above expect scalar i64.shr_u per lane, rebuilt with splat/replace_lane
 871   ret <2 x i64> %a
 872 }
 873
 874 ; CHECK-LABEL: and_v2i64:
 875 ; NO-SIMD128-NOT: v128
 876 ; SIMD128-VM-NOT: v128
 877 ; SIMD128-NEXT: .functype and_v2i64 (v128, v128) -> (v128){{$}}
 878 ; SIMD128-NEXT: v128.and $push[[R:[0-9]+]]=, $0, $1{{$}}
 879 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 880 define <2 x i64> @and_v2i64(<2 x i64> %x, <2 x i64> %y) {
 881   %a = and <2 x i64> %x, %y  ; bitwise AND; checks above expect the lane-agnostic v128.and
 882   ret <2 x i64> %a
 883 }
 884
 885 ; CHECK-LABEL: or_v2i64:
 886 ; NO-SIMD128-NOT: v128
 887 ; SIMD128-VM-NOT: v128
 888 ; SIMD128-NEXT: .functype or_v2i64 (v128, v128) -> (v128){{$}}
 889 ; SIMD128-NEXT: v128.or $push[[R:[0-9]+]]=, $0, $1{{$}}
 890 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 891 define <2 x i64> @or_v2i64(<2 x i64> %x, <2 x i64> %y) {
 892   %a = or <2 x i64> %x, %y  ; bitwise OR; checks above expect the lane-agnostic v128.or
 893   ret <2 x i64> %a
 894 }
 895
 896 ; CHECK-LABEL: xor_v2i64:
 897 ; NO-SIMD128-NOT: v128
 898 ; SIMD128-VM-NOT: v128
 899 ; SIMD128-NEXT: .functype xor_v2i64 (v128, v128) -> (v128){{$}}
 900 ; SIMD128-NEXT: v128.xor $push[[R:[0-9]+]]=, $0, $1{{$}}
 901 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 902 define <2 x i64> @xor_v2i64(<2 x i64> %x, <2 x i64> %y) {
 903   %a = xor <2 x i64> %x, %y  ; bitwise XOR; checks above expect the lane-agnostic v128.xor
 904   ret <2 x i64> %a
 905 }
 906
 907 ; CHECK-LABEL: not_v2i64:
 908 ; NO-SIMD128-NOT: v128
 909 ; SIMD128-VM-NOT: v128
 910 ; SIMD128-NEXT: .functype not_v2i64 (v128) -> (v128){{$}}
 911 ; SIMD128-NEXT: v128.not $push[[R:[0-9]+]]=, $0{{$}}
 912 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 913 define <2 x i64> @not_v2i64(<2 x i64> %x) {
 914   %a = xor <2 x i64> %x, <i64 -1, i64 -1>  ; xor with all-ones is bitwise NOT; checks above expect v128.not
 915   ret <2 x i64> %a
 916 }
 917
 918 ; CHECK-LABEL: bitselect_v2i64:
 919 ; NO-SIMD128-NOT: v128
 920 ; SIMD128-VM-NOT: v128
 921 ; SIMD128-NEXT: .functype bitselect_v2i64 (v128, v128, v128) -> (v128){{$}}
 922 ; SIMD128-SLOW-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $0{{$}}
 923 ; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}}
 924 ; SIMD128-FAST-NEXT: v128.not
 925 ; SIMD128-FAST-NEXT: v128.and
 926 ; SIMD128-FAST-NEXT: v128.and
 927 ; SIMD128-FAST-NEXT: v128.or
 928 ; SIMD128-FAST-NEXT: return
 929 define <2 x i64> @bitselect_v2i64(<2 x i64> %c, <2 x i64> %v1, <2 x i64> %v2) {
 930   %masked_v1 = and <2 x i64> %v1, %c  ; bits of %v1 where the mask %c is set
 931   %inv_mask = xor <2 x i64> <i64 -1, i64 -1>, %c  ; ~%c
 932   %masked_v2 = and <2 x i64> %v2, %inv_mask  ; bits of %v2 where %c is clear
 933   %a = or <2 x i64> %masked_v2, %masked_v1  ; (%c & %v1) | (~%c & %v2); the non-fast-isel checks above expect one v128.bitselect
 934   ret <2 x i64> %a
 935 }
 936
 937 ; ==============================================================================
 938 ; 4 x float
 939 ; ==============================================================================
 940 ; CHECK-LABEL: neg_v4f32:
 941 ; NO-SIMD128-NOT: f32x4
 942 ; SIMD128-NEXT: .functype neg_v4f32 (v128) -> (v128){{$}}
 943 ; SIMD128-NEXT: f32x4.neg $push[[R:[0-9]+]]=, $0{{$}}
 944 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 945 define <4 x float> @neg_v4f32(<4 x float> %x) {
 946   ; nsz makes this semantically equivalent to flipping sign bit
 947   %a = fsub nsz <4 x float> <float 0.0, float 0.0, float 0.0, float 0.0>, %x  ; 0.0 - %x per lane; checks above expect f32x4.neg
 948   ret <4 x float> %a
 949 }
 950
 951 ; CHECK-LABEL: abs_v4f32:
 952 ; NO-SIMD128-NOT: f32x4
 953 ; SIMD128-NEXT: .functype abs_v4f32 (v128) -> (v128){{$}}
 954 ; SIMD128-NEXT: f32x4.abs $push[[R:[0-9]+]]=, $0{{$}}
 955 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 956 declare <4 x float> @llvm.fabs.v4f32(<4 x float>) nounwind readnone
 957 define <4 x float> @abs_v4f32(<4 x float> %x) {
 958   %a = call <4 x float> @llvm.fabs.v4f32(<4 x float> %x)  ; vector fabs intrinsic; checks above expect f32x4.abs
 959   ret <4 x float> %a
 960 }
 961
 962 ; CHECK-LABEL: min_unordered_v4f32:
 963 ; NO-SIMD128-NOT: f32x4
 964 ; SIMD128-NEXT: .functype min_unordered_v4f32 (v128) -> (v128){{$}}
 965 ; SIMD128-NEXT: f32.const $push[[L0:[0-9]+]]=, 0x1.4p2
 966 ; SIMD128-NEXT: f32x4.splat $push[[L1:[0-9]+]]=, $pop[[L0]]
 967 ; SIMD128-NEXT: f32x4.min $push[[R:[0-9]+]]=, $0, $pop[[L1]]{{$}}
 968 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 969 define <4 x float> @min_unordered_v4f32(<4 x float> %x) {
 970   %cmps = fcmp ule <4 x float> %x, <float 5., float 5., float 5., float 5.>  ; per lane: %x <= 5.0, or unordered (NaN)
 971   %a = select <4 x i1> %cmps, <4 x float> %x,
 972     <4 x float> <float 5., float 5., float 5., float 5.>  ; %x where the compare holds, else 5.0; checks above expect f32x4.min
 973   ret <4 x float> %a
 974 }
 975
 976 ; CHECK-LABEL: max_unordered_v4f32:
 977 ; NO-SIMD128-NOT: f32x4
 978 ; SIMD128-NEXT: .functype max_unordered_v4f32 (v128) -> (v128){{$}}
 979 ; SIMD128-NEXT: f32.const $push[[L0:[0-9]+]]=, 0x1.4p2
 980 ; SIMD128-NEXT: f32x4.splat $push[[L1:[0-9]+]]=, $pop[[L0]]
 981 ; SIMD128-NEXT: f32x4.max $push[[R:[0-9]+]]=, $0, $pop[[L1]]{{$}}
 982 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 983 define <4 x float> @max_unordered_v4f32(<4 x float> %x) {
 984   %cmps = fcmp uge <4 x float> %x, <float 5., float 5., float 5., float 5.>  ; per lane: %x >= 5.0, or unordered (NaN)
 985   %a = select <4 x i1> %cmps, <4 x float> %x,
 986     <4 x float> <float 5., float 5., float 5., float 5.>  ; %x where the compare holds, else 5.0; checks above expect f32x4.max
 987   ret <4 x float> %a
 988 }
 989
 990 ; CHECK-LABEL: min_ordered_v4f32:
 991 ; NO-SIMD128-NOT: f32x4
 992 ; SIMD128-NEXT: .functype min_ordered_v4f32 (v128) -> (v128){{$}}
 993 ; SIMD128-NEXT: f32.const $push[[L0:[0-9]+]]=, 0x1.4p2
 994 ; SIMD128-NEXT: f32x4.splat $push[[L1:[0-9]+]]=, $pop[[L0]]
 995 ; SIMD128-NEXT: f32x4.min $push[[R:[0-9]+]]=, $0, $pop[[L1]]{{$}}
 996 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 997 define <4 x float> @min_ordered_v4f32(<4 x float> %x) {
 998   %cmps = fcmp ole <4 x float> <float 5., float 5., float 5., float 5.>, %x  ; per lane: 5.0 <= %x and neither is NaN
 999   %a = select <4 x i1> %cmps,
1000     <4 x float> <float 5., float 5., float 5., float 5.>, <4 x float> %x  ; 5.0 where the compare holds, else %x; checks above expect f32x4.min
1001   ret <4 x float> %a
1002 }
1003
1004 ; CHECK-LABEL: max_ordered_v4f32:
1005 ; NO-SIMD128-NOT: f32x4
1006 ; SIMD128-NEXT: .functype max_ordered_v4f32 (v128) -> (v128){{$}}
1007 ; SIMD128-NEXT: f32.const $push[[L0:[0-9]+]]=, 0x1.4p2
1008 ; SIMD128-NEXT: f32x4.splat $push[[L1:[0-9]+]]=, $pop[[L0]]
1009 ; SIMD128-NEXT: f32x4.max $push[[R:[0-9]+]]=, $0, $pop[[L1]]{{$}}
1010 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1011 define <4 x float> @max_ordered_v4f32(<4 x float> %x) {
1012   %cmps = fcmp oge <4 x float> <float 5., float 5., float 5., float 5.>, %x  ; per lane: 5.0 >= %x and neither is NaN
1013   %a = select <4 x i1> %cmps,
1014     <4 x float> <float 5., float 5., float 5., float 5.>, <4 x float> %x  ; 5.0 where the compare holds, else %x; checks above expect f32x4.max
1015   ret <4 x float> %a
1016 }
1017
1018 ; CHECK-LABEL: min_intrinsic_v4f32:
1019 ; NO-SIMD128-NOT: f32x4
1020 ; SIMD128-NEXT: .functype min_intrinsic_v4f32 (v128, v128) -> (v128){{$}}
1021 ; SIMD128-NEXT: f32x4.min $push[[R:[0-9]+]]=, $0, $1{{$}}
1022 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1023 declare <4 x float> @llvm.minimum.v4f32(<4 x float>, <4 x float>)
1024 define <4 x float> @min_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) {
1025   %a = call <4 x float> @llvm.minimum.v4f32(<4 x float> %x, <4 x float> %y)  ; checks above expect a direct f32x4.min of $0 and $1
1026   ret <4 x float> %a
1027 }
1028
1029 ; CHECK-LABEL: max_intrinsic_v4f32:
1030 ; NO-SIMD128-NOT: f32x4
1031 ; SIMD128-NEXT: .functype max_intrinsic_v4f32 (v128, v128) -> (v128){{$}}
1032 ; SIMD128-NEXT: f32x4.max $push[[R:[0-9]+]]=, $0, $1{{$}}
1033 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1034 declare <4 x float> @llvm.maximum.v4f32(<4 x float>, <4 x float>)
1035 define <4 x float> @max_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) {
1036   %a = call <4 x float> @llvm.maximum.v4f32(<4 x float> %x, <4 x float> %y)  ; checks above expect a direct f32x4.max of $0 and $1
1037   ret <4 x float> %a
1038 }
1039
1040 ; CHECK-LABEL: min_const_intrinsic_v4f32:
1041 ; NO-SIMD128-NOT: f32x4
1042 ; SIMD128-NEXT: .functype min_const_intrinsic_v4f32 () -> (v128){{$}}
1043 ; SIMD128-NEXT: f32.const $push[[L:[0-9]+]]=, 0x1.4p2{{$}}
1044 ; SIMD128-NEXT: f32x4.splat $push[[R:[0-9]+]]=, $pop[[L]]{{$}}
1045 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1046 define <4 x float> @min_const_intrinsic_v4f32() {
1047   %a = call <4 x float> @llvm.minimum.v4f32(
1048     <4 x float> <float 42., float 42., float 42., float 42.>,
1049     <4 x float> <float 5., float 5., float 5., float 5.>
1050   )  ; both operands are constants; checks above expect a folded splat of 0x1.4p2 (5.0) and no f32x4.min
1051   ret <4 x float> %a
1052 }
1053
1054 ; CHECK-LABEL: max_const_intrinsic_v4f32:
1055 ; NO-SIMD128-NOT: f32x4
1056 ; SIMD128-NEXT: .functype max_const_intrinsic_v4f32 () -> (v128){{$}}
1057 ; SIMD128-NEXT: f32.const $push[[L:[0-9]+]]=, 0x1.5p5{{$}}
1058 ; SIMD128-NEXT: f32x4.splat $push[[R:[0-9]+]]=, $pop[[L]]{{$}}
1059 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1060 define <4 x float> @max_const_intrinsic_v4f32() {
1061   %a = call <4 x float> @llvm.maximum.v4f32(
1062     <4 x float> <float 42., float 42., float 42., float 42.>,
1063     <4 x float> <float 5., float 5., float 5., float 5.>
1064   )  ; both operands are constants; checks above expect a folded splat of 0x1.5p5 (42.0) and no f32x4.max
1065   ret <4 x float> %a
1066 }
1067
1068 ; CHECK-LABEL: add_v4f32:
1069 ; NO-SIMD128-NOT: f32x4
1070 ; SIMD128-NEXT: .functype add_v4f32 (v128, v128) -> (v128){{$}}
1071 ; SIMD128-NEXT: f32x4.add $push[[R:[0-9]+]]=, $0, $1{{$}}
1072 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1073 define <4 x float> @add_v4f32(<4 x float> %x, <4 x float> %y) {
1074   %a = fadd <4 x float> %x, %y  ; lane-wise f32 add; checks above expect a single f32x4.add
1075   ret <4 x float> %a
1076 }
1077
1078 ; CHECK-LABEL: sub_v4f32:
1079 ; NO-SIMD128-NOT: f32x4
1080 ; SIMD128-NEXT: .functype sub_v4f32 (v128, v128) -> (v128){{$}}
1081 ; SIMD128-NEXT: f32x4.sub $push[[R:[0-9]+]]=, $0, $1{{$}}
1082 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1083 define <4 x float> @sub_v4f32(<4 x float> %x, <4 x float> %y) {
1084   %a = fsub <4 x float> %x, %y  ; lane-wise f32 subtract; checks above expect a single f32x4.sub
1085   ret <4 x float> %a
1086 }
1087
1088 ; CHECK-LABEL: div_v4f32:
1089 ; NO-SIMD128-NOT: f32x4
1090 ; SIMD128-VM-NOT: f32x4.div
1091 ; SIMD128-NEXT: .functype div_v4f32 (v128, v128) -> (v128){{$}}
1092 ; SIMD128-NEXT: f32x4.div $push[[R:[0-9]+]]=, $0, $1{{$}}
1093 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1094 define <4 x float> @div_v4f32(<4 x float> %x, <4 x float> %y) {
1095   %a = fdiv <4 x float> %x, %y  ; lane-wise f32 divide; checks above expect a single f32x4.div
1096   ret <4 x float> %a
1097 }
1098
1099 ; CHECK-LABEL: mul_v4f32:
1100 ; NO-SIMD128-NOT: f32x4
1101 ; SIMD128-NEXT: .functype mul_v4f32 (v128, v128) -> (v128){{$}}
1102 ; SIMD128-NEXT: f32x4.mul $push[[R:[0-9]+]]=, $0, $1{{$}}
1103 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1104 define <4 x float> @mul_v4f32(<4 x float> %x, <4 x float> %y) {
1105   %a = fmul <4 x float> %x, %y  ; lane-wise f32 multiply; checks above expect a single f32x4.mul
1106   ret <4 x float> %a
1107 }
1108
1109 ; CHECK-LABEL: sqrt_v4f32:
1110 ; NO-SIMD128-NOT: f32x4
1111 ; SIMD128-VM-NOT: f32x4.sqrt
1112 ; SIMD128-NEXT: .functype sqrt_v4f32 (v128) -> (v128){{$}}
1113 ; SIMD128-NEXT: f32x4.sqrt $push[[R:[0-9]+]]=, $0{{$}}
1114 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1115 declare <4 x float> @llvm.sqrt.v4f32(<4 x float> %x)
1116 define <4 x float> @sqrt_v4f32(<4 x float> %x) {
1117   %a = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %x)  ; vector sqrt intrinsic; checks above expect f32x4.sqrt
1118   ret <4 x float> %a
1119 }
1120
1121 ; ==============================================================================
1122 ; 2 x double
1123 ; ==============================================================================
1124 ; CHECK-LABEL: neg_v2f64:
1125 ; NO-SIMD128-NOT: f64x2
1126 ; SIMD128-NEXT: .functype neg_v2f64 (v128) -> (v128){{$}}
1127 ; SIMD128-NEXT: f64x2.neg $push[[R:[0-9]+]]=, $0{{$}}
1128 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1129 define <2 x double> @neg_v2f64(<2 x double> %x) {
1130   ; nsz makes this semantically equivalent to flipping sign bit
1131   %a = fsub nsz <2 x double> <double 0., double 0.>, %x  ; 0.0 - %x per lane; checks above expect f64x2.neg
1132   ret <2 x double> %a
1133 }
1134
1135 ; CHECK-LABEL: abs_v2f64:
1136 ; NO-SIMD128-NOT: f64x2
1137 ; SIMD128-NEXT: .functype abs_v2f64 (v128) -> (v128){{$}}
1138 ; SIMD128-NEXT: f64x2.abs $push[[R:[0-9]+]]=, $0{{$}}
1139 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1140 declare <2 x double> @llvm.fabs.v2f64(<2 x double>) nounwind readnone
1141 define <2 x double> @abs_v2f64(<2 x double> %x) {
1142   %a = call <2 x double> @llvm.fabs.v2f64(<2 x double> %x)  ; vector fabs intrinsic; checks above expect f64x2.abs
1143   ret <2 x double> %a
1144 }
1145
1146 ; CHECK-LABEL: min_unordered_v2f64:
1147 ; NO-SIMD128-NOT: f64x2
1148 ; SIMD128-NEXT: .functype min_unordered_v2f64 (v128) -> (v128){{$}}
1149 ; SIMD128-NEXT: f64.const $push[[L0:[0-9]+]]=, 0x1.4p2
1150 ; SIMD128-NEXT: f64x2.splat $push[[L1:[0-9]+]]=, $pop[[L0]]
1151 ; SIMD128-NEXT: f64x2.min $push[[R:[0-9]+]]=, $0, $pop[[L1]]{{$}}
1152 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1153 define <2 x double> @min_unordered_v2f64(<2 x double> %x) {
1154   %cmps = fcmp ule <2 x double> %x, <double 5., double 5.>  ; per lane: %x <= 5.0, or unordered (NaN)
1155   %a = select <2 x i1> %cmps, <2 x double> %x,
1156     <2 x double> <double 5., double 5.>  ; %x where the compare holds, else 5.0; checks above expect f64x2.min
1157   ret <2 x double> %a
1158 }
1159
1160 ; CHECK-LABEL: max_unordered_v2f64:
1161 ; NO-SIMD128-NOT: f64x2
1162 ; SIMD128-NEXT: .functype max_unordered_v2f64 (v128) -> (v128){{$}}
1163 ; SIMD128-NEXT: f64.const $push[[L0:[0-9]+]]=, 0x1.4p2
1164 ; SIMD128-NEXT: f64x2.splat $push[[L1:[0-9]+]]=, $pop[[L0]]
1165 ; SIMD128-NEXT: f64x2.max $push[[R:[0-9]+]]=, $0, $pop[[L1]]{{$}}
1166 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1167 define <2 x double> @max_unordered_v2f64(<2 x double> %x) {
1168   %cmps = fcmp uge <2 x double> %x, <double 5., double 5.>  ; per lane: %x >= 5.0, or unordered (NaN)
1169   %a = select <2 x i1> %cmps, <2 x double> %x,
1170     <2 x double> <double 5., double 5.>  ; %x where the compare holds, else 5.0; checks above expect f64x2.max
1171   ret <2 x double> %a
1172 }
1173
1174 ; CHECK-LABEL: min_ordered_v2f64:
1175 ; NO-SIMD128-NOT: f64x2
1176 ; SIMD128-NEXT: .functype min_ordered_v2f64 (v128) -> (v128){{$}}
1177 ; SIMD128-NEXT: f64.const $push[[L0:[0-9]+]]=, 0x1.4p2
1178 ; SIMD128-NEXT: f64x2.splat $push[[L1:[0-9]+]]=, $pop[[L0]]
1179 ; SIMD128-NEXT: f64x2.min $push[[R:[0-9]+]]=, $0, $pop[[L1]]{{$}}
1180 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1181 define <2 x double> @min_ordered_v2f64(<2 x double> %x) {
1182   %cmps = fcmp ole <2 x double> <double 5., double 5.>, %x  ; per lane: 5.0 <= %x and neither is NaN
1183   %a = select <2 x i1> %cmps, <2 x double> <double 5., double 5.>,
1184     <2 x double> %x  ; 5.0 where the compare holds, else %x; checks above expect f64x2.min
1185   ret <2 x double> %a
1186 }
1187
1188 ; CHECK-LABEL: max_ordered_v2f64:
1189 ; NO-SIMD128-NOT: f64x2
1190 ; SIMD128-NEXT: .functype max_ordered_v2f64 (v128) -> (v128){{$}}
1191 ; SIMD128-NEXT: f64.const $push[[L0:[0-9]+]]=, 0x1.4p2
1192 ; SIMD128-NEXT: f64x2.splat $push[[L1:[0-9]+]]=, $pop[[L0]]
1193 ; SIMD128-NEXT: f64x2.max $push[[R:[0-9]+]]=, $0, $pop[[L1]]{{$}}
1194 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1195 define <2 x double> @max_ordered_v2f64(<2 x double> %x) {
1196   %cmps = fcmp oge <2 x double> <double 5., double 5.>, %x  ; per lane: 5.0 >= %x and neither is NaN
1197   %a = select <2 x i1> %cmps, <2 x double> <double 5., double 5.>,
1198     <2 x double> %x  ; 5.0 where the compare holds, else %x; checks above expect f64x2.max
1199   ret <2 x double> %a
1200 }
1201
1202 ; CHECK-LABEL: min_intrinsic_v2f64:
1203 ; NO-SIMD128-NOT: f64x2
1204 ; SIMD128-NEXT: .functype min_intrinsic_v2f64 (v128, v128) -> (v128){{$}}
1205 ; SIMD128-NEXT: f64x2.min $push[[R:[0-9]+]]=, $0, $1{{$}}
1206 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1207 declare <2 x double> @llvm.minimum.v2f64(<2 x double>, <2 x double>)
1208 define <2 x double> @min_intrinsic_v2f64(<2 x double> %x, <2 x double> %y) {
1209   %a = call <2 x double> @llvm.minimum.v2f64(<2 x double> %x, <2 x double> %y)  ; checks above expect a direct f64x2.min of $0 and $1
1210   ret <2 x double> %a
1211 }
1212
1213 ; CHECK-LABEL: max_intrinsic_v2f64:
1214 ; NO-SIMD128-NOT: f64x2
1215 ; SIMD128-NEXT: .functype max_intrinsic_v2f64 (v128, v128) -> (v128){{$}}
1216 ; SIMD128-NEXT: f64x2.max $push[[R:[0-9]+]]=, $0, $1{{$}}
1217 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1218 declare <2 x double> @llvm.maximum.v2f64(<2 x double>, <2 x double>)
1219 define <2 x double> @max_intrinsic_v2f64(<2 x double> %x, <2 x double> %y) {
1220   %a = call <2 x double> @llvm.maximum.v2f64(<2 x double> %x, <2 x double> %y)  ; checks above expect a direct f64x2.max of $0 and $1
1221   ret <2 x double> %a
1222 }
1223
1224 ; CHECK-LABEL: min_const_intrinsic_v2f64:
1225 ; NO-SIMD128-NOT: f64x2
1226 ; SIMD128-NEXT: .functype min_const_intrinsic_v2f64 () -> (v128){{$}}
1227 ; SIMD128-NEXT: f64.const $push[[L:[0-9]+]]=, 0x1.4p2{{$}}
1228 ; SIMD128-NEXT: f64x2.splat $push[[R:[0-9]+]]=, $pop[[L]]{{$}}
1229 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1230 define <2 x double> @min_const_intrinsic_v2f64() {
1231   %a = call <2 x double> @llvm.minimum.v2f64(
1232     <2 x double> <double 42., double 42.>,
1233     <2 x double> <double 5., double 5.>
1234   )  ; both operands are constants; checks above expect a folded splat of 0x1.4p2 (5.0) and no f64x2.min
1235   ret <2 x double> %a
1236 }
1237
1238 ; CHECK-LABEL: max_const_intrinsic_v2f64:
1239 ; NO-SIMD128-NOT: f64x2
1240 ; SIMD128-NEXT: .functype max_const_intrinsic_v2f64 () -> (v128){{$}}
1241 ; SIMD128-NEXT: f64.const $push[[L:[0-9]+]]=, 0x1.5p5{{$}}
1242 ; SIMD128-NEXT: f64x2.splat $push[[R:[0-9]+]]=, $pop[[L]]{{$}}
1243 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1244 define <2 x double> @max_const_intrinsic_v2f64() {
1245   %a = call <2 x double> @llvm.maximum.v2f64(
1246     <2 x double> <double 42., double 42.>,
1247     <2 x double> <double 5., double 5.>
1248   )  ; both operands are constants; checks above expect a folded splat of 0x1.5p5 (42.0) and no f64x2.max
1249   ret <2 x double> %a
1250 }
1251
1252 ; CHECK-LABEL: add_v2f64:
1253 ; NO-SIMD128-NOT: f64x2
1254 ; SIMD128-VM-NOT: f64x2
1255 ; SIMD128-NEXT: .functype add_v2f64 (v128, v128) -> (v128){{$}}
1256 ; SIMD128-NEXT: f64x2.add $push[[R:[0-9]+]]=, $0, $1{{$}}
1257 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1258 define <2 x double> @add_v2f64(<2 x double> %x, <2 x double> %y) {
1259   %a = fadd <2 x double> %x, %y  ; lane-wise f64 add; expected to select a single f64x2.add
1260   ret <2 x double> %a
1261 }
1262
1263 ; CHECK-LABEL: sub_v2f64:
1264 ; NO-SIMD128-NOT: f64x2
1265 ; SIMD128-VM-NOT: f64x2
1266 ; SIMD128-NEXT: .functype sub_v2f64 (v128, v128) -> (v128){{$}}
1267 ; SIMD128-NEXT: f64x2.sub $push[[R:[0-9]+]]=, $0, $1{{$}}
1268 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1269 define <2 x double> @sub_v2f64(<2 x double> %x, <2 x double> %y) {
1270   %a = fsub <2 x double> %x, %y  ; lane-wise f64 subtract; expected to select a single f64x2.sub
1271   ret <2 x double> %a
1272 }
1273
1274 ; CHECK-LABEL: div_v2f64:
1275 ; NO-SIMD128-NOT: f64x2
1276 ; SIMD128-VM-NOT: f64x2
1277 ; SIMD128-NEXT: .functype div_v2f64 (v128, v128) -> (v128){{$}}
1278 ; SIMD128-NEXT: f64x2.div $push[[R:[0-9]+]]=, $0, $1{{$}}
1279 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1280 define <2 x double> @div_v2f64(<2 x double> %x, <2 x double> %y) {
1281   %a = fdiv <2 x double> %x, %y  ; lane-wise f64 divide; expected to select a single f64x2.div
1282   ret <2 x double> %a
1283 }
1284
1285 ; CHECK-LABEL: mul_v2f64:
1286 ; NO-SIMD128-NOT: f64x2
1287 ; SIMD128-VM-NOT: f64x2
1288 ; SIMD128-NEXT: .functype mul_v2f64 (v128, v128) -> (v128){{$}}
1289 ; SIMD128-NEXT: f64x2.mul $push[[R:[0-9]+]]=, $0, $1{{$}}
1290 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1291 define <2 x double> @mul_v2f64(<2 x double> %x, <2 x double> %y) {
1292   %a = fmul <2 x double> %x, %y  ; lane-wise f64 multiply; expected to select a single f64x2.mul
1293   ret <2 x double> %a
1294 }
1295
1296 ; CHECK-LABEL: sqrt_v2f64:
1297 ; NO-SIMD128-NOT: f64x2
1298 ; SIMD128-NEXT: .functype sqrt_v2f64 (v128) -> (v128){{$}}
1299 ; SIMD128-NEXT: f64x2.sqrt $push[[R:[0-9]+]]=, $0{{$}}
1300 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1301 declare <2 x double> @llvm.sqrt.v2f64(<2 x double> %x)
1302 define <2 x double> @sqrt_v2f64(<2 x double> %x) {
1303   %a = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %x)  ; vector sqrt intrinsic; checks above expect f64x2.sqrt
1304   ret <2 x double> %a
1305 }