test/CodeGen/WebAssembly/simd-arith.ll

   1 ; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+unimplemented-simd128 | FileCheck %s --check-prefixes CHECK,SIMD128,SIMD128-SLOW
   2 ; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+unimplemented-simd128 -fast-isel | FileCheck %s --check-prefixes CHECK,SIMD128,SIMD128-FAST
   3 ; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 | FileCheck %s --check-prefixes CHECK,SIMD128-VM
   4 ; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 -fast-isel | FileCheck %s --check-prefixes CHECK,SIMD128-VM
   5 ; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers | FileCheck %s --check-prefixes CHECK,NO-SIMD128
   6 ; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -fast-isel | FileCheck %s --check-prefixes CHECK,NO-SIMD128
   7
   8 ; Check that a non-test run (including the explicit locals pass) at least finishes.
   9 ; RUN: llc < %s -O0 -mattr=+unimplemented-simd128
  10 ; RUN: llc < %s -O2 -mattr=+unimplemented-simd128
  11
  12 ; Test that basic SIMD128 arithmetic operations assemble as expected.
  13
  14 target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
  15 target triple = "wasm32-unknown-unknown"
  16
  17 ; ==============================================================================
  18 ; 16 x i8
  19 ; ==============================================================================
  20 ; CHECK-LABEL: add_v16i8:
  21 ; NO-SIMD128-NOT: i8x16
  22 ; SIMD128-NEXT: .functype add_v16i8 (v128, v128) -> (v128){{$}}
  23 ; SIMD128-NEXT: i8x16.add $push[[R:[0-9]+]]=, $0, $1{{$}}
  24 ; SIMD128-NEXT: return $pop[[R]]{{$}}
  25 define <16 x i8> @add_v16i8(<16 x i8> %x, <16 x i8> %y) {
  26   %a = add <16 x i8> %x, %y
  27   ret <16 x i8> %a
  28 }
  29
  30 ; CHECK-LABEL: sub_v16i8:
  31 ; NO-SIMD128-NOT: i8x16
  32 ; SIMD128-NEXT: .functype sub_v16i8 (v128, v128) -> (v128){{$}}
  33 ; SIMD128-NEXT: i8x16.sub $push[[R:[0-9]+]]=, $0, $1{{$}}
  34 ; SIMD128-NEXT: return $pop[[R]]{{$}}
  35 define <16 x i8> @sub_v16i8(<16 x i8> %x, <16 x i8> %y) {
  36   %a = sub <16 x i8> %x, %y
  37   ret <16 x i8> %a
  38 }
  39
  40 ; CHECK-LABEL: mul_v16i8:
  41 ; NO-SIMD128-NOT: i8x16
  42 ; SIMD128-NEXT: .functype mul_v16i8 (v128, v128) -> (v128){{$}}
  43 ; SIMD128-NEXT: i8x16.mul $push[[R:[0-9]+]]=, $0, $1{{$}}
  44 ; SIMD128-NEXT: return $pop[[R]]{{$}}
  45 define <16 x i8> @mul_v16i8(<16 x i8> %x, <16 x i8> %y) {
  46   %a = mul <16 x i8> %x, %y
  47   ret <16 x i8> %a
  48 }
  49
  50 ; CHECK-LABEL: neg_v16i8:
  51 ; NO-SIMD128-NOT: i8x16
  52 ; SIMD128-NEXT: .functype neg_v16i8 (v128) -> (v128){{$}}
  53 ; SIMD128-NEXT: i8x16.neg $push[[R:[0-9]+]]=, $0{{$}}
  54 ; SIMD128-NEXT: return $pop[[R]]{{$}}
  55 define <16 x i8> @neg_v16i8(<16 x i8> %x) {
  56   %a = sub <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0,
  57                       i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>,
  58                      %x
  59   ret <16 x i8> %a
  60 }
  61
  62 ; CHECK-LABEL: shl_v16i8:
  63 ; NO-SIMD128-NOT: i8x16
  64 ; SIMD128-NEXT: .functype shl_v16i8 (v128, i32) -> (v128){{$}}
  65 ; SIMD128-NEXT: i8x16.shl $push[[R:[0-9]+]]=, $0, $1{{$}}
  66 ; SIMD128-NEXT: return $pop[[R]]{{$}}
  67 define <16 x i8> @shl_v16i8(<16 x i8> %v, i8 %x) {
  68   %t = insertelement <16 x i8> undef, i8 %x, i32 0
  69   %s = shufflevector <16 x i8> %t, <16 x i8> undef,
  70     <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0,
  71                 i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  72   %a = shl <16 x i8> %v, %s
  73   ret <16 x i8> %a
  74 }
  75
  76 ; CHECK-LABEL: shl_const_v16i8:
     ; Shift left by a constant splat of 5. The +unimplemented-simd128 runs expect
     ; the amount to be materialized as a scalar i32.const feeding i8x16.shl. The
     ; constant line is end-anchored so that only the exact value 5 is accepted
     ; (an unanchored pattern would also accept 50, 55, ...).
  77 ; NO-SIMD128-NOT: i8x16
  78 ; SIMD128-NEXT: .functype shl_const_v16i8 (v128) -> (v128){{$}}
  79 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 5{{$}}
  80 ; SIMD128-NEXT: i8x16.shl $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
  81 ; SIMD128-NEXT: return $pop[[R]]{{$}}
  82 define <16 x i8> @shl_const_v16i8(<16 x i8> %v) {
  83   %a = shl <16 x i8> %v,
  84     <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5,
  85      i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5>
  86   ret <16 x i8> %a
  87 }
  88
  89 ; CHECK-LABEL: shl_vec_v16i8:
     ; Shift left where the shift amounts come from an arbitrary vector %x.
     ; The +unimplemented-simd128 runs expect the amount vector to be masked with
     ; a splat of 7, followed by a lane-by-lane expansion: extract the value and
     ; amount lanes, do a scalar i32.shl, and rebuild the vector (splat of the
     ; lane-0 result first, replace_lane for lane 15 last). Only the first and
     ; last lanes are spelled out; the lanes in between are skipped.
  90 ; NO-SIMD128-NOT: i8x16
  91 ; SIMD128-NEXT: .functype shl_vec_v16i8 (v128, v128) -> (v128){{$}}
  92 ; SIMD128-NEXT: i8x16.extract_lane_s $push[[L0:[0-9]+]]=, $0, 0{{$}}
  93 ; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 7{{$}}
  94 ; SIMD128-NEXT: i8x16.splat $push[[M1:[0-9]+]]=, $pop[[M0]]{{$}}
  95 ; SIMD128-NEXT: v128.and $push[[M2:[0-9]+]]=, $1, $pop[[M1]]{{$}}
  96 ; SIMD128-NEXT: local.tee $push[[M:[0-9]+]]=, $1=, $pop[[M2]]{{$}}
  97 ; SIMD128-NEXT: i8x16.extract_lane_u $push[[L1:[0-9]+]]=, $pop[[M]], 0{{$}}
  98 ; SIMD128-NEXT: i32.shl $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
  99 ; SIMD128-NEXT: i8x16.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
 100 ; Skip 14 lanes
 101 ; SIMD128:      i8x16.extract_lane_s $push[[L4:[0-9]+]]=, $0, 15{{$}}
 102 ; SIMD128-NEXT: i8x16.extract_lane_u $push[[L5:[0-9]+]]=, $1, 15{{$}}
 103 ; SIMD128-NEXT: i32.shl $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
 104 ; SIMD128-NEXT: i8x16.replace_lane $push[[R:[0-9]+]]=, $pop[[L7:[0-9]+]], 15, $pop[[L6]]{{$}}
 105 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 106 define <16 x i8> @shl_vec_v16i8(<16 x i8> %v, <16 x i8> %x) {
 107   %a = shl <16 x i8> %v, %x
 108   ret <16 x i8> %a
 109 }
 110
 111 ; CHECK-LABEL: shr_s_v16i8:
 112 ; NO-SIMD128-NOT: i8x16
 113 ; SIMD128-NEXT: .functype shr_s_v16i8 (v128, i32) -> (v128){{$}}
 114 ; SIMD128-NEXT: i8x16.shr_s $push[[R:[0-9]+]]=, $0, $1{{$}}
 115 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 116 define <16 x i8> @shr_s_v16i8(<16 x i8> %v, i8 %x) {
 117   %t = insertelement <16 x i8> undef, i8 %x, i32 0
 118   %s = shufflevector <16 x i8> %t, <16 x i8> undef,
 119     <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0,
 120                 i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
 121   %a = ashr <16 x i8> %v, %s
 122   ret <16 x i8> %a
 123 }
 124
 125 ; CHECK-LABEL: shr_s_vec_v16i8:
     ; Arithmetic shift right where the shift amounts come from an arbitrary
     ; vector %x. The +unimplemented-simd128 runs expect the amount vector to be
     ; masked with a splat of 7, then a lane-by-lane expansion using a scalar
     ; i32.shr_s. Only lanes 0 and 15 are spelled out. The lane-15 captures use
     ; the names L4-L7, as in the shl/shr_u vector-shift tests, so that no
     ; capture name is redefined within this function.
 126 ; NO-SIMD128-NOT: i8x16
 127 ; SIMD128-NEXT: .functype shr_s_vec_v16i8 (v128, v128) -> (v128){{$}}
 128 ; SIMD128-NEXT: i8x16.extract_lane_s $push[[L0:[0-9]+]]=, $0, 0{{$}}
 129 ; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 7{{$}}
 130 ; SIMD128-NEXT: i8x16.splat $push[[M1:[0-9]+]]=, $pop[[M0]]{{$}}
 131 ; SIMD128-NEXT: v128.and $push[[M2:[0-9]+]]=, $1, $pop[[M1]]{{$}}
 132 ; SIMD128-NEXT: local.tee $push[[M:[0-9]+]]=, $1=, $pop[[M2]]{{$}}
 133 ; SIMD128-NEXT: i8x16.extract_lane_u $push[[L1:[0-9]+]]=, $pop[[M]], 0{{$}}
 134 ; SIMD128-NEXT: i32.shr_s $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
 135 ; SIMD128-NEXT: i8x16.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
 136 ; Skip 14 lanes
 137 ; SIMD128:      i8x16.extract_lane_s $push[[L4:[0-9]+]]=, $0, 15{{$}}
 138 ; SIMD128-NEXT: i8x16.extract_lane_u $push[[L5:[0-9]+]]=, $1, 15{{$}}
 139 ; SIMD128-NEXT: i32.shr_s $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
 140 ; SIMD128-NEXT: i8x16.replace_lane $push[[R:[0-9]+]]=, $pop[[L7:[0-9]+]], 15, $pop[[L6]]{{$}}
 141 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 142 define <16 x i8> @shr_s_vec_v16i8(<16 x i8> %v, <16 x i8> %x) {
 143   %a = ashr <16 x i8> %v, %x
 144   ret <16 x i8> %a
 145 }
 146
 147 ; CHECK-LABEL: shr_u_v16i8:
 148 ; NO-SIMD128-NOT: i8x16
 149 ; SIMD128-NEXT: .functype shr_u_v16i8 (v128, i32) -> (v128){{$}}
 150 ; SIMD128-NEXT: i8x16.shr_u $push[[R:[0-9]+]]=, $0, $1{{$}}
 151 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 152 define <16 x i8> @shr_u_v16i8(<16 x i8> %v, i8 %x) {
 153   %t = insertelement <16 x i8> undef, i8 %x, i32 0
 154   %s = shufflevector <16 x i8> %t, <16 x i8> undef,
 155     <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0,
 156                 i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
 157   %a = lshr <16 x i8> %v, %s
 158   ret <16 x i8> %a
 159 }
 160
 161 ; CHECK-LABEL: shr_u_vec_v16i8:
     ; Logical shift right where the shift amounts come from an arbitrary
     ; vector %x. The +unimplemented-simd128 runs expect the amount vector to be
     ; masked with a splat of 7, then a lane-by-lane expansion: both the value
     ; and the amount lanes are extracted with extract_lane_u and shifted with a
     ; scalar i32.shr_u. Only lanes 0 and 15 are spelled out.
 162 ; NO-SIMD128-NOT: i8x16
 163 ; SIMD128-NEXT: .functype shr_u_vec_v16i8 (v128, v128) -> (v128){{$}}
 164 ; SIMD128-NEXT: i8x16.extract_lane_u $push[[L0:[0-9]+]]=, $0, 0{{$}}
 165 ; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 7{{$}}
 166 ; SIMD128-NEXT: i8x16.splat $push[[M1:[0-9]+]]=, $pop[[M0]]{{$}}
 167 ; SIMD128-NEXT: v128.and $push[[M2:[0-9]+]]=, $1, $pop[[M1]]{{$}}
 168 ; SIMD128-NEXT: local.tee $push[[M:[0-9]+]]=, $1=, $pop[[M2]]{{$}}
 169 ; SIMD128-NEXT: i8x16.extract_lane_u $push[[L1:[0-9]+]]=, $pop[[M]], 0{{$}}
 170 ; SIMD128-NEXT: i32.shr_u $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
 171 ; SIMD128-NEXT: i8x16.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
 172 ; Skip 14 lanes
 173 ; SIMD128:      i8x16.extract_lane_u $push[[L4:[0-9]+]]=, $0, 15{{$}}
 174 ; SIMD128-NEXT: i8x16.extract_lane_u $push[[L5:[0-9]+]]=, $1, 15{{$}}
 175 ; SIMD128-NEXT: i32.shr_u $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
 176 ; SIMD128-NEXT: i8x16.replace_lane $push[[R:[0-9]+]]=, $pop[[L7:[0-9]+]], 15, $pop[[L6]]{{$}}
 177 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 178 define <16 x i8> @shr_u_vec_v16i8(<16 x i8> %v, <16 x i8> %x) {
 179   %a = lshr <16 x i8> %v, %x
 180   ret <16 x i8> %a
 181 }
 182
 183 ; CHECK-LABEL: and_v16i8:
 184 ; NO-SIMD128-NOT: v128
 185 ; SIMD128-NEXT: .functype and_v16i8 (v128, v128) -> (v128){{$}}
 186 ; SIMD128-NEXT: v128.and $push[[R:[0-9]+]]=, $0, $1{{$}}
 187 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 188 define <16 x i8> @and_v16i8(<16 x i8> %x, <16 x i8> %y) {
 189   %a = and <16 x i8> %x, %y
 190   ret <16 x i8> %a
 191 }
 192
 193 ; CHECK-LABEL: or_v16i8:
 194 ; NO-SIMD128-NOT: v128
 195 ; SIMD128-NEXT: .functype or_v16i8 (v128, v128) -> (v128){{$}}
 196 ; SIMD128-NEXT: v128.or $push[[R:[0-9]+]]=, $0, $1{{$}}
 197 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 198 define <16 x i8> @or_v16i8(<16 x i8> %x, <16 x i8> %y) {
 199   %a = or <16 x i8> %x, %y
 200   ret <16 x i8> %a
 201 }
 202
 203 ; CHECK-LABEL: xor_v16i8:
 204 ; NO-SIMD128-NOT: v128
 205 ; SIMD128-NEXT: .functype xor_v16i8 (v128, v128) -> (v128){{$}}
 206 ; SIMD128-NEXT: v128.xor $push[[R:[0-9]+]]=, $0, $1{{$}}
 207 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 208 define <16 x i8> @xor_v16i8(<16 x i8> %x, <16 x i8> %y) {
 209   %a = xor <16 x i8> %x, %y
 210   ret <16 x i8> %a
 211 }
 212
 213 ; CHECK-LABEL: not_v16i8:
 214 ; NO-SIMD128-NOT: v128
 215 ; SIMD128-NEXT: .functype not_v16i8 (v128) -> (v128){{$}}
 216 ; SIMD128-NEXT: v128.not $push[[R:[0-9]+]]=, $0{{$}}
 217 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 218 define <16 x i8> @not_v16i8(<16 x i8> %x) {
 219   %a = xor <16 x i8> %x, <i8 -1, i8 -1, i8 -1, i8 -1,
 220                           i8 -1, i8 -1, i8 -1, i8 -1,
 221                           i8 -1, i8 -1, i8 -1, i8 -1,
 222                           i8 -1, i8 -1, i8 -1, i8 -1>
 223   ret <16 x i8> %a
 224 }
 225
 226 ; CHECK-LABEL: bitselect_v16i8:
     ; Bitwise select written out as (c & v1) | (~c & v2).
     ; With +unimplemented-simd128, the run without -fast-isel expects the whole
     ; idiom to become a single v128.bitselect with operands $1, $2, $0; the
     ; -fast-isel run expects the four separate and/not/and/or instructions
     ; (operands are not checked there).
 227 ; NO-SIMD128-NOT: v128
 228 ; SIMD128-NEXT: .functype bitselect_v16i8 (v128, v128, v128) -> (v128){{$}}
 229 ; SIMD128-SLOW-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $0{{$}}
 230 ; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}}
 231 ; SIMD128-FAST-NEXT: v128.and
 232 ; SIMD128-FAST-NEXT: v128.not
 233 ; SIMD128-FAST-NEXT: v128.and
 234 ; SIMD128-FAST-NEXT: v128.or
 235 ; SIMD128-FAST-NEXT: return
 236 define <16 x i8> @bitselect_v16i8(<16 x i8> %c, <16 x i8> %v1, <16 x i8> %v2) {
 237   %masked_v1 = and <16 x i8> %c, %v1
 238   %inv_mask = xor <16 x i8> %c,
 239     <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
 240      i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
 241   %masked_v2 = and <16 x i8> %inv_mask, %v2
 242   %a = or <16 x i8> %masked_v1, %masked_v2
 243   ret <16 x i8> %a
 244 }
 245
 246 ; ==============================================================================
 247 ; 8 x i16
 248 ; ==============================================================================
 249 ; CHECK-LABEL: add_v8i16:
 250 ; NO-SIMD128-NOT: i16x8
 251 ; SIMD128-NEXT: .functype add_v8i16 (v128, v128) -> (v128){{$}}
 252 ; SIMD128-NEXT: i16x8.add $push[[R:[0-9]+]]=, $0, $1{{$}}
 253 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 254 define <8 x i16> @add_v8i16(<8 x i16> %x, <8 x i16> %y) {
 255   %a = add <8 x i16> %x, %y
 256   ret <8 x i16> %a
 257 }
 258
 259 ; CHECK-LABEL: sub_v8i16:
 260 ; NO-SIMD128-NOT: i16x8
 261 ; SIMD128-NEXT: .functype sub_v8i16 (v128, v128) -> (v128){{$}}
 262 ; SIMD128-NEXT: i16x8.sub $push[[R:[0-9]+]]=, $0, $1{{$}}
 263 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 264 define <8 x i16> @sub_v8i16(<8 x i16> %x, <8 x i16> %y) {
 265   %a = sub <8 x i16> %x, %y
 266   ret <8 x i16> %a
 267 }
 268
 269 ; CHECK-LABEL: mul_v8i16:
 270 ; NO-SIMD128-NOT: i16x8
 271 ; SIMD128-NEXT: .functype mul_v8i16 (v128, v128) -> (v128){{$}}
 272 ; SIMD128-NEXT: i16x8.mul $push[[R:[0-9]+]]=, $0, $1{{$}}
 273 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 274 define <8 x i16> @mul_v8i16(<8 x i16> %x, <8 x i16> %y) {
 275   %a = mul <8 x i16> %x, %y
 276   ret <8 x i16> %a
 277 }
 278
 279 ; CHECK-LABEL: neg_v8i16:
 280 ; NO-SIMD128-NOT: i16x8
 281 ; SIMD128-NEXT: .functype neg_v8i16 (v128) -> (v128){{$}}
 282 ; SIMD128-NEXT: i16x8.neg $push[[R:[0-9]+]]=, $0{{$}}
 283 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 284 define <8 x i16> @neg_v8i16(<8 x i16> %x) {
 285   %a = sub <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>,
 286                      %x
 287   ret <8 x i16> %a
 288 }
 289
 290 ; CHECK-LABEL: shl_v8i16:
 291 ; NO-SIMD128-NOT: i16x8
 292 ; SIMD128-NEXT: .functype shl_v8i16 (v128, i32) -> (v128){{$}}
 293 ; SIMD128-NEXT: i16x8.shl $push[[R:[0-9]+]]=, $0, $1{{$}}
 294 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 295 define <8 x i16> @shl_v8i16(<8 x i16> %v, i16 %x) {
 296   %t = insertelement <8 x i16> undef, i16 %x, i32 0
 297   %s = shufflevector <8 x i16> %t, <8 x i16> undef,
 298     <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
 299   %a = shl <8 x i16> %v, %s
 300   ret <8 x i16> %a
 301 }
 302
 303 ; CHECK-LABEL: shl_const_v8i16:
     ; Shift left by a constant splat of 5. The +unimplemented-simd128 runs expect
     ; the amount to be materialized as a scalar i32.const feeding i16x8.shl. The
     ; constant line is end-anchored so that only the exact value 5 is accepted
     ; (an unanchored pattern would also accept 50, 55, ...).
 304 ; NO-SIMD128-NOT: i16x8
 305 ; SIMD128-NEXT: .functype shl_const_v8i16 (v128) -> (v128){{$}}
 306 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 5{{$}}
 307 ; SIMD128-NEXT: i16x8.shl $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
 308 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 309 define <8 x i16> @shl_const_v8i16(<8 x i16> %v) {
 310   %a = shl <8 x i16> %v,
 311     <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
 312   ret <8 x i16> %a
 313 }
 314
 315 ; CHECK-LABEL: shl_vec_v8i16:
 316 ; NO-SIMD128-NOT: i16x8
 317 ; SIMD128-NEXT: .functype shl_vec_v8i16 (v128, v128) -> (v128){{$}}
 318 ; SIMD128-NEXT: i16x8.extract_lane_s $push[[L0:[0-9]+]]=, $0, 0{{$}}
 319 ; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 15{{$}}
 320 ; SIMD128-NEXT: i16x8.splat $push[[M1:[0-9]+]]=, $pop[[M0]]{{$}}
 321 ; SIMD128-NEXT: v128.and $push[[M2:[0-9]+]]=, $1, $pop[[M1]]{{$}}
 322 ; SIMD128-NEXT: local.tee $push[[M:[0-9]+]]=, $1=, $pop[[M2]]{{$}}
 323 ; SIMD128-NEXT: i16x8.extract_lane_u $push[[L1:[0-9]+]]=, $pop[[M]], 0{{$}}
 324 ; SIMD128-NEXT: i32.shl $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
 325 ; SIMD128-NEXT: i16x8.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
 326 ; Skip 6 lanes
 327 ; SIMD128:      i16x8.extract_lane_s $push[[L4:[0-9]+]]=, $0, 7{{$}}
 328 ; SIMD128-NEXT: i16x8.extract_lane_u $push[[L5:[0-9]+]]=, $1, 7{{$}}
 329 ; SIMD128-NEXT: i32.shl $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
 330 ; SIMD128-NEXT: i16x8.replace_lane $push[[R:[0-9]+]]=, $pop[[L7:[0-9]+]], 7, $pop[[L6]]{{$}}
 331 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 332 define <8 x i16> @shl_vec_v8i16(<8 x i16> %v, <8 x i16> %x) {
 333   %a = shl <8 x i16> %v, %x
 334   ret <8 x i16> %a
 335 }
 336
 337 ; CHECK-LABEL: shr_s_v8i16:
 338 ; NO-SIMD128-NOT: i16x8
 339 ; SIMD128-NEXT: .functype shr_s_v8i16 (v128, i32) -> (v128){{$}}
 340 ; SIMD128-NEXT: i16x8.shr_s $push[[R:[0-9]+]]=, $0, $1{{$}}
 341 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 342 define <8 x i16> @shr_s_v8i16(<8 x i16> %v, i16 %x) {
 343   %t = insertelement <8 x i16> undef, i16 %x, i32 0
 344   %s = shufflevector <8 x i16> %t, <8 x i16> undef,
 345     <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
 346   %a = ashr <8 x i16> %v, %s
 347   ret <8 x i16> %a
 348 }
 349
 350 ; CHECK-LABEL: shr_s_vec_v8i16:
     ; Arithmetic shift right where the shift amounts come from an arbitrary
     ; vector %x. The +unimplemented-simd128 runs expect the amount vector to be
     ; masked with a splat of 15, then a lane-by-lane expansion using a scalar
     ; i32.shr_s. Only lanes 0 and 7 are spelled out. The lane-7 captures use
     ; the names L4-L7, as in the shl/shr_u vector-shift tests, so that no
     ; capture name is redefined within this function.
 351 ; NO-SIMD128-NOT: i16x8
 352 ; SIMD128-NEXT: .functype shr_s_vec_v8i16 (v128, v128) -> (v128){{$}}
 353 ; SIMD128-NEXT: i16x8.extract_lane_s $push[[L0:[0-9]+]]=, $0, 0{{$}}
 354 ; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 15{{$}}
 355 ; SIMD128-NEXT: i16x8.splat $push[[M1:[0-9]+]]=, $pop[[M0]]{{$}}
 356 ; SIMD128-NEXT: v128.and $push[[M2:[0-9]+]]=, $1, $pop[[M1]]{{$}}
 357 ; SIMD128-NEXT: local.tee $push[[M:[0-9]+]]=, $1=, $pop[[M2]]{{$}}
 358 ; SIMD128-NEXT: i16x8.extract_lane_u $push[[L1:[0-9]+]]=, $pop[[M]], 0{{$}}
 359 ; SIMD128-NEXT: i32.shr_s $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
 360 ; SIMD128-NEXT: i16x8.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
 361 ; Skip 6 lanes
 362 ; SIMD128:      i16x8.extract_lane_s $push[[L4:[0-9]+]]=, $0, 7{{$}}
 363 ; SIMD128-NEXT: i16x8.extract_lane_u $push[[L5:[0-9]+]]=, $1, 7{{$}}
 364 ; SIMD128-NEXT: i32.shr_s $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
 365 ; SIMD128-NEXT: i16x8.replace_lane $push[[R:[0-9]+]]=, $pop[[L7:[0-9]+]], 7, $pop[[L6]]{{$}}
 366 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 367 define <8 x i16> @shr_s_vec_v8i16(<8 x i16> %v, <8 x i16> %x) {
 368   %a = ashr <8 x i16> %v, %x
 369   ret <8 x i16> %a
 370 }
 371
 372 ; CHECK-LABEL: shr_u_v8i16:
 373 ; NO-SIMD128-NOT: i16x8
 374 ; SIMD128-NEXT: .functype shr_u_v8i16 (v128, i32) -> (v128){{$}}
 375 ; SIMD128-NEXT: i16x8.shr_u $push[[R:[0-9]+]]=, $0, $1{{$}}
 376 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 377 define <8 x i16> @shr_u_v8i16(<8 x i16> %v, i16 %x) {
 378   %t = insertelement <8 x i16> undef, i16 %x, i32 0
 379   %s = shufflevector <8 x i16> %t, <8 x i16> undef,
 380     <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
 381   %a = lshr <8 x i16> %v, %s
 382   ret <8 x i16> %a
 383 }
 384
 385 ; CHECK-LABEL: shr_u_vec_v8i16:
 386 ; NO-SIMD128-NOT: i16x8
 387 ; SIMD128-NEXT: .functype shr_u_vec_v8i16 (v128, v128) -> (v128){{$}}
 388 ; SIMD128-NEXT: i16x8.extract_lane_u $push[[L0:[0-9]+]]=, $0, 0{{$}}
 389 ; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 15{{$}}
 390 ; SIMD128-NEXT: i16x8.splat $push[[M1:[0-9]+]]=, $pop[[M0]]{{$}}
 391 ; SIMD128-NEXT: v128.and $push[[M2:[0-9]+]]=, $1, $pop[[M1]]{{$}}
 392 ; SIMD128-NEXT: local.tee $push[[M:[0-9]+]]=, $1=, $pop[[M2]]{{$}}
 393 ; SIMD128-NEXT: i16x8.extract_lane_u $push[[L1:[0-9]+]]=, $pop[[M]], 0{{$}}
 394 ; SIMD128-NEXT: i32.shr_u $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
 395 ; SIMD128-NEXT: i16x8.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
 396 ; Skip 6 lanes
 397 ; SIMD128:      i16x8.extract_lane_u $push[[L4:[0-9]+]]=, $0, 7{{$}}
 398 ; SIMD128-NEXT: i16x8.extract_lane_u $push[[L5:[0-9]+]]=, $1, 7{{$}}
 399 ; SIMD128-NEXT: i32.shr_u $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
 400 ; SIMD128-NEXT: i16x8.replace_lane $push[[R:[0-9]+]]=, $pop[[L7:[0-9]+]], 7, $pop[[L6]]{{$}}
 401 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 402 define <8 x i16> @shr_u_vec_v8i16(<8 x i16> %v, <8 x i16> %x) {
 403   %a = lshr <8 x i16> %v, %x
 404   ret <8 x i16> %a
 405 }
 406
 407 ; CHECK-LABEL: and_v8i16:
 408 ; NO-SIMD128-NOT: v128
 409 ; SIMD128-NEXT: .functype and_v8i16 (v128, v128) -> (v128){{$}}
 410 ; SIMD128-NEXT: v128.and $push[[R:[0-9]+]]=, $0, $1{{$}}
 411 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 412 define <8 x i16> @and_v8i16(<8 x i16> %x, <8 x i16> %y) {
 413   %a = and <8 x i16> %x, %y
 414   ret <8 x i16> %a
 415 }
 416
 417 ; CHECK-LABEL: or_v8i16:
 418 ; NO-SIMD128-NOT: v128
 419 ; SIMD128-NEXT: .functype or_v8i16 (v128, v128) -> (v128){{$}}
 420 ; SIMD128-NEXT: v128.or $push[[R:[0-9]+]]=, $0, $1{{$}}
 421 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 422 define <8 x i16> @or_v8i16(<8 x i16> %x, <8 x i16> %y) {
 423   %a = or <8 x i16> %x, %y
 424   ret <8 x i16> %a
 425 }
 426
 427 ; CHECK-LABEL: xor_v8i16:
 428 ; NO-SIMD128-NOT: v128
 429 ; SIMD128-NEXT: .functype xor_v8i16 (v128, v128) -> (v128){{$}}
 430 ; SIMD128-NEXT: v128.xor $push[[R:[0-9]+]]=, $0, $1{{$}}
 431 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 432 define <8 x i16> @xor_v8i16(<8 x i16> %x, <8 x i16> %y) {
 433   %a = xor <8 x i16> %x, %y
 434   ret <8 x i16> %a
 435 }
 436
 437 ; CHECK-LABEL: not_v8i16:
 438 ; NO-SIMD128-NOT: v128
 439 ; SIMD128-NEXT: .functype not_v8i16 (v128) -> (v128){{$}}
 440 ; SIMD128-NEXT: v128.not $push[[R:[0-9]+]]=, $0{{$}}
 441 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 442 define <8 x i16> @not_v8i16(<8 x i16> %x) {
 443   %a = xor <8 x i16> %x, <i16 -1, i16 -1, i16 -1, i16 -1,
 444                           i16 -1, i16 -1, i16 -1, i16 -1>
 445   ret <8 x i16> %a
 446 }
 447
 448 ; CHECK-LABEL: bitselect_v8i16:
     ; Bitwise select written out as (v1 & c) | (v2 & ~c); here the mask %c is
     ; the second operand of the first 'and' and of the 'xor', and the inverted
     ; mask is the second operand of the second 'and'.
     ; With +unimplemented-simd128, the run without -fast-isel expects a single
     ; v128.bitselect with operands $1, $2, $0; the -fast-isel run expects the
     ; four separate and/not/and/or instructions (operands are not checked there).
 449 ; NO-SIMD128-NOT: v128
 450 ; SIMD128-NEXT: .functype bitselect_v8i16 (v128, v128, v128) -> (v128){{$}}
 451 ; SIMD128-SLOW-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $0{{$}}
 452 ; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}}
 453 ; SIMD128-FAST-NEXT: v128.and
 454 ; SIMD128-FAST-NEXT: v128.not
 455 ; SIMD128-FAST-NEXT: v128.and
 456 ; SIMD128-FAST-NEXT: v128.or
 457 ; SIMD128-FAST-NEXT: return
 458 define <8 x i16> @bitselect_v8i16(<8 x i16> %c, <8 x i16> %v1, <8 x i16> %v2) {
 459   %masked_v1 = and <8 x i16> %v1, %c
 460   %inv_mask = xor <8 x i16>
 461     <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>,
 462     %c
 463   %masked_v2 = and <8 x i16> %v2, %inv_mask
 464   %a = or <8 x i16> %masked_v1, %masked_v2
 465   ret <8 x i16> %a
 466 }
 467
 468 ; ==============================================================================
 469 ; 4 x i32
 470 ; ==============================================================================
 471 ; CHECK-LABEL: add_v4i32:
 472 ; NO-SIMD128-NOT: i32x4
 473 ; SIMD128-NEXT: .functype add_v4i32 (v128, v128) -> (v128){{$}}
 474 ; SIMD128-NEXT: i32x4.add $push[[R:[0-9]+]]=, $0, $1{{$}}
 475 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 476 define <4 x i32> @add_v4i32(<4 x i32> %x, <4 x i32> %y) {
 477   %a = add <4 x i32> %x, %y
 478   ret <4 x i32> %a
 479 }
 480
 481 ; CHECK-LABEL: sub_v4i32:
 482 ; NO-SIMD128-NOT: i32x4
 483 ; SIMD128-NEXT: .functype sub_v4i32 (v128, v128) -> (v128){{$}}
 484 ; SIMD128-NEXT: i32x4.sub $push[[R:[0-9]+]]=, $0, $1{{$}}
 485 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 486 define <4 x i32> @sub_v4i32(<4 x i32> %x, <4 x i32> %y) {
 487   %a = sub <4 x i32> %x, %y
 488   ret <4 x i32> %a
 489 }
 490
 491 ; CHECK-LABEL: mul_v4i32:
 492 ; NO-SIMD128-NOT: i32x4
 493 ; SIMD128-NEXT: .functype mul_v4i32 (v128, v128) -> (v128){{$}}
 494 ; SIMD128-NEXT: i32x4.mul $push[[R:[0-9]+]]=, $0, $1{{$}}
 495 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 496 define <4 x i32> @mul_v4i32(<4 x i32> %x, <4 x i32> %y) {
 497   %a = mul <4 x i32> %x, %y
 498   ret <4 x i32> %a
 499 }
 500
 501 ; CHECK-LABEL: neg_v4i32:
 502 ; NO-SIMD128-NOT: i32x4
 503 ; SIMD128-NEXT: .functype neg_v4i32 (v128) -> (v128){{$}}
 504 ; SIMD128-NEXT: i32x4.neg $push[[R:[0-9]+]]=, $0{{$}}
 505 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 506 define <4 x i32> @neg_v4i32(<4 x i32> %x) {
 507   %a = sub <4 x i32> <i32 0, i32 0, i32 0, i32 0>, %x
 508   ret <4 x i32> %a
 509 }
 510
 511 ; CHECK-LABEL: shl_v4i32:
 512 ; NO-SIMD128-NOT: i32x4
 513 ; SIMD128-NEXT: .functype shl_v4i32 (v128, i32) -> (v128){{$}}
 514 ; SIMD128-NEXT: i32x4.shl $push[[R:[0-9]+]]=, $0, $1{{$}}
 515 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 516 define <4 x i32> @shl_v4i32(<4 x i32> %v, i32 %x) {
 517   %t = insertelement <4 x i32> undef, i32 %x, i32 0
 518   %s = shufflevector <4 x i32> %t, <4 x i32> undef,
 519     <4 x i32> <i32 0, i32 0, i32 0, i32 0>
 520   %a = shl <4 x i32> %v, %s
 521   ret <4 x i32> %a
 522 }
 523
 524 ; CHECK-LABEL: shl_const_v4i32:
     ; Shift left by a constant splat of 5. The +unimplemented-simd128 runs expect
     ; the amount to be materialized as a scalar i32.const feeding i32x4.shl. The
     ; constant line is end-anchored so that only the exact value 5 is accepted
     ; (an unanchored pattern would also accept 50, 55, ...).
 525 ; NO-SIMD128-NOT: i32x4
 526 ; SIMD128-NEXT: .functype shl_const_v4i32 (v128) -> (v128){{$}}
 527 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 5{{$}}
 528 ; SIMD128-NEXT: i32x4.shl $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
 529 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 530 define <4 x i32> @shl_const_v4i32(<4 x i32> %v) {
 531   %a = shl <4 x i32> %v, <i32 5, i32 5, i32 5, i32 5>
 532   ret <4 x i32> %a
 533 }
 534
 535 ; CHECK-LABEL: shl_vec_v4i32:
     ; Shift left where the shift amounts come from an arbitrary vector %x.
     ; The +unimplemented-simd128 runs expect a lane-by-lane expansion: extract
     ; the value and amount lanes, do a scalar i32.shl, and rebuild the vector
     ; (splat of the lane-0 result first, replace_lane for lane 3 last). No
     ; masking of the amount vector appears in the expected output. Only lanes
     ; 0 and 3 are spelled out.
 536 ; NO-SIMD128-NOT: i32x4
 537 ; SIMD128-NEXT: .functype shl_vec_v4i32 (v128, v128) -> (v128){{$}}
 538 ; SIMD128-NEXT: i32x4.extract_lane $push[[L0:[0-9]+]]=, $0, 0{{$}}
 539 ; SIMD128-NEXT: i32x4.extract_lane $push[[L1:[0-9]+]]=, $1, 0{{$}}
 540 ; SIMD128-NEXT: i32.shl $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
 541 ; SIMD128-NEXT: i32x4.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
 542 ; Skip 2 lanes
 543 ; SIMD128:      i32x4.extract_lane $push[[L4:[0-9]+]]=, $0, 3{{$}}
 544 ; SIMD128-NEXT: i32x4.extract_lane $push[[L5:[0-9]+]]=, $1, 3{{$}}
 545 ; SIMD128-NEXT: i32.shl $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
 546 ; SIMD128-NEXT: i32x4.replace_lane $push[[R:[0-9]+]]=, $pop[[L7:[0-9]+]], 3, $pop[[L6]]{{$}}
 547 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 548 define <4 x i32> @shl_vec_v4i32(<4 x i32> %v, <4 x i32> %x) {
 549   %a = shl <4 x i32> %v, %x
 550   ret <4 x i32> %a
 551 }
 552
 553 ; CHECK-LABEL: shr_s_v4i32:
 554 ; NO-SIMD128-NOT: i32x4
 555 ; SIMD128-NEXT: .functype shr_s_v4i32 (v128, i32) -> (v128){{$}}
 556 ; SIMD128-NEXT: i32x4.shr_s $push[[R:[0-9]+]]=, $0, $1{{$}}
 557 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 558 define <4 x i32> @shr_s_v4i32(<4 x i32> %v, i32 %x) {
 559   %t = insertelement <4 x i32> undef, i32 %x, i32 0
 560   %s = shufflevector <4 x i32> %t, <4 x i32> undef,
 561     <4 x i32> <i32 0, i32 0, i32 0, i32 0>
 562   %a = ashr <4 x i32> %v, %s
 563   ret <4 x i32> %a
 564 }
 565
 566 ; CHECK-LABEL: shr_s_vec_v4i32:
 567 ; NO-SIMD128-NOT: i32x4
 568 ; SIMD128-NEXT: .functype shr_s_vec_v4i32 (v128, v128) -> (v128){{$}}
 569 ; SIMD128-NEXT: i32x4.extract_lane $push[[L0:[0-9]+]]=, $0, 0{{$}}
 570 ; SIMD128-NEXT: i32x4.extract_lane $push[[L1:[0-9]+]]=, $1, 0{{$}}
 571 ; SIMD128-NEXT: i32.shr_s $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
 572 ; SIMD128-NEXT: i32x4.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
 573 ; Skip 2 lanes
 574 ; SIMD128:      i32x4.extract_lane $push[[L4:[0-9]+]]=, $0, 3{{$}}
 575 ; SIMD128-NEXT: i32x4.extract_lane $push[[L5:[0-9]+]]=, $1, 3{{$}}
 576 ; SIMD128-NEXT: i32.shr_s $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
 577 ; SIMD128-NEXT: i32x4.replace_lane $push[[R:[0-9]+]]=, $pop[[L7:[0-9]+]], 3, $pop[[L6]]{{$}}
 578 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 579 define <4 x i32> @shr_s_vec_v4i32(<4 x i32> %v, <4 x i32> %x) {
 580   %a = ashr <4 x i32> %v, %x
 581   ret <4 x i32> %a
 582 }
 583
 584 ; CHECK-LABEL: shr_u_v4i32:
 585 ; NO-SIMD128-NOT: i32x4
 586 ; SIMD128-NEXT: .functype shr_u_v4i32 (v128, i32) -> (v128){{$}}
 587 ; SIMD128-NEXT: i32x4.shr_u $push[[R:[0-9]+]]=, $0, $1{{$}}
 588 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 589 define <4 x i32> @shr_u_v4i32(<4 x i32> %v, i32 %x) {
 590   %t = insertelement <4 x i32> undef, i32 %x, i32 0
 591   %s = shufflevector <4 x i32> %t, <4 x i32> undef,
 592     <4 x i32> <i32 0, i32 0, i32 0, i32 0>
 593   %a = lshr <4 x i32> %v, %s
 594   ret <4 x i32> %a
 595 }
 596
 597 ; CHECK-LABEL: shr_u_vec_v4i32:
 598 ; NO-SIMD128-NOT: i32x4
 599 ; SIMD128-NEXT: .functype shr_u_vec_v4i32 (v128, v128) -> (v128){{$}}
 600 ; SIMD128-NEXT: i32x4.extract_lane $push[[L0:[0-9]+]]=, $0, 0{{$}}
 601 ; SIMD128-NEXT: i32x4.extract_lane $push[[L1:[0-9]+]]=, $1, 0{{$}}
 602 ; SIMD128-NEXT: i32.shr_u $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
 603 ; SIMD128-NEXT: i32x4.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
 604 ; Skip 2 lanes
 605 ; SIMD128:      i32x4.extract_lane $push[[L4:[0-9]+]]=, $0, 3{{$}}
 606 ; SIMD128-NEXT: i32x4.extract_lane $push[[L5:[0-9]+]]=, $1, 3{{$}}
 607 ; SIMD128-NEXT: i32.shr_u $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
 608 ; SIMD128-NEXT: i32x4.replace_lane $push[[R:[0-9]+]]=, $pop[[L7:[0-9]+]], 3, $pop[[L6]]{{$}}
 609 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 610 define <4 x i32> @shr_u_vec_v4i32(<4 x i32> %v, <4 x i32> %x) {
 611   %a = lshr <4 x i32> %v, %x
 612   ret <4 x i32> %a
 613 }
 614
 615 ; CHECK-LABEL: and_v4i32:
 616 ; NO-SIMD128-NOT: v128
 617 ; SIMD128-NEXT: .functype and_v4i32 (v128, v128) -> (v128){{$}}
 618 ; SIMD128-NEXT: v128.and $push[[R:[0-9]+]]=, $0, $1{{$}}
 619 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 620 define <4 x i32> @and_v4i32(<4 x i32> %x, <4 x i32> %y) {
 621   %a = and <4 x i32> %x, %y
 622   ret <4 x i32> %a
 623 }
 624
 625 ; CHECK-LABEL: or_v4i32:
 626 ; NO-SIMD128-NOT: v128
 627 ; SIMD128-NEXT: .functype or_v4i32 (v128, v128) -> (v128){{$}}
 628 ; SIMD128-NEXT: v128.or $push[[R:[0-9]+]]=, $0, $1{{$}}
 629 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 630 define <4 x i32> @or_v4i32(<4 x i32> %x, <4 x i32> %y) {
 631   %a = or <4 x i32> %x, %y
 632   ret <4 x i32> %a
 633 }
 634
 635 ; CHECK-LABEL: xor_v4i32:
 636 ; NO-SIMD128-NOT: v128
 637 ; SIMD128-NEXT: .functype xor_v4i32 (v128, v128) -> (v128){{$}}
 638 ; SIMD128-NEXT: v128.xor $push[[R:[0-9]+]]=, $0, $1{{$}}
 639 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 640 define <4 x i32> @xor_v4i32(<4 x i32> %x, <4 x i32> %y) {
 641   %a = xor <4 x i32> %x, %y
 642   ret <4 x i32> %a
 643 }
 644
 645 ; CHECK-LABEL: not_v4i32:
 646 ; NO-SIMD128-NOT: v128
 647 ; SIMD128-NEXT: .functype not_v4i32 (v128) -> (v128){{$}}
 648 ; SIMD128-NEXT: v128.not $push[[R:[0-9]+]]=, $0{{$}}
 649 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 650 define <4 x i32> @not_v4i32(<4 x i32> %x) {
 651   %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
 652   ret <4 x i32> %a
 653 }
 654
 655 ; CHECK-LABEL: bitselect_v4i32:
     ; Bitwise select written out as (~c & v2) | (c & v1); note the operands of
     ; the final 'or' are %masked_v2 then %masked_v1.
     ; With +unimplemented-simd128, the run without -fast-isel expects a single
     ; v128.bitselect with operands $1, $2, $0; the -fast-isel run expects
     ; v128.not, two v128.and, then v128.or (operands are not checked there).
 656 ; NO-SIMD128-NOT: v128
 657 ; SIMD128-NEXT: .functype bitselect_v4i32 (v128, v128, v128) -> (v128){{$}}
 658 ; SIMD128-SLOW-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $0{{$}}
 659 ; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}}
 660 ; SIMD128-FAST-NEXT: v128.not
 661 ; SIMD128-FAST-NEXT: v128.and
 662 ; SIMD128-FAST-NEXT: v128.and
 663 ; SIMD128-FAST-NEXT: v128.or
 664 ; SIMD128-FAST-NEXT: return
 665 define <4 x i32> @bitselect_v4i32(<4 x i32> %c, <4 x i32> %v1, <4 x i32> %v2) {
 666   %masked_v1 = and <4 x i32> %c, %v1
 667   %inv_mask = xor <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, %c
 668   %masked_v2 = and <4 x i32> %inv_mask, %v2
 669   %a = or <4 x i32> %masked_v2, %masked_v1
 670   ret <4 x i32> %a
 671 }
 672
 673 ; ==============================================================================
 674 ; 2 x i64
 675 ; ==============================================================================
 676 ; CHECK-LABEL: add_v2i64:
 677 ; NO-SIMD128-NOT: i64x2
 678 ; SIMD128-VM-NOT: i64x2
 679 ; SIMD128-NEXT: .functype add_v2i64 (v128, v128) -> (v128){{$}}
 680 ; SIMD128-NEXT: i64x2.add $push[[R:[0-9]+]]=, $0, $1{{$}}
 681 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 682 define <2 x i64> @add_v2i64(<2 x i64> %x, <2 x i64> %y) {
 683   %a = add <2 x i64> %x, %y
 684   ret <2 x i64> %a
 685 }
 686
 687 ; CHECK-LABEL: sub_v2i64:
 688 ; NO-SIMD128-NOT: i64x2
 689 ; SIMD128-VM-NOT: i64x2
 690 ; SIMD128-NEXT: .functype sub_v2i64 (v128, v128) -> (v128){{$}}
 691 ; SIMD128-NEXT: i64x2.sub $push[[R:[0-9]+]]=, $0, $1{{$}}
 692 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 693 define <2 x i64> @sub_v2i64(<2 x i64> %x, <2 x i64> %y) {
 694   %a = sub <2 x i64> %x, %y
 695   ret <2 x i64> %a
 696 }
 697
 698 ; i64x2.mul is not in the spec, so the multiply is expected to be scalarized
 699 ; CHECK-LABEL: mul_v2i64:
 700 ; NO-SIMD128-NOT: i64x2
 701 ; SIMD128-VM-NOT: i64x2
 702 ; SIMD128-NOT: i64x2.mul
 703 ; SIMD128: i64x2.extract_lane
 704 ; SIMD128: i64.mul
 705 define <2 x i64> @mul_v2i64(<2 x i64> %x, <2 x i64> %y) {
 706   %a = mul <2 x i64> %x, %y
 707   ret <2 x i64> %a
 708 }
 709
 710 ; CHECK-LABEL: neg_v2i64:
     ; Negation written as (0 - x) on <2 x i64>; the +unimplemented-simd128
     ; runs must recognize it and emit a single i64x2.neg.
 711 ; NO-SIMD128-NOT: i64x2
 712 ; SIMD128-NEXT: .functype neg_v2i64 (v128) -> (v128){{$}}
 713 ; SIMD128-NEXT: i64x2.neg $push[[R:[0-9]+]]=, $0{{$}}
 714 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 715 define <2 x i64> @neg_v2i64(<2 x i64> %x) {
 716   %a = sub <2 x i64> <i64 0, i64 0>, %x
 717   ret <2 x i64> %a
 718 }
 719
 720 ; CHECK-LABEL: shl_v2i64:
     ; Left shift of <2 x i64> by an i32 amount that the IR zero-extends to i64
     ; and splats into both lanes; the expected code passes the i32 argument
     ; straight to i64x2.shl with no extension or splat instructions.
 721 ; NO-SIMD128-NOT: i64x2
 722 ; SIMD128-NEXT: .functype shl_v2i64 (v128, i32) -> (v128){{$}}
 723 ; SIMD128-NEXT: i64x2.shl $push[[R:[0-9]+]]=, $0, $1{{$}}
 724 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 725 define <2 x i64> @shl_v2i64(<2 x i64> %v, i32 %x) {
 726   %x2 = zext i32 %x to i64
 727   %t = insertelement <2 x i64> undef, i64 %x2, i32 0
 728   %s = shufflevector <2 x i64> %t, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
 729   %a = shl <2 x i64> %v, %s
 730   ret <2 x i64> %a
 731 }
 732
 733 ; CHECK-LABEL: shl_nozext_v2i64:
     ; Left shift of <2 x i64> by an i64 amount splatted into both lanes (no
     ; zext in the IR); the expected code narrows the amount with i32.wrap_i64
     ; before feeding it to i64x2.shl.
 734 ; NO-SIMD128-NOT: i64x2
 735 ; SIMD128-NEXT: .functype shl_nozext_v2i64 (v128, i64) -> (v128){{$}}
 736 ; SIMD128-NEXT: i32.wrap_i64 $push[[L0:[0-9]+]]=, $1{{$}}
 737 ; SIMD128-NEXT: i64x2.shl $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
 738 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 739 define <2 x i64> @shl_nozext_v2i64(<2 x i64> %v, i64 %x) {
 740   %t = insertelement <2 x i64> undef, i64 %x, i32 0
 741   %s = shufflevector <2 x i64> %t, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
 742   %a = shl <2 x i64> %v, %s
 743   ret <2 x i64> %a
 744 }
 745
 746 ; CHECK-LABEL: shl_const_v2i64:
     ; Left shift of <2 x i64> by the constant splat <5, 5>; the expected code
     ; materializes the amount as a scalar i32.const 5 for i64x2.shl.
 747 ; NO-SIMD128-NOT: i64x2
 748 ; SIMD128-NEXT: .functype shl_const_v2i64 (v128) -> (v128){{$}}
 749 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 5{{$}}
 750 ; SIMD128-NEXT: i64x2.shl $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
 751 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 752 define <2 x i64> @shl_const_v2i64(<2 x i64> %v) {
 753   %a = shl <2 x i64> %v, <i64 5, i64 5>
 754   ret <2 x i64> %a
 755 }
 756
 757 ; CHECK-LABEL: shl_vec_v2i64:
     ; Left shift of <2 x i64> by an arbitrary per-lane vector amount. The
     ; expected code is scalarized: extract lane 0 of both operands, i64.shl,
     ; splat the result, then do the same for lane 1 and insert it with
     ; replace_lane.
 758 ; NO-SIMD128-NOT: i64x2
 759 ; SIMD128-NEXT: .functype shl_vec_v2i64 (v128, v128) -> (v128){{$}}
 760 ; SIMD128-NEXT: i64x2.extract_lane $push[[L0:[0-9]+]]=, $0, 0{{$}}
 761 ; SIMD128-NEXT: i64x2.extract_lane $push[[L1:[0-9]+]]=, $1, 0{{$}}
 762 ; SIMD128-NEXT: i64.shl $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
 763 ; SIMD128-NEXT: i64x2.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
 764 ; SIMD128-NEXT: i64x2.extract_lane $push[[L4:[0-9]+]]=, $0, 1{{$}}
 765 ; SIMD128-NEXT: i64x2.extract_lane $push[[L5:[0-9]+]]=, $1, 1{{$}}
 766 ; SIMD128-NEXT: i64.shl $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
 767 ; SIMD128-NEXT: i64x2.replace_lane $push[[R:[0-9]+]]=, $pop[[L3]], 1, $pop[[L6]]{{$}}
 768 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 769 define <2 x i64> @shl_vec_v2i64(<2 x i64> %v, <2 x i64> %x) {
 770   %a = shl <2 x i64> %v, %x
 771   ret <2 x i64> %a
 772 }
 773
 774 ; CHECK-LABEL: shr_s_v2i64:
     ; Arithmetic right shift (ashr) of <2 x i64> by an i32 amount that the IR
     ; zero-extends and splats; the expected code passes the i32 argument
     ; straight to i64x2.shr_s.
 775 ; NO-SIMD128-NOT: i64x2
 776 ; SIMD128-NEXT: .functype shr_s_v2i64 (v128, i32) -> (v128){{$}}
 777 ; SIMD128-NEXT: i64x2.shr_s $push[[R:[0-9]+]]=, $0, $1{{$}}
 778 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 779 define <2 x i64> @shr_s_v2i64(<2 x i64> %v, i32 %x) {
 780   %x2 = zext i32 %x to i64
 781   %t = insertelement <2 x i64> undef, i64 %x2, i32 0
 782   %s = shufflevector <2 x i64> %t, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
 783   %a = ashr <2 x i64> %v, %s
 784   ret <2 x i64> %a
 785 }
 786
 787 ; CHECK-LABEL: shr_s_nozext_v2i64:
     ; Arithmetic right shift of <2 x i64> by a splatted i64 amount (no zext in
     ; the IR); the expected code narrows the amount with i32.wrap_i64 before
     ; i64x2.shr_s.
 788 ; NO-SIMD128-NOT: i64x2
 789 ; SIMD128-NEXT: .functype shr_s_nozext_v2i64 (v128, i64) -> (v128){{$}}
 790 ; SIMD128-NEXT: i32.wrap_i64 $push[[L0:[0-9]+]]=, $1{{$}}
 791 ; SIMD128-NEXT: i64x2.shr_s $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
 792 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 793 define <2 x i64> @shr_s_nozext_v2i64(<2 x i64> %v, i64 %x) {
 794   %t = insertelement <2 x i64> undef, i64 %x, i32 0
 795   %s = shufflevector <2 x i64> %t, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
 796   %a = ashr <2 x i64> %v, %s
 797   ret <2 x i64> %a
 798 }
 799
 800 ; CHECK-LABEL: shr_s_const_v2i64:
     ; Arithmetic right shift of <2 x i64> by the constant splat <5, 5>; the
     ; expected code materializes the amount as i32.const 5 for i64x2.shr_s.
 801 ; NO-SIMD128-NOT: i64x2
 802 ; SIMD128-NEXT: .functype shr_s_const_v2i64 (v128) -> (v128){{$}}
 803 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 5{{$}}
 804 ; SIMD128-NEXT: i64x2.shr_s $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
 805 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 806 define <2 x i64> @shr_s_const_v2i64(<2 x i64> %v) {
 807   %a = ashr <2 x i64> %v, <i64 5, i64 5>
 808   ret <2 x i64> %a
 809 }
 810
 811 ; CHECK-LABEL: shr_s_vec_v2i64:
     ; Arithmetic right shift of <2 x i64> by an arbitrary per-lane vector
     ; amount. The expected code is scalarized: per lane, extract both
     ; operands and i64.shr_s them, then rebuild the vector with splat and
     ; replace_lane.
 812 ; NO-SIMD128-NOT: i64x2
 813 ; SIMD128-NEXT: .functype shr_s_vec_v2i64 (v128, v128) -> (v128){{$}}
 814 ; SIMD128-NEXT: i64x2.extract_lane $push[[L0:[0-9]+]]=, $0, 0{{$}}
 815 ; SIMD128-NEXT: i64x2.extract_lane $push[[L1:[0-9]+]]=, $1, 0{{$}}
 816 ; SIMD128-NEXT: i64.shr_s $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
 817 ; SIMD128-NEXT: i64x2.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
 818 ; SIMD128-NEXT: i64x2.extract_lane $push[[L4:[0-9]+]]=, $0, 1{{$}}
 819 ; SIMD128-NEXT: i64x2.extract_lane $push[[L5:[0-9]+]]=, $1, 1{{$}}
 820 ; SIMD128-NEXT: i64.shr_s $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
 821 ; SIMD128-NEXT: i64x2.replace_lane $push[[R:[0-9]+]]=, $pop[[L3]], 1, $pop[[L6]]{{$}}
 822 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 823 define <2 x i64> @shr_s_vec_v2i64(<2 x i64> %v, <2 x i64> %x) {
 824   %a = ashr <2 x i64> %v, %x
 825   ret <2 x i64> %a
 826 }
 827
 828 ; CHECK-LABEL: shr_u_v2i64:
     ; Logical right shift (lshr) of <2 x i64> by an i32 amount that the IR
     ; zero-extends and splats; the expected code passes the i32 argument
     ; straight to i64x2.shr_u.
 829 ; NO-SIMD128-NOT: i64x2
 830 ; SIMD128-NEXT: .functype shr_u_v2i64 (v128, i32) -> (v128){{$}}
 831 ; SIMD128-NEXT: i64x2.shr_u $push[[R:[0-9]+]]=, $0, $1{{$}}
 832 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 833 define <2 x i64> @shr_u_v2i64(<2 x i64> %v, i32 %x) {
 834   %x2 = zext i32 %x to i64
 835   %t = insertelement <2 x i64> undef, i64 %x2, i32 0
 836   %s = shufflevector <2 x i64> %t, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
 837   %a = lshr <2 x i64> %v, %s
 838   ret <2 x i64> %a
 839 }
 840
 841 ; CHECK-LABEL: shr_u_nozext_v2i64:
     ; Logical right shift of <2 x i64> by a splatted i64 amount (no zext in
     ; the IR); the expected code narrows the amount with i32.wrap_i64 before
     ; i64x2.shr_u.
 842 ; NO-SIMD128-NOT: i64x2
 843 ; SIMD128-NEXT: .functype shr_u_nozext_v2i64 (v128, i64) -> (v128){{$}}
 844 ; SIMD128-NEXT: i32.wrap_i64 $push[[L0:[0-9]+]]=, $1{{$}}
 845 ; SIMD128-NEXT: i64x2.shr_u $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
 846 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 847 define <2 x i64> @shr_u_nozext_v2i64(<2 x i64> %v, i64 %x) {
 848   %t = insertelement <2 x i64> undef, i64 %x, i32 0
 849   %s = shufflevector <2 x i64> %t, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
 850   %a = lshr <2 x i64> %v, %s
 851   ret <2 x i64> %a
 852 }
 853
 854 ; CHECK-LABEL: shr_u_const_v2i64:
     ; Logical right shift of <2 x i64> by the constant splat <5, 5>; the
     ; expected code materializes the amount as i32.const 5 for i64x2.shr_u.
 855 ; NO-SIMD128-NOT: i64x2
 856 ; SIMD128-NEXT: .functype shr_u_const_v2i64 (v128) -> (v128){{$}}
 857 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 5{{$}}
 858 ; SIMD128-NEXT: i64x2.shr_u $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
 859 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 860 define <2 x i64> @shr_u_const_v2i64(<2 x i64> %v) {
 861   %a = lshr <2 x i64> %v, <i64 5, i64 5>
 862   ret <2 x i64> %a
 863 }
 864
 865 ; CHECK-LABEL: shr_u_vec_v2i64:
     ; Logical right shift of <2 x i64> by an arbitrary per-lane vector amount.
     ; The expected code is scalarized: per lane, extract both operands and
     ; i64.shr_u them, then rebuild the vector with splat and replace_lane.
 866 ; NO-SIMD128-NOT: i64x2
 867 ; SIMD128-NEXT: .functype shr_u_vec_v2i64 (v128, v128) -> (v128){{$}}
 868 ; SIMD128-NEXT: i64x2.extract_lane $push[[L0:[0-9]+]]=, $0, 0{{$}}
 869 ; SIMD128-NEXT: i64x2.extract_lane $push[[L1:[0-9]+]]=, $1, 0{{$}}
 870 ; SIMD128-NEXT: i64.shr_u $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
 871 ; SIMD128-NEXT: i64x2.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
 872 ; SIMD128-NEXT: i64x2.extract_lane $push[[L4:[0-9]+]]=, $0, 1{{$}}
 873 ; SIMD128-NEXT: i64x2.extract_lane $push[[L5:[0-9]+]]=, $1, 1{{$}}
 874 ; SIMD128-NEXT: i64.shr_u $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
 875 ; SIMD128-NEXT: i64x2.replace_lane $push[[R:[0-9]+]]=, $pop[[L3]], 1, $pop[[L6]]{{$}}
 876 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 877 define <2 x i64> @shr_u_vec_v2i64(<2 x i64> %v, <2 x i64> %x) {
 878   %a = lshr <2 x i64> %v, %x
 879   ret <2 x i64> %a
 880 }
 881
 882 ; CHECK-LABEL: and_v2i64:
     ; Bitwise AND of two <2 x i64>. The +unimplemented-simd128 runs must emit
     ; one v128.and; the +simd128-only runs and the runs with no SIMD feature
     ; must not mention v128 at all.
 883 ; NO-SIMD128-NOT: v128
 884 ; SIMD128-VM-NOT: v128
 885 ; SIMD128-NEXT: .functype and_v2i64 (v128, v128) -> (v128){{$}}
 886 ; SIMD128-NEXT: v128.and $push[[R:[0-9]+]]=, $0, $1{{$}}
 887 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 888 define <2 x i64> @and_v2i64(<2 x i64> %x, <2 x i64> %y) {
 889   %a = and <2 x i64> %x, %y
 890   ret <2 x i64> %a
 891 }
 892
 893 ; CHECK-LABEL: or_v2i64:
     ; Bitwise OR of two <2 x i64>. The +unimplemented-simd128 runs must emit
     ; one v128.or; the +simd128-only runs and the runs with no SIMD feature
     ; must not mention v128 at all.
 894 ; NO-SIMD128-NOT: v128
 895 ; SIMD128-VM-NOT: v128
 896 ; SIMD128-NEXT: .functype or_v2i64 (v128, v128) -> (v128){{$}}
 897 ; SIMD128-NEXT: v128.or $push[[R:[0-9]+]]=, $0, $1{{$}}
 898 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 899 define <2 x i64> @or_v2i64(<2 x i64> %x, <2 x i64> %y) {
 900   %a = or <2 x i64> %x, %y
 901   ret <2 x i64> %a
 902 }
 903
 904 ; CHECK-LABEL: xor_v2i64:
     ; Bitwise XOR of two <2 x i64>. The +unimplemented-simd128 runs must emit
     ; one v128.xor; the +simd128-only runs and the runs with no SIMD feature
     ; must not mention v128 at all.
 905 ; NO-SIMD128-NOT: v128
 906 ; SIMD128-VM-NOT: v128
 907 ; SIMD128-NEXT: .functype xor_v2i64 (v128, v128) -> (v128){{$}}
 908 ; SIMD128-NEXT: v128.xor $push[[R:[0-9]+]]=, $0, $1{{$}}
 909 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 910 define <2 x i64> @xor_v2i64(<2 x i64> %x, <2 x i64> %y) {
 911   %a = xor <2 x i64> %x, %y
 912   ret <2 x i64> %a
 913 }
 914
 915 ; CHECK-LABEL: not_v2i64:
     ; Bitwise NOT written as xor with an all-ones <2 x i64>. The
     ; +unimplemented-simd128 runs must emit one v128.not; the other runs must
     ; not mention v128 at all.
 916 ; NO-SIMD128-NOT: v128
 917 ; SIMD128-VM-NOT: v128
 918 ; SIMD128-NEXT: .functype not_v2i64 (v128) -> (v128){{$}}
 919 ; SIMD128-NEXT: v128.not $push[[R:[0-9]+]]=, $0{{$}}
 920 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 921 define <2 x i64> @not_v2i64(<2 x i64> %x) {
 922   %a = xor <2 x i64> %x, <i64 -1, i64 -1>
 923   ret <2 x i64> %a
 924 }
 925
 926 ; CHECK-LABEL: bitselect_v2i64:
     ; Bitwise select spelled out as (v1 & c) | (v2 & ~c) on <2 x i64>. The
     ; +unimplemented-simd128 run without -fast-isel must fold it into a single
     ; v128.bitselect; the -fast-isel run keeps the separate not/and/and/or.
     ; The other runs must not mention v128 at all.
 927 ; NO-SIMD128-NOT: v128
 928 ; SIMD128-VM-NOT: v128
 929 ; SIMD128-NEXT: .functype bitselect_v2i64 (v128, v128, v128) -> (v128){{$}}
 930 ; SIMD128-SLOW-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $0{{$}}
 931 ; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}}
 932 ; SIMD128-FAST-NEXT: v128.not
 933 ; SIMD128-FAST-NEXT: v128.and
 934 ; SIMD128-FAST-NEXT: v128.and
 935 ; SIMD128-FAST-NEXT: v128.or
 936 ; SIMD128-FAST-NEXT: return
 937 define <2 x i64> @bitselect_v2i64(<2 x i64> %c, <2 x i64> %v1, <2 x i64> %v2) {
 938   %masked_v1 = and <2 x i64> %v1, %c
 939   %inv_mask = xor <2 x i64> <i64 -1, i64 -1>, %c
 940   %masked_v2 = and <2 x i64> %v2, %inv_mask
 941   %a = or <2 x i64> %masked_v2, %masked_v1
 942   ret <2 x i64> %a
 943 }
 944
 945 ; ==============================================================================
 946 ; 4 x float
 947 ; ==============================================================================
 948 ; CHECK-LABEL: neg_v4f32:
     ; Float negation written as an nsz (0.0 - x) on <4 x float>; the
     ; +unimplemented-simd128 runs must emit a single f32x4.neg.
 949 ; NO-SIMD128-NOT: f32x4
 950 ; SIMD128-NEXT: .functype neg_v4f32 (v128) -> (v128){{$}}
 951 ; SIMD128-NEXT: f32x4.neg $push[[R:[0-9]+]]=, $0{{$}}
 952 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 953 define <4 x float> @neg_v4f32(<4 x float> %x) {
 954   ; nsz makes this semantically equivalent to flipping sign bit
 955   %a = fsub nsz <4 x float> <float 0.0, float 0.0, float 0.0, float 0.0>, %x
 956   ret <4 x float> %a
 957 }
 958
 959 ; CHECK-LABEL: abs_v4f32:
     ; Call to the llvm.fabs.v4f32 intrinsic; the +unimplemented-simd128 runs
     ; must lower it to a single f32x4.abs.
 960 ; NO-SIMD128-NOT: f32x4
 961 ; SIMD128-NEXT: .functype abs_v4f32 (v128) -> (v128){{$}}
 962 ; SIMD128-NEXT: f32x4.abs $push[[R:[0-9]+]]=, $0{{$}}
 963 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 964 declare <4 x float> @llvm.fabs.v4f32(<4 x float>) nounwind readnone
 965 define <4 x float> @abs_v4f32(<4 x float> %x) {
 966   %a = call <4 x float> @llvm.fabs.v4f32(<4 x float> %x)
 967   ret <4 x float> %a
 968 }
 969
 970 ; CHECK-LABEL: min_unordered_v4f32:
     ; Minimum written as select(x ule 5.0, x, 5.0) with an unordered compare;
     ; the expected code splats the constant 5.0 (0x1.4p2) and uses f32x4.min.
 971 ; NO-SIMD128-NOT: f32x4
 972 ; SIMD128-NEXT: .functype min_unordered_v4f32 (v128) -> (v128){{$}}
 973 ; SIMD128-NEXT: f32.const $push[[L0:[0-9]+]]=, 0x1.4p2
 974 ; SIMD128-NEXT: f32x4.splat $push[[L1:[0-9]+]]=, $pop[[L0]]
 975 ; SIMD128-NEXT: f32x4.min $push[[R:[0-9]+]]=, $0, $pop[[L1]]{{$}}
 976 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 977 define <4 x float> @min_unordered_v4f32(<4 x float> %x) {
 978   %cmps = fcmp ule <4 x float> %x, <float 5., float 5., float 5., float 5.>
 979   %a = select <4 x i1> %cmps, <4 x float> %x,
 980     <4 x float> <float 5., float 5., float 5., float 5.>
 981   ret <4 x float> %a
 982 }
 983
 984 ; CHECK-LABEL: max_unordered_v4f32:
     ; Maximum written as select(x uge 5.0, x, 5.0) with an unordered compare;
     ; the expected code splats the constant 5.0 (0x1.4p2) and uses f32x4.max.
 985 ; NO-SIMD128-NOT: f32x4
 986 ; SIMD128-NEXT: .functype max_unordered_v4f32 (v128) -> (v128){{$}}
 987 ; SIMD128-NEXT: f32.const $push[[L0:[0-9]+]]=, 0x1.4p2
 988 ; SIMD128-NEXT: f32x4.splat $push[[L1:[0-9]+]]=, $pop[[L0]]
 989 ; SIMD128-NEXT: f32x4.max $push[[R:[0-9]+]]=, $0, $pop[[L1]]{{$}}
 990 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 991 define <4 x float> @max_unordered_v4f32(<4 x float> %x) {
 992   %cmps = fcmp uge <4 x float> %x, <float 5., float 5., float 5., float 5.>
 993   %a = select <4 x i1> %cmps, <4 x float> %x,
 994     <4 x float> <float 5., float 5., float 5., float 5.>
 995   ret <4 x float> %a
 996 }
 997
 998 ; CHECK-LABEL: min_ordered_v4f32:
     ; Minimum written as select(5.0 ole x, 5.0, x) with an ordered compare and
     ; the constant on the left; the expected code still uses f32x4.min with x
     ; as the first operand and the splatted 5.0 as the second.
 999 ; NO-SIMD128-NOT: f32x4
1000 ; SIMD128-NEXT: .functype min_ordered_v4f32 (v128) -> (v128){{$}}
1001 ; SIMD128-NEXT: f32.const $push[[L0:[0-9]+]]=, 0x1.4p2
1002 ; SIMD128-NEXT: f32x4.splat $push[[L1:[0-9]+]]=, $pop[[L0]]
1003 ; SIMD128-NEXT: f32x4.min $push[[R:[0-9]+]]=, $0, $pop[[L1]]{{$}}
1004 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1005 define <4 x float> @min_ordered_v4f32(<4 x float> %x) {
1006   %cmps = fcmp ole <4 x float> <float 5., float 5., float 5., float 5.>, %x
1007   %a = select <4 x i1> %cmps,
1008     <4 x float> <float 5., float 5., float 5., float 5.>, <4 x float> %x
1009   ret <4 x float> %a
1010 }
1011
1012 ; CHECK-LABEL: max_ordered_v4f32:
     ; Maximum written as select(5.0 oge x, 5.0, x) with an ordered compare and
     ; the constant on the left; the expected code still uses f32x4.max with x
     ; as the first operand and the splatted 5.0 as the second.
1013 ; NO-SIMD128-NOT: f32x4
1014 ; SIMD128-NEXT: .functype max_ordered_v4f32 (v128) -> (v128){{$}}
1015 ; SIMD128-NEXT: f32.const $push[[L0:[0-9]+]]=, 0x1.4p2
1016 ; SIMD128-NEXT: f32x4.splat $push[[L1:[0-9]+]]=, $pop[[L0]]
1017 ; SIMD128-NEXT: f32x4.max $push[[R:[0-9]+]]=, $0, $pop[[L1]]{{$}}
1018 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1019 define <4 x float> @max_ordered_v4f32(<4 x float> %x) {
1020   %cmps = fcmp oge <4 x float> <float 5., float 5., float 5., float 5.>, %x
1021   %a = select <4 x i1> %cmps,
1022     <4 x float> <float 5., float 5., float 5., float 5.>, <4 x float> %x
1023   ret <4 x float> %a
1024 }
1025
1026 ; CHECK-LABEL: min_intrinsic_v4f32:
     ; Call to the llvm.minimum.v4f32 intrinsic on two arguments; the
     ; +unimplemented-simd128 runs must lower it to a single f32x4.min.
1027 ; NO-SIMD128-NOT: f32x4
1028 ; SIMD128-NEXT: .functype min_intrinsic_v4f32 (v128, v128) -> (v128){{$}}
1029 ; SIMD128-NEXT: f32x4.min $push[[R:[0-9]+]]=, $0, $1{{$}}
1030 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1031 declare <4 x float> @llvm.minimum.v4f32(<4 x float>, <4 x float>)
1032 define <4 x float> @min_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) {
1033   %a = call <4 x float> @llvm.minimum.v4f32(<4 x float> %x, <4 x float> %y)
1034   ret <4 x float> %a
1035 }
1036
1037 ; CHECK-LABEL: max_intrinsic_v4f32:
     ; Call to the llvm.maximum.v4f32 intrinsic on two arguments; the
     ; +unimplemented-simd128 runs must lower it to a single f32x4.max.
1038 ; NO-SIMD128-NOT: f32x4
1039 ; SIMD128-NEXT: .functype max_intrinsic_v4f32 (v128, v128) -> (v128){{$}}
1040 ; SIMD128-NEXT: f32x4.max $push[[R:[0-9]+]]=, $0, $1{{$}}
1041 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1042 declare <4 x float> @llvm.maximum.v4f32(<4 x float>, <4 x float>)
1043 define <4 x float> @max_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) {
1044   %a = call <4 x float> @llvm.maximum.v4f32(<4 x float> %x, <4 x float> %y)
1045   ret <4 x float> %a
1046 }
1047
1048 ; CHECK-LABEL: min_const_intrinsic_v4f32:
     ; llvm.minimum.v4f32 applied to the constant splats 42.0 and 5.0; the
     ; expected code contains no min instruction, only a splat of the folded
     ; result 5.0 (0x1.4p2).
1049 ; NO-SIMD128-NOT: f32x4
1050 ; SIMD128-NEXT: .functype min_const_intrinsic_v4f32 () -> (v128){{$}}
1051 ; SIMD128-NEXT: f32.const $push[[L:[0-9]+]]=, 0x1.4p2{{$}}
1052 ; SIMD128-NEXT: f32x4.splat $push[[R:[0-9]+]]=, $pop[[L]]{{$}}
1053 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1054 define <4 x float> @min_const_intrinsic_v4f32() {
1055   %a = call <4 x float> @llvm.minimum.v4f32(
1056     <4 x float> <float 42., float 42., float 42., float 42.>,
1057     <4 x float> <float 5., float 5., float 5., float 5.>
1058   )
1059   ret <4 x float> %a
1060 }
1061
1062 ; CHECK-LABEL: max_const_intrinsic_v4f32:
     ; llvm.maximum.v4f32 applied to the constant splats 42.0 and 5.0; the
     ; expected code contains no max instruction, only a splat of the folded
     ; result 42.0 (0x1.5p5).
1063 ; NO-SIMD128-NOT: f32x4
1064 ; SIMD128-NEXT: .functype max_const_intrinsic_v4f32 () -> (v128){{$}}
1065 ; SIMD128-NEXT: f32.const $push[[L:[0-9]+]]=, 0x1.5p5{{$}}
1066 ; SIMD128-NEXT: f32x4.splat $push[[R:[0-9]+]]=, $pop[[L]]{{$}}
1067 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1068 define <4 x float> @max_const_intrinsic_v4f32() {
1069   %a = call <4 x float> @llvm.maximum.v4f32(
1070     <4 x float> <float 42., float 42., float 42., float 42.>,
1071     <4 x float> <float 5., float 5., float 5., float 5.>
1072   )
1073   ret <4 x float> %a
1074 }
1075
1076 ; CHECK-LABEL: add_v4f32:
     ; <4 x float> fadd; the +unimplemented-simd128 runs must emit a single
     ; f32x4.add, and the runs with no SIMD feature must emit no f32x4 at all.
1077 ; NO-SIMD128-NOT: f32x4
1078 ; SIMD128-NEXT: .functype add_v4f32 (v128, v128) -> (v128){{$}}
1079 ; SIMD128-NEXT: f32x4.add $push[[R:[0-9]+]]=, $0, $1{{$}}
1080 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1081 define <4 x float> @add_v4f32(<4 x float> %x, <4 x float> %y) {
1082   %a = fadd <4 x float> %x, %y
1083   ret <4 x float> %a
1084 }
1085
1086 ; CHECK-LABEL: sub_v4f32:
     ; <4 x float> fsub; the +unimplemented-simd128 runs must emit a single
     ; f32x4.sub, and the runs with no SIMD feature must emit no f32x4 at all.
1087 ; NO-SIMD128-NOT: f32x4
1088 ; SIMD128-NEXT: .functype sub_v4f32 (v128, v128) -> (v128){{$}}
1089 ; SIMD128-NEXT: f32x4.sub $push[[R:[0-9]+]]=, $0, $1{{$}}
1090 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1091 define <4 x float> @sub_v4f32(<4 x float> %x, <4 x float> %y) {
1092   %a = fsub <4 x float> %x, %y
1093   ret <4 x float> %a
1094 }
1095
1096 ; CHECK-LABEL: div_v4f32:
     ; <4 x float> fdiv; the +unimplemented-simd128 runs must emit a single
     ; f32x4.div, while the +simd128-only runs must not emit f32x4.div and the
     ; runs with no SIMD feature must emit no f32x4 at all.
1097 ; NO-SIMD128-NOT: f32x4
1098 ; SIMD128-VM-NOT: f32x4.div
1099 ; SIMD128-NEXT: .functype div_v4f32 (v128, v128) -> (v128){{$}}
1100 ; SIMD128-NEXT: f32x4.div $push[[R:[0-9]+]]=, $0, $1{{$}}
1101 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1102 define <4 x float> @div_v4f32(<4 x float> %x, <4 x float> %y) {
1103   %a = fdiv <4 x float> %x, %y
1104   ret <4 x float> %a
1105 }
1106
1107 ; CHECK-LABEL: mul_v4f32:
     ; <4 x float> fmul; the +unimplemented-simd128 runs must emit a single
     ; f32x4.mul, and the runs with no SIMD feature must emit no f32x4 at all.
1108 ; NO-SIMD128-NOT: f32x4
1109 ; SIMD128-NEXT: .functype mul_v4f32 (v128, v128) -> (v128){{$}}
1110 ; SIMD128-NEXT: f32x4.mul $push[[R:[0-9]+]]=, $0, $1{{$}}
1111 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1112 define <4 x float> @mul_v4f32(<4 x float> %x, <4 x float> %y) {
1113   %a = fmul <4 x float> %x, %y
1114   ret <4 x float> %a
1115 }
1116
1117 ; CHECK-LABEL: sqrt_v4f32:
     ; Call to the llvm.sqrt.v4f32 intrinsic; the +unimplemented-simd128 runs
     ; must lower it to a single f32x4.sqrt, while the +simd128-only runs must
     ; not emit f32x4.sqrt.
1118 ; NO-SIMD128-NOT: f32x4
1119 ; SIMD128-VM-NOT: f32x4.sqrt
1120 ; SIMD128-NEXT: .functype sqrt_v4f32 (v128) -> (v128){{$}}
1121 ; SIMD128-NEXT: f32x4.sqrt $push[[R:[0-9]+]]=, $0{{$}}
1122 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1123 declare <4 x float> @llvm.sqrt.v4f32(<4 x float> %x)
1124 define <4 x float> @sqrt_v4f32(<4 x float> %x) {
1125   %a = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %x)
1126   ret <4 x float> %a
1127 }
1128
1129 ; ==============================================================================
1130 ; 2 x double
1131 ; ==============================================================================
1132 ; CHECK-LABEL: neg_v2f64:
     ; Double negation written as an nsz (0.0 - x) on <2 x double>; the
     ; +unimplemented-simd128 runs must emit a single f64x2.neg.
1133 ; NO-SIMD128-NOT: f64x2
1134 ; SIMD128-NEXT: .functype neg_v2f64 (v128) -> (v128){{$}}
1135 ; SIMD128-NEXT: f64x2.neg $push[[R:[0-9]+]]=, $0{{$}}
1136 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1137 define <2 x double> @neg_v2f64(<2 x double> %x) {
1138   ; nsz makes this semantically equivalent to flipping sign bit
1139   %a = fsub nsz <2 x double> <double 0., double 0.>, %x
1140   ret <2 x double> %a
1141 }
1142
1143 ; CHECK-LABEL: abs_v2f64:
     ; Call to the llvm.fabs.v2f64 intrinsic; the +unimplemented-simd128 runs
     ; must lower it to a single f64x2.abs.
1144 ; NO-SIMD128-NOT: f64x2
1145 ; SIMD128-NEXT: .functype abs_v2f64 (v128) -> (v128){{$}}
1146 ; SIMD128-NEXT: f64x2.abs $push[[R:[0-9]+]]=, $0{{$}}
1147 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1148 declare <2 x double> @llvm.fabs.v2f64(<2 x double>) nounwind readnone
1149 define <2 x double> @abs_v2f64(<2 x double> %x) {
1150   %a = call <2 x double> @llvm.fabs.v2f64(<2 x double> %x)
1151   ret <2 x double> %a
1152 }
1153
1154 ; CHECK-LABEL: min_unordered_v2f64:
     ; Minimum written as select(x ule 5.0, x, 5.0) with an unordered compare;
     ; the expected code splats the constant 5.0 (0x1.4p2) and uses f64x2.min.
1155 ; NO-SIMD128-NOT: f64x2
1156 ; SIMD128-NEXT: .functype min_unordered_v2f64 (v128) -> (v128){{$}}
1157 ; SIMD128-NEXT: f64.const $push[[L0:[0-9]+]]=, 0x1.4p2
1158 ; SIMD128-NEXT: f64x2.splat $push[[L1:[0-9]+]]=, $pop[[L0]]
1159 ; SIMD128-NEXT: f64x2.min $push[[R:[0-9]+]]=, $0, $pop[[L1]]{{$}}
1160 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1161 define <2 x double> @min_unordered_v2f64(<2 x double> %x) {
1162   %cmps = fcmp ule <2 x double> %x, <double 5., double 5.>
1163   %a = select <2 x i1> %cmps, <2 x double> %x,
1164     <2 x double> <double 5., double 5.>
1165   ret <2 x double> %a
1166 }
1167
1168 ; CHECK-LABEL: max_unordered_v2f64:
     ; Maximum written as select(x uge 5.0, x, 5.0) with an unordered compare;
     ; the expected code splats the constant 5.0 (0x1.4p2) and uses f64x2.max.
1169 ; NO-SIMD128-NOT: f64x2
1170 ; SIMD128-NEXT: .functype max_unordered_v2f64 (v128) -> (v128){{$}}
1171 ; SIMD128-NEXT: f64.const $push[[L0:[0-9]+]]=, 0x1.4p2
1172 ; SIMD128-NEXT: f64x2.splat $push[[L1:[0-9]+]]=, $pop[[L0]]
1173 ; SIMD128-NEXT: f64x2.max $push[[R:[0-9]+]]=, $0, $pop[[L1]]{{$}}
1174 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1175 define <2 x double> @max_unordered_v2f64(<2 x double> %x) {
1176   %cmps = fcmp uge <2 x double> %x, <double 5., double 5.>
1177   %a = select <2 x i1> %cmps, <2 x double> %x,
1178     <2 x double> <double 5., double 5.>
1179   ret <2 x double> %a
1180 }
1181
1182 ; CHECK-LABEL: min_ordered_v2f64:
     ; Minimum written as select(5.0 ole x, 5.0, x) with an ordered compare and
     ; the constant on the left; the expected code still uses f64x2.min with x
     ; as the first operand and the splatted 5.0 as the second.
1183 ; NO-SIMD128-NOT: f64x2
1184 ; SIMD128-NEXT: .functype min_ordered_v2f64 (v128) -> (v128){{$}}
1185 ; SIMD128-NEXT: f64.const $push[[L0:[0-9]+]]=, 0x1.4p2
1186 ; SIMD128-NEXT: f64x2.splat $push[[L1:[0-9]+]]=, $pop[[L0]]
1187 ; SIMD128-NEXT: f64x2.min $push[[R:[0-9]+]]=, $0, $pop[[L1]]{{$}}
1188 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1189 define <2 x double> @min_ordered_v2f64(<2 x double> %x) {
1190   %cmps = fcmp ole <2 x double> <double 5., double 5.>, %x
1191   %a = select <2 x i1> %cmps, <2 x double> <double 5., double 5.>,
1192     <2 x double> %x
1193   ret <2 x double> %a
1194 }
1195
1196 ; CHECK-LABEL: max_ordered_v2f64:
     ; Maximum written as select(5.0 oge x, 5.0, x) with an ordered compare and
     ; the constant on the left; the expected code still uses f64x2.max with x
     ; as the first operand and the splatted 5.0 as the second.
1197 ; NO-SIMD128-NOT: f64x2
1198 ; SIMD128-NEXT: .functype max_ordered_v2f64 (v128) -> (v128){{$}}
1199 ; SIMD128-NEXT: f64.const $push[[L0:[0-9]+]]=, 0x1.4p2
1200 ; SIMD128-NEXT: f64x2.splat $push[[L1:[0-9]+]]=, $pop[[L0]]
1201 ; SIMD128-NEXT: f64x2.max $push[[R:[0-9]+]]=, $0, $pop[[L1]]{{$}}
1202 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1203 define <2 x double> @max_ordered_v2f64(<2 x double> %x) {
1204   %cmps = fcmp oge <2 x double> <double 5., double 5.>, %x
1205   %a = select <2 x i1> %cmps, <2 x double> <double 5., double 5.>,
1206     <2 x double> %x
1207   ret <2 x double> %a
1208 }
1209
1210 ; CHECK-LABEL: min_intrinsic_v2f64:
     ; Call to the llvm.minimum.v2f64 intrinsic on two arguments; the
     ; +unimplemented-simd128 runs must lower it to a single f64x2.min.
1211 ; NO-SIMD128-NOT: f64x2
1212 ; SIMD128-NEXT: .functype min_intrinsic_v2f64 (v128, v128) -> (v128){{$}}
1213 ; SIMD128-NEXT: f64x2.min $push[[R:[0-9]+]]=, $0, $1{{$}}
1214 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1215 declare <2 x double> @llvm.minimum.v2f64(<2 x double>, <2 x double>)
1216 define <2 x double> @min_intrinsic_v2f64(<2 x double> %x, <2 x double> %y) {
1217   %a = call <2 x double> @llvm.minimum.v2f64(<2 x double> %x, <2 x double> %y)
1218   ret <2 x double> %a
1219 }
1220
1221 ; CHECK-LABEL: max_intrinsic_v2f64:
     ; Call to the llvm.maximum.v2f64 intrinsic on two arguments; the
     ; +unimplemented-simd128 runs must lower it to a single f64x2.max.
1222 ; NO-SIMD128-NOT: f64x2
1223 ; SIMD128-NEXT: .functype max_intrinsic_v2f64 (v128, v128) -> (v128){{$}}
1224 ; SIMD128-NEXT: f64x2.max $push[[R:[0-9]+]]=, $0, $1{{$}}
1225 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1226 declare <2 x double> @llvm.maximum.v2f64(<2 x double>, <2 x double>)
1227 define <2 x double> @max_intrinsic_v2f64(<2 x double> %x, <2 x double> %y) {
1228   %a = call <2 x double> @llvm.maximum.v2f64(<2 x double> %x, <2 x double> %y)
1229   ret <2 x double> %a
1230 }
1231
1232 ; CHECK-LABEL: min_const_intrinsic_v2f64:
     ; llvm.minimum.v2f64 applied to the constant splats 42.0 and 5.0; the
     ; expected code contains no min instruction, only a splat of the folded
     ; result 5.0 (0x1.4p2).
1233 ; NO-SIMD128-NOT: f64x2
1234 ; SIMD128-NEXT: .functype min_const_intrinsic_v2f64 () -> (v128){{$}}
1235 ; SIMD128-NEXT: f64.const $push[[L:[0-9]+]]=, 0x1.4p2{{$}}
1236 ; SIMD128-NEXT: f64x2.splat $push[[R:[0-9]+]]=, $pop[[L]]{{$}}
1237 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1238 define <2 x double> @min_const_intrinsic_v2f64() {
1239   %a = call <2 x double> @llvm.minimum.v2f64(
1240     <2 x double> <double 42., double 42.>,
1241     <2 x double> <double 5., double 5.>
1242   )
1243   ret <2 x double> %a
1244 }
1245
1246 ; CHECK-LABEL: max_const_intrinsic_v2f64:
     ; llvm.maximum.v2f64 applied to the constant splats 42.0 and 5.0; the
     ; expected code contains no max instruction, only a splat of the folded
     ; result 42.0 (0x1.5p5).
1247 ; NO-SIMD128-NOT: f64x2
1248 ; SIMD128-NEXT: .functype max_const_intrinsic_v2f64 () -> (v128){{$}}
1249 ; SIMD128-NEXT: f64.const $push[[L:[0-9]+]]=, 0x1.5p5{{$}}
1250 ; SIMD128-NEXT: f64x2.splat $push[[R:[0-9]+]]=, $pop[[L]]{{$}}
1251 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1252 define <2 x double> @max_const_intrinsic_v2f64() {
1253   %a = call <2 x double> @llvm.maximum.v2f64(
1254     <2 x double> <double 42., double 42.>,
1255     <2 x double> <double 5., double 5.>
1256   )
1257   ret <2 x double> %a
1258 }
1259
1260 ; CHECK-LABEL: add_v2f64:
     ; <2 x double> fadd. The +unimplemented-simd128 runs must emit one
     ; f64x2.add; the +simd128-only runs and the runs with no SIMD feature must
     ; emit no f64x2 instruction at all.
1261 ; NO-SIMD128-NOT: f64x2
1262 ; SIMD128-VM-NOT: f64x2
1263 ; SIMD128-NEXT: .functype add_v2f64 (v128, v128) -> (v128){{$}}
1264 ; SIMD128-NEXT: f64x2.add $push[[R:[0-9]+]]=, $0, $1{{$}}
1265 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1266 define <2 x double> @add_v2f64(<2 x double> %x, <2 x double> %y) {
1267   %a = fadd <2 x double> %x, %y
1268   ret <2 x double> %a
1269 }
1270
1271 ; CHECK-LABEL: sub_v2f64:
     ; <2 x double> fsub. The +unimplemented-simd128 runs must emit one
     ; f64x2.sub; the +simd128-only runs and the runs with no SIMD feature must
     ; emit no f64x2 instruction at all.
1272 ; NO-SIMD128-NOT: f64x2
1273 ; SIMD128-VM-NOT: f64x2
1274 ; SIMD128-NEXT: .functype sub_v2f64 (v128, v128) -> (v128){{$}}
1275 ; SIMD128-NEXT: f64x2.sub $push[[R:[0-9]+]]=, $0, $1{{$}}
1276 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1277 define <2 x double> @sub_v2f64(<2 x double> %x, <2 x double> %y) {
1278   %a = fsub <2 x double> %x, %y
1279   ret <2 x double> %a
1280 }
1281
1282 ; CHECK-LABEL: div_v2f64:
     ; <2 x double> fdiv. The +unimplemented-simd128 runs must emit one
     ; f64x2.div; the +simd128-only runs and the runs with no SIMD feature must
     ; emit no f64x2 instruction at all.
1283 ; NO-SIMD128-NOT: f64x2
1284 ; SIMD128-VM-NOT: f64x2
1285 ; SIMD128-NEXT: .functype div_v2f64 (v128, v128) -> (v128){{$}}
1286 ; SIMD128-NEXT: f64x2.div $push[[R:[0-9]+]]=, $0, $1{{$}}
1287 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1288 define <2 x double> @div_v2f64(<2 x double> %x, <2 x double> %y) {
1289   %a = fdiv <2 x double> %x, %y
1290   ret <2 x double> %a
1291 }
1292
1293 ; CHECK-LABEL: mul_v2f64:
     ; <2 x double> fmul. The +unimplemented-simd128 runs must emit one
     ; f64x2.mul; the +simd128-only runs and the runs with no SIMD feature must
     ; emit no f64x2 instruction at all.
1294 ; NO-SIMD128-NOT: f64x2
1295 ; SIMD128-VM-NOT: f64x2
1296 ; SIMD128-NEXT: .functype mul_v2f64 (v128, v128) -> (v128){{$}}
1297 ; SIMD128-NEXT: f64x2.mul $push[[R:[0-9]+]]=, $0, $1{{$}}
1298 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1299 define <2 x double> @mul_v2f64(<2 x double> %x, <2 x double> %y) {
1300   %a = fmul <2 x double> %x, %y
1301   ret <2 x double> %a
1302 }
1303
1304 ; CHECK-LABEL: sqrt_v2f64:
     ; Call to the llvm.sqrt.v2f64 intrinsic; the +unimplemented-simd128 runs
     ; must lower it to a single f64x2.sqrt.
1305 ; NO-SIMD128-NOT: f64x2
1306 ; SIMD128-NEXT: .functype sqrt_v2f64 (v128) -> (v128){{$}}
1307 ; SIMD128-NEXT: f64x2.sqrt $push[[R:[0-9]+]]=, $0{{$}}
1308 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1309 declare <2 x double> @llvm.sqrt.v2f64(<2 x double> %x)
1310 define <2 x double> @sqrt_v2f64(<2 x double> %x) {
1311   %a = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %x)
1312   ret <2 x double> %a
1313 }