test/CodeGen/WebAssembly/simd-arith.ll

   1 ; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+unimplemented-simd128 | FileCheck %s --check-prefixes CHECK,SIMD128,SIMD128-SLOW
   2 ; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+unimplemented-simd128 -fast-isel | FileCheck %s --check-prefixes CHECK,SIMD128,SIMD128-FAST
   3 ; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 | FileCheck %s --check-prefixes CHECK,SIMD128-VM
   4 ; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 -fast-isel | FileCheck %s --check-prefixes CHECK,SIMD128-VM
   5 ; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers | FileCheck %s --check-prefixes CHECK,NO-SIMD128
   6 ; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -fast-isel | FileCheck %s --check-prefixes CHECK,NO-SIMD128
   7
   8 ; Check that a non-test run (including the explicit locals pass) at least finishes.
   9 ; RUN: llc < %s -O0 -mattr=+unimplemented-simd128
  10 ; RUN: llc < %s -O2 -mattr=+unimplemented-simd128
  11
  12 ; Test that basic SIMD128 arithmetic operations assemble as expected.
  13
     ; Module-level target settings: all functions below are compiled for the
     ; wasm32-unknown-unknown triple.
  14 target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
  15 target triple = "wasm32-unknown-unknown"
  16
  17 ; ==============================================================================
  18 ; 16 x i8
  19 ; ==============================================================================
  20 ; CHECK-LABEL: add_v16i8:
  21 ; NO-SIMD128-NOT: i8x16
  22 ; SIMD128-NEXT: .functype add_v16i8 (v128, v128) -> (v128){{$}}
  23 ; SIMD128-NEXT: i8x16.add $push[[R:[0-9]+]]=, $0, $1{{$}}
  24 ; SIMD128-NEXT: return $pop[[R]]{{$}}
     ; Lane-wise add of two <16 x i8> vectors.
  25 define <16 x i8> @add_v16i8(<16 x i8> %x, <16 x i8> %y) {
  26   %a = add <16 x i8> %x, %y
  27   ret <16 x i8> %a
  28 }
  29
  30 ; CHECK-LABEL: sub_v16i8:
  31 ; NO-SIMD128-NOT: i8x16
  32 ; SIMD128-NEXT: .functype sub_v16i8 (v128, v128) -> (v128){{$}}
  33 ; SIMD128-NEXT: i8x16.sub $push[[R:[0-9]+]]=, $0, $1{{$}}
  34 ; SIMD128-NEXT: return $pop[[R]]{{$}}
     ; Lane-wise subtract (%x - %y) of two <16 x i8> vectors.
  35 define <16 x i8> @sub_v16i8(<16 x i8> %x, <16 x i8> %y) {
  36   %a = sub <16 x i8> %x, %y
  37   ret <16 x i8> %a
  38 }
  39
  40 ; CHECK-LABEL: mul_v16i8:
  41 ; NO-SIMD128-NOT: i8x16
  42 ; SIMD128-NEXT: .functype mul_v16i8 (v128, v128) -> (v128){{$}}
  43 ; SIMD128-NEXT: i8x16.mul $push[[R:[0-9]+]]=, $0, $1{{$}}
  44 ; SIMD128-NEXT: return $pop[[R]]{{$}}
     ; Lane-wise multiply of two <16 x i8> vectors.
  45 define <16 x i8> @mul_v16i8(<16 x i8> %x, <16 x i8> %y) {
  46   %a = mul <16 x i8> %x, %y
  47   ret <16 x i8> %a
  48 }
  49
  50 ; CHECK-LABEL: neg_v16i8:
  51 ; NO-SIMD128-NOT: i8x16
  52 ; SIMD128-NEXT: .functype neg_v16i8 (v128) -> (v128){{$}}
  53 ; SIMD128-NEXT: i8x16.neg $push[[R:[0-9]+]]=, $0{{$}}
  54 ; SIMD128-NEXT: return $pop[[R]]{{$}}
     ; Negation written as (0 - %x) using an all-zero <16 x i8> constant.
  55 define <16 x i8> @neg_v16i8(<16 x i8> %x) {
  56   %a = sub <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0,
  57                       i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>,
  58                      %x
  59   ret <16 x i8> %a
  60 }
  61
  62 ; CHECK-LABEL: shl_v16i8:
  63 ; NO-SIMD128-NOT: i8x16
  64 ; SIMD128-NEXT: .functype shl_v16i8 (v128, i32) -> (v128){{$}}
  65 ; SIMD128-NEXT: i8x16.shl $push[[R:[0-9]+]]=, $0, $1{{$}}
  66 ; SIMD128-NEXT: return $pop[[R]]{{$}}
     ; Left shift of every lane by one scalar amount: %x is inserted into lane 0
     ; and the all-zero shuffle mask copies that lane to all 16 lanes.
  67 define <16 x i8> @shl_v16i8(<16 x i8> %v, i8 %x) {
  68   %t = insertelement <16 x i8> undef, i8 %x, i32 0
  69   %s = shufflevector <16 x i8> %t, <16 x i8> undef,
  70     <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0,
  71                 i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  72   %a = shl <16 x i8> %v, %s
  73   ret <16 x i8> %a
  74 }
  75
  76 ; CHECK-LABEL: shl_const_v16i8:
  77 ; NO-SIMD128-NOT: i8x16
  78 ; SIMD128-NEXT: .functype shl_const_v16i8 (v128) -> (v128){{$}}
  79 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 5
  80 ; SIMD128-NEXT: i8x16.shl $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
  81 ; SIMD128-NEXT: return $pop[[R]]{{$}}
  82 define <16 x i8> @shl_const_v16i8(<16 x i8> %v) {
  83   %a = shl <16 x i8> %v,
  84     <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5,
  85      i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5>
  86   ret <16 x i8> %a
  87 }
  88
  89 ; CHECK-LABEL: shl_vec_v16i8:
  90 ; NO-SIMD128-NOT: i8x16
  91 ; SIMD128-NEXT: .functype shl_vec_v16i8 (v128, v128) -> (v128){{$}}
  92 ; SIMD128-NEXT: i8x16.extract_lane_s $push[[L0:[0-9]+]]=, $0, 0{{$}}
  93 ; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 7{{$}}
  94 ; SIMD128-NEXT: i8x16.splat $push[[M1:[0-9]+]]=, $pop[[M0]]{{$}}
  95 ; SIMD128-NEXT: v128.and $push[[M2:[0-9]+]]=, $1, $pop[[M1]]{{$}}
  96 ; SIMD128-NEXT: local.tee $push[[M:[0-9]+]]=, $1=, $pop[[M2]]{{$}}
  97 ; SIMD128-NEXT: i8x16.extract_lane_u $push[[L1:[0-9]+]]=, $pop[[M]], 0{{$}}
  98 ; SIMD128-NEXT: i32.shl $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
  99 ; SIMD128-NEXT: i8x16.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
 100 ; Skip 14 lanes
 101 ; SIMD128:      i8x16.extract_lane_s $push[[L4:[0-9]+]]=, $0, 15{{$}}
 102 ; SIMD128-NEXT: i8x16.extract_lane_u $push[[L5:[0-9]+]]=, $1, 15{{$}}
 103 ; SIMD128-NEXT: i32.shl $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
 104 ; SIMD128-NEXT: i8x16.replace_lane $push[[R:[0-9]+]]=, $pop[[L7:[0-9]+]], 15, $pop[[L6]]{{$}}
 105 ; SIMD128-NEXT: return $pop[[R]]{{$}}
     ; Left shift where %x supplies a separate shift amount for each lane.
 106 define <16 x i8> @shl_vec_v16i8(<16 x i8> %v, <16 x i8> %x) {
 107   %a = shl <16 x i8> %v, %x
 108   ret <16 x i8> %a
 109 }
 110
 111 ; CHECK-LABEL: shr_s_v16i8:
 112 ; NO-SIMD128-NOT: i8x16
 113 ; SIMD128-NEXT: .functype shr_s_v16i8 (v128, i32) -> (v128){{$}}
 114 ; SIMD128-NEXT: i8x16.shr_s $push[[R:[0-9]+]]=, $0, $1{{$}}
 115 ; SIMD128-NEXT: return $pop[[R]]{{$}}
     ; Arithmetic right shift (ashr) of every lane by one scalar amount: %x is
     ; inserted into lane 0 and the all-zero shuffle mask copies it to all lanes.
 116 define <16 x i8> @shr_s_v16i8(<16 x i8> %v, i8 %x) {
 117   %t = insertelement <16 x i8> undef, i8 %x, i32 0
 118   %s = shufflevector <16 x i8> %t, <16 x i8> undef,
 119     <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0,
 120                 i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
 121   %a = ashr <16 x i8> %v, %s
 122   ret <16 x i8> %a
 123 }
 124
 125 ; CHECK-LABEL: shr_s_vec_v16i8:
 126 ; NO-SIMD128-NOT: i8x16
 127 ; SIMD128-NEXT: .functype shr_s_vec_v16i8 (v128, v128) -> (v128){{$}}
 128 ; SIMD128-NEXT: i8x16.extract_lane_s $push[[L0:[0-9]+]]=, $0, 0{{$}}
 129 ; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 7{{$}}
 130 ; SIMD128-NEXT: i8x16.splat $push[[M1:[0-9]+]]=, $pop[[M0]]{{$}}
 131 ; SIMD128-NEXT: v128.and $push[[M2:[0-9]+]]=, $1, $pop[[M1]]{{$}}
 132 ; SIMD128-NEXT: local.tee $push[[M:[0-9]+]]=, $1=, $pop[[M2]]{{$}}
 133 ; SIMD128-NEXT: i8x16.extract_lane_u $push[[L1:[0-9]+]]=, $pop[[M]], 0{{$}}
 134 ; SIMD128-NEXT: i32.shr_s $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
 135 ; SIMD128-NEXT: i8x16.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
 136 ; Skip 14 lanes
 137 ; SIMD128:      i8x16.extract_lane_s $push[[L0:[0-9]+]]=, $0, 15{{$}}
 138 ; SIMD128-NEXT: i8x16.extract_lane_u $push[[L1:[0-9]+]]=, $1, 15{{$}}
 139 ; SIMD128-NEXT: i32.shr_s $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
 140 ; SIMD128-NEXT: i8x16.replace_lane $push[[R:[0-9]+]]=, $pop{{[0-9]+}}, 15, $pop[[L2]]{{$}}
 141 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 142 define <16 x i8> @shr_s_vec_v16i8(<16 x i8> %v, <16 x i8> %x) {
 143   %a = ashr <16 x i8> %v, %x
 144   ret <16 x i8> %a
 145 }
 146
 147 ; CHECK-LABEL: shr_u_v16i8:
 148 ; NO-SIMD128-NOT: i8x16
 149 ; SIMD128-NEXT: .functype shr_u_v16i8 (v128, i32) -> (v128){{$}}
 150 ; SIMD128-NEXT: i8x16.shr_u $push[[R:[0-9]+]]=, $0, $1{{$}}
 151 ; SIMD128-NEXT: return $pop[[R]]{{$}}
     ; Logical right shift (lshr) of every lane by one scalar amount: %x is
     ; inserted into lane 0 and the all-zero shuffle mask copies it to all lanes.
 152 define <16 x i8> @shr_u_v16i8(<16 x i8> %v, i8 %x) {
 153   %t = insertelement <16 x i8> undef, i8 %x, i32 0
 154   %s = shufflevector <16 x i8> %t, <16 x i8> undef,
 155     <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0,
 156                 i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
 157   %a = lshr <16 x i8> %v, %s
 158   ret <16 x i8> %a
 159 }
 160
 161 ; CHECK-LABEL: shr_u_vec_v16i8:
 162 ; NO-SIMD128-NOT: i8x16
 163 ; SIMD128-NEXT: .functype shr_u_vec_v16i8 (v128, v128) -> (v128){{$}}
 164 ; SIMD128-NEXT: i8x16.extract_lane_u $push[[L0:[0-9]+]]=, $0, 0{{$}}
 165 ; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 7{{$}}
 166 ; SIMD128-NEXT: i8x16.splat $push[[M1:[0-9]+]]=, $pop[[M0]]{{$}}
 167 ; SIMD128-NEXT: v128.and $push[[M2:[0-9]+]]=, $1, $pop[[M1]]{{$}}
 168 ; SIMD128-NEXT: local.tee $push[[M:[0-9]+]]=, $1=, $pop[[M2]]{{$}}
 169 ; SIMD128-NEXT: i8x16.extract_lane_u $push[[L1:[0-9]+]]=, $pop[[M]], 0{{$}}
 170 ; SIMD128-NEXT: i32.shr_u $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
 171 ; SIMD128-NEXT: i8x16.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
 172 ; Skip 14 lanes
 173 ; SIMD128:      i8x16.extract_lane_u $push[[L4:[0-9]+]]=, $0, 15{{$}}
 174 ; SIMD128-NEXT: i8x16.extract_lane_u $push[[L5:[0-9]+]]=, $1, 15{{$}}
 175 ; SIMD128-NEXT: i32.shr_u $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
 176 ; SIMD128-NEXT: i8x16.replace_lane $push[[R:[0-9]+]]=, $pop[[L7:[0-9]+]], 15, $pop[[L6]]{{$}}
 177 ; SIMD128-NEXT: return $pop[[R]]{{$}}
     ; Logical right shift where %x supplies a separate shift amount for each lane.
 178 define <16 x i8> @shr_u_vec_v16i8(<16 x i8> %v, <16 x i8> %x) {
 179   %a = lshr <16 x i8> %v, %x
 180   ret <16 x i8> %a
 181 }
 182
 183 ; CHECK-LABEL: and_v16i8:
 184 ; NO-SIMD128-NOT: v128
 185 ; SIMD128-NEXT: .functype and_v16i8 (v128, v128) -> (v128){{$}}
 186 ; SIMD128-NEXT: v128.and $push[[R:[0-9]+]]=, $0, $1{{$}}
 187 ; SIMD128-NEXT: return $pop[[R]]{{$}}
     ; Bitwise AND of two <16 x i8> vectors.
 188 define <16 x i8> @and_v16i8(<16 x i8> %x, <16 x i8> %y) {
 189   %a = and <16 x i8> %x, %y
 190   ret <16 x i8> %a
 191 }
 192
 193 ; CHECK-LABEL: or_v16i8:
 194 ; NO-SIMD128-NOT: v128
 195 ; SIMD128-NEXT: .functype or_v16i8 (v128, v128) -> (v128){{$}}
 196 ; SIMD128-NEXT: v128.or $push[[R:[0-9]+]]=, $0, $1{{$}}
 197 ; SIMD128-NEXT: return $pop[[R]]{{$}}
     ; Bitwise OR of two <16 x i8> vectors.
 198 define <16 x i8> @or_v16i8(<16 x i8> %x, <16 x i8> %y) {
 199   %a = or <16 x i8> %x, %y
 200   ret <16 x i8> %a
 201 }
 202
 203 ; CHECK-LABEL: xor_v16i8:
 204 ; NO-SIMD128-NOT: v128
 205 ; SIMD128-NEXT: .functype xor_v16i8 (v128, v128) -> (v128){{$}}
 206 ; SIMD128-NEXT: v128.xor $push[[R:[0-9]+]]=, $0, $1{{$}}
 207 ; SIMD128-NEXT: return $pop[[R]]{{$}}
     ; Bitwise XOR of two <16 x i8> vectors.
 208 define <16 x i8> @xor_v16i8(<16 x i8> %x, <16 x i8> %y) {
 209   %a = xor <16 x i8> %x, %y
 210   ret <16 x i8> %a
 211 }
 212
 213 ; CHECK-LABEL: not_v16i8:
 214 ; NO-SIMD128-NOT: v128
 215 ; SIMD128-NEXT: .functype not_v16i8 (v128) -> (v128){{$}}
 216 ; SIMD128-NEXT: v128.not $push[[R:[0-9]+]]=, $0{{$}}
 217 ; SIMD128-NEXT: return $pop[[R]]{{$}}
     ; Bitwise NOT written as xor of %x with an all-ones <16 x i8> constant.
 218 define <16 x i8> @not_v16i8(<16 x i8> %x) {
 219   %a = xor <16 x i8> %x, <i8 -1, i8 -1, i8 -1, i8 -1,
 220                           i8 -1, i8 -1, i8 -1, i8 -1,
 221                           i8 -1, i8 -1, i8 -1, i8 -1,
 222                           i8 -1, i8 -1, i8 -1, i8 -1>
 223   ret <16 x i8> %a
 224 }
 225
 226 ; CHECK-LABEL: bitselect_v16i8:
 227 ; NO-SIMD128-NOT: v128
 228 ; SIMD128-NEXT: .functype bitselect_v16i8 (v128, v128, v128) -> (v128){{$}}
 229 ; SIMD128-SLOW-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $0{{$}}
 230 ; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}}
 231 ; SIMD128-FAST-NEXT: v128.and
 232 ; SIMD128-FAST-NEXT: v128.not
 233 ; SIMD128-FAST-NEXT: v128.and
 234 ; SIMD128-FAST-NEXT: v128.or
 235 ; SIMD128-FAST-NEXT: return
     ; Bit select written out as (%c & %v1) | (~%c & %v2), where ~%c is formed
     ; by xor of %c with an all-ones constant.
 236 define <16 x i8> @bitselect_v16i8(<16 x i8> %c, <16 x i8> %v1, <16 x i8> %v2) {
 237   %masked_v1 = and <16 x i8> %c, %v1
 238   %inv_mask = xor <16 x i8> %c,
 239     <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
 240      i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
 241   %masked_v2 = and <16 x i8> %inv_mask, %v2
 242   %a = or <16 x i8> %masked_v1, %masked_v2
 243   ret <16 x i8> %a
 244 }
 245
 246 ; ==============================================================================
 247 ; 8 x i16
 248 ; ==============================================================================
 249 ; CHECK-LABEL: add_v8i16:
 250 ; NO-SIMD128-NOT: i16x8
 251 ; SIMD128-NEXT: .functype add_v8i16 (v128, v128) -> (v128){{$}}
 252 ; SIMD128-NEXT: i16x8.add $push[[R:[0-9]+]]=, $0, $1{{$}}
 253 ; SIMD128-NEXT: return $pop[[R]]{{$}}
     ; Lane-wise add of two <8 x i16> vectors.
 254 define <8 x i16> @add_v8i16(<8 x i16> %x, <8 x i16> %y) {
 255   %a = add <8 x i16> %x, %y
 256   ret <8 x i16> %a
 257 }
 258
 259 ; CHECK-LABEL: sub_v8i16:
 260 ; NO-SIMD128-NOT: i16x8
 261 ; SIMD128-NEXT: .functype sub_v8i16 (v128, v128) -> (v128){{$}}
 262 ; SIMD128-NEXT: i16x8.sub $push[[R:[0-9]+]]=, $0, $1{{$}}
 263 ; SIMD128-NEXT: return $pop[[R]]{{$}}
     ; Lane-wise subtract (%x - %y) of two <8 x i16> vectors.
 264 define <8 x i16> @sub_v8i16(<8 x i16> %x, <8 x i16> %y) {
 265   %a = sub <8 x i16> %x, %y
 266   ret <8 x i16> %a
 267 }
 268
 269 ; CHECK-LABEL: mul_v8i16:
 270 ; NO-SIMD128-NOT: i16x8
 271 ; SIMD128-NEXT: .functype mul_v8i16 (v128, v128) -> (v128){{$}}
 272 ; SIMD128-NEXT: i16x8.mul $push[[R:[0-9]+]]=, $0, $1{{$}}
 273 ; SIMD128-NEXT: return $pop[[R]]{{$}}
     ; Lane-wise multiply of two <8 x i16> vectors.
 274 define <8 x i16> @mul_v8i16(<8 x i16> %x, <8 x i16> %y) {
 275   %a = mul <8 x i16> %x, %y
 276   ret <8 x i16> %a
 277 }
 278
 279 ; CHECK-LABEL: neg_v8i16:
 280 ; NO-SIMD128-NOT: i16x8
 281 ; SIMD128-NEXT: .functype neg_v8i16 (v128) -> (v128){{$}}
 282 ; SIMD128-NEXT: i16x8.neg $push[[R:[0-9]+]]=, $0{{$}}
 283 ; SIMD128-NEXT: return $pop[[R]]{{$}}
     ; Negation written as (0 - %x) using an all-zero <8 x i16> constant.
 284 define <8 x i16> @neg_v8i16(<8 x i16> %x) {
 285   %a = sub <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>,
 286                      %x
 287   ret <8 x i16> %a
 288 }
 289
 290 ; CHECK-LABEL: shl_v8i16:
 291 ; NO-SIMD128-NOT: i16x8
 292 ; SIMD128-NEXT: .functype shl_v8i16 (v128, i32) -> (v128){{$}}
 293 ; SIMD128-NEXT: i16x8.shl $push[[R:[0-9]+]]=, $0, $1{{$}}
 294 ; SIMD128-NEXT: return $pop[[R]]{{$}}
     ; Left shift of every lane by one scalar amount: %x is inserted into lane 0
     ; and the all-zero shuffle mask copies that lane to all 8 lanes.
 295 define <8 x i16> @shl_v8i16(<8 x i16> %v, i16 %x) {
 296   %t = insertelement <8 x i16> undef, i16 %x, i32 0
 297   %s = shufflevector <8 x i16> %t, <8 x i16> undef,
 298     <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
 299   %a = shl <8 x i16> %v, %s
 300   ret <8 x i16> %a
 301 }
 302
 303 ; CHECK-LABEL: shl_const_v8i16:
 304 ; NO-SIMD128-NOT: i16x8
 305 ; SIMD128-NEXT: .functype shl_const_v8i16 (v128) -> (v128){{$}}
 306 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 5
 307 ; SIMD128-NEXT: i16x8.shl $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
 308 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 309 define <8 x i16> @shl_const_v8i16(<8 x i16> %v) {
 310   %a = shl <8 x i16> %v,
 311     <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
 312   ret <8 x i16> %a
 313 }
 314
 315 ; CHECK-LABEL: shl_vec_v8i16:
 316 ; NO-SIMD128-NOT: i16x8
 317 ; SIMD128-NEXT: .functype shl_vec_v8i16 (v128, v128) -> (v128){{$}}
 318 ; SIMD128-NEXT: i16x8.extract_lane_s $push[[L0:[0-9]+]]=, $0, 0{{$}}
 319 ; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 15{{$}}
 320 ; SIMD128-NEXT: i16x8.splat $push[[M1:[0-9]+]]=, $pop[[M0]]{{$}}
 321 ; SIMD128-NEXT: v128.and $push[[M2:[0-9]+]]=, $1, $pop[[M1]]{{$}}
 322 ; SIMD128-NEXT: local.tee $push[[M:[0-9]+]]=, $1=, $pop[[M2]]{{$}}
 323 ; SIMD128-NEXT: i16x8.extract_lane_u $push[[L1:[0-9]+]]=, $pop[[M]], 0{{$}}
 324 ; SIMD128-NEXT: i32.shl $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
 325 ; SIMD128-NEXT: i16x8.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
 326 ; Skip 6 lanes
 327 ; SIMD128:      i16x8.extract_lane_s $push[[L4:[0-9]+]]=, $0, 7{{$}}
 328 ; SIMD128-NEXT: i16x8.extract_lane_u $push[[L5:[0-9]+]]=, $1, 7{{$}}
 329 ; SIMD128-NEXT: i32.shl $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
 330 ; SIMD128-NEXT: i16x8.replace_lane $push[[R:[0-9]+]]=, $pop[[L7:[0-9]+]], 7, $pop[[L6]]{{$}}
 331 ; SIMD128-NEXT: return $pop[[R]]{{$}}
     ; Left shift where %x supplies a separate shift amount for each lane.
 332 define <8 x i16> @shl_vec_v8i16(<8 x i16> %v, <8 x i16> %x) {
 333   %a = shl <8 x i16> %v, %x
 334   ret <8 x i16> %a
 335 }
 336
 337 ; CHECK-LABEL: shr_s_v8i16:
 338 ; NO-SIMD128-NOT: i16x8
 339 ; SIMD128-NEXT: .functype shr_s_v8i16 (v128, i32) -> (v128){{$}}
 340 ; SIMD128-NEXT: i16x8.shr_s $push[[R:[0-9]+]]=, $0, $1{{$}}
 341 ; SIMD128-NEXT: return $pop[[R]]{{$}}
     ; Arithmetic right shift (ashr) of every lane by one scalar amount: %x is
     ; inserted into lane 0 and the all-zero shuffle mask copies it to all lanes.
 342 define <8 x i16> @shr_s_v8i16(<8 x i16> %v, i16 %x) {
 343   %t = insertelement <8 x i16> undef, i16 %x, i32 0
 344   %s = shufflevector <8 x i16> %t, <8 x i16> undef,
 345     <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
 346   %a = ashr <8 x i16> %v, %s
 347   ret <8 x i16> %a
 348 }
 349
 350 ; CHECK-LABEL: shr_s_vec_v8i16:
 351 ; NO-SIMD128-NOT: i16x8
 352 ; SIMD128-NEXT: .functype shr_s_vec_v8i16 (v128, v128) -> (v128){{$}}
 353 ; SIMD128-NEXT: i16x8.extract_lane_s $push[[L0:[0-9]+]]=, $0, 0{{$}}
 354 ; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 15{{$}}
 355 ; SIMD128-NEXT: i16x8.splat $push[[M1:[0-9]+]]=, $pop[[M0]]{{$}}
 356 ; SIMD128-NEXT: v128.and $push[[M2:[0-9]+]]=, $1, $pop[[M1]]{{$}}
 357 ; SIMD128-NEXT: local.tee $push[[M:[0-9]+]]=, $1=, $pop[[M2]]{{$}}
 358 ; SIMD128-NEXT: i16x8.extract_lane_u $push[[L1:[0-9]+]]=, $pop[[M]], 0{{$}}
 359 ; SIMD128-NEXT: i32.shr_s $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
 360 ; SIMD128-NEXT: i16x8.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
 361 ; Skip 6 lanes
 362 ; SIMD128:      i16x8.extract_lane_s $push[[L0:[0-9]+]]=, $0, 7{{$}}
 363 ; SIMD128-NEXT: i16x8.extract_lane_u $push[[L1:[0-9]+]]=, $1, 7{{$}}
 364 ; SIMD128-NEXT: i32.shr_s $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
 365 ; SIMD128-NEXT: i16x8.replace_lane $push[[R:[0-9]+]]=, $pop{{[0-9]+}}, 7, $pop[[L2]]{{$}}
 366 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 367 define <8 x i16> @shr_s_vec_v8i16(<8 x i16> %v, <8 x i16> %x) {
 368   %a = ashr <8 x i16> %v, %x
 369   ret <8 x i16> %a
 370 }
 371
 372 ; CHECK-LABEL: shr_u_v8i16:
 373 ; NO-SIMD128-NOT: i16x8
 374 ; SIMD128-NEXT: .functype shr_u_v8i16 (v128, i32) -> (v128){{$}}
 375 ; SIMD128-NEXT: i16x8.shr_u $push[[R:[0-9]+]]=, $0, $1{{$}}
 376 ; SIMD128-NEXT: return $pop[[R]]{{$}}
     ; Logical right shift (lshr) of every lane by one scalar amount: %x is
     ; inserted into lane 0 and the all-zero shuffle mask copies it to all lanes.
 377 define <8 x i16> @shr_u_v8i16(<8 x i16> %v, i16 %x) {
 378   %t = insertelement <8 x i16> undef, i16 %x, i32 0
 379   %s = shufflevector <8 x i16> %t, <8 x i16> undef,
 380     <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
 381   %a = lshr <8 x i16> %v, %s
 382   ret <8 x i16> %a
 383 }
 384
 385 ; CHECK-LABEL: shr_u_vec_v8i16:
 386 ; NO-SIMD128-NOT: i16x8
 387 ; SIMD128-NEXT: .functype shr_u_vec_v8i16 (v128, v128) -> (v128){{$}}
 388 ; SIMD128-NEXT: i16x8.extract_lane_u $push[[L0:[0-9]+]]=, $0, 0{{$}}
 389 ; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 15{{$}}
 390 ; SIMD128-NEXT: i16x8.splat $push[[M1:[0-9]+]]=, $pop[[M0]]{{$}}
 391 ; SIMD128-NEXT: v128.and $push[[M2:[0-9]+]]=, $1, $pop[[M1]]{{$}}
 392 ; SIMD128-NEXT: local.tee $push[[M:[0-9]+]]=, $1=, $pop[[M2]]{{$}}
 393 ; SIMD128-NEXT: i16x8.extract_lane_u $push[[L1:[0-9]+]]=, $pop[[M]], 0{{$}}
 394 ; SIMD128-NEXT: i32.shr_u $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
 395 ; SIMD128-NEXT: i16x8.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
 396 ; Skip 6 lanes
 397 ; SIMD128:      i16x8.extract_lane_u $push[[L4:[0-9]+]]=, $0, 7{{$}}
 398 ; SIMD128-NEXT: i16x8.extract_lane_u $push[[L5:[0-9]+]]=, $1, 7{{$}}
 399 ; SIMD128-NEXT: i32.shr_u $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
 400 ; SIMD128-NEXT: i16x8.replace_lane $push[[R:[0-9]+]]=, $pop[[L7:[0-9]+]], 7, $pop[[L6]]{{$}}
 401 ; SIMD128-NEXT: return $pop[[R]]{{$}}
     ; Logical right shift where %x supplies a separate shift amount for each lane.
 402 define <8 x i16> @shr_u_vec_v8i16(<8 x i16> %v, <8 x i16> %x) {
 403   %a = lshr <8 x i16> %v, %x
 404   ret <8 x i16> %a
 405 }
 406
 407 ; CHECK-LABEL: and_v8i16:
 408 ; NO-SIMD128-NOT: v128
 409 ; SIMD128-NEXT: .functype and_v8i16 (v128, v128) -> (v128){{$}}
 410 ; SIMD128-NEXT: v128.and $push[[R:[0-9]+]]=, $0, $1{{$}}
 411 ; SIMD128-NEXT: return $pop[[R]]{{$}}
     ; Bitwise AND of two <8 x i16> vectors.
 412 define <8 x i16> @and_v8i16(<8 x i16> %x, <8 x i16> %y) {
 413   %a = and <8 x i16> %x, %y
 414   ret <8 x i16> %a
 415 }
 416
 417 ; CHECK-LABEL: or_v8i16:
 418 ; NO-SIMD128-NOT: v128
 419 ; SIMD128-NEXT: .functype or_v8i16 (v128, v128) -> (v128){{$}}
 420 ; SIMD128-NEXT: v128.or $push[[R:[0-9]+]]=, $0, $1{{$}}
 421 ; SIMD128-NEXT: return $pop[[R]]{{$}}
     ; Bitwise OR of two <8 x i16> vectors.
 422 define <8 x i16> @or_v8i16(<8 x i16> %x, <8 x i16> %y) {
 423   %a = or <8 x i16> %x, %y
 424   ret <8 x i16> %a
 425 }
 426
 427 ; CHECK-LABEL: xor_v8i16:
 428 ; NO-SIMD128-NOT: v128
 429 ; SIMD128-NEXT: .functype xor_v8i16 (v128, v128) -> (v128){{$}}
 430 ; SIMD128-NEXT: v128.xor $push[[R:[0-9]+]]=, $0, $1{{$}}
 431 ; SIMD128-NEXT: return $pop[[R]]{{$}}
     ; Bitwise XOR of two <8 x i16> vectors.
 432 define <8 x i16> @xor_v8i16(<8 x i16> %x, <8 x i16> %y) {
 433   %a = xor <8 x i16> %x, %y
 434   ret <8 x i16> %a
 435 }
 436
 437 ; CHECK-LABEL: not_v8i16:
 438 ; NO-SIMD128-NOT: v128
 439 ; SIMD128-NEXT: .functype not_v8i16 (v128) -> (v128){{$}}
 440 ; SIMD128-NEXT: v128.not $push[[R:[0-9]+]]=, $0{{$}}
 441 ; SIMD128-NEXT: return $pop[[R]]{{$}}
     ; Bitwise NOT written as xor of %x with an all-ones <8 x i16> constant.
 442 define <8 x i16> @not_v8i16(<8 x i16> %x) {
 443   %a = xor <8 x i16> %x, <i16 -1, i16 -1, i16 -1, i16 -1,
 444                           i16 -1, i16 -1, i16 -1, i16 -1>
 445   ret <8 x i16> %a
 446 }
 447
 448 ; CHECK-LABEL: bitselect_v8i16:
 449 ; NO-SIMD128-NOT: v128
 450 ; SIMD128-NEXT: .functype bitselect_v8i16 (v128, v128, v128) -> (v128){{$}}
 451 ; SIMD128-SLOW-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $0{{$}}
 452 ; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}}
 453 ; SIMD128-FAST-NEXT: v128.and
 454 ; SIMD128-FAST-NEXT: v128.not
 455 ; SIMD128-FAST-NEXT: v128.and
 456 ; SIMD128-FAST-NEXT: v128.or
 457 ; SIMD128-FAST-NEXT: return
     ; Bit select written out as (%v1 & %c) | (%v2 & ~%c). Here the mask is the
     ; second operand of each `and`, and the all-ones constant is the first
     ; operand of the xor that forms ~%c.
 458 define <8 x i16> @bitselect_v8i16(<8 x i16> %c, <8 x i16> %v1, <8 x i16> %v2) {
 459   %masked_v1 = and <8 x i16> %v1, %c
 460   %inv_mask = xor <8 x i16>
 461     <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>,
 462     %c
 463   %masked_v2 = and <8 x i16> %v2, %inv_mask
 464   %a = or <8 x i16> %masked_v1, %masked_v2
 465   ret <8 x i16> %a
 466 }
 467
 468 ; ==============================================================================
 469 ; 4 x i32
 470 ; ==============================================================================
 471 ; CHECK-LABEL: add_v4i32:
 472 ; NO-SIMD128-NOT: i32x4
 473 ; SIMD128-NEXT: .functype add_v4i32 (v128, v128) -> (v128){{$}}
 474 ; SIMD128-NEXT: i32x4.add $push[[R:[0-9]+]]=, $0, $1{{$}}
 475 ; SIMD128-NEXT: return $pop[[R]]{{$}}
     ; Lane-wise add of two <4 x i32> vectors.
 476 define <4 x i32> @add_v4i32(<4 x i32> %x, <4 x i32> %y) {
 477   %a = add <4 x i32> %x, %y
 478   ret <4 x i32> %a
 479 }
 480
 481 ; CHECK-LABEL: sub_v4i32:
 482 ; NO-SIMD128-NOT: i32x4
 483 ; SIMD128-NEXT: .functype sub_v4i32 (v128, v128) -> (v128){{$}}
 484 ; SIMD128-NEXT: i32x4.sub $push[[R:[0-9]+]]=, $0, $1{{$}}
 485 ; SIMD128-NEXT: return $pop[[R]]{{$}}
     ; Lane-wise subtract (%x - %y) of two <4 x i32> vectors.
 486 define <4 x i32> @sub_v4i32(<4 x i32> %x, <4 x i32> %y) {
 487   %a = sub <4 x i32> %x, %y
 488   ret <4 x i32> %a
 489 }
 490
 491 ; CHECK-LABEL: mul_v4i32:
 492 ; NO-SIMD128-NOT: i32x4
 493 ; SIMD128-NEXT: .functype mul_v4i32 (v128, v128) -> (v128){{$}}
 494 ; SIMD128-NEXT: i32x4.mul $push[[R:[0-9]+]]=, $0, $1{{$}}
 495 ; SIMD128-NEXT: return $pop[[R]]{{$}}
     ; Lane-wise multiply of two <4 x i32> vectors.
 496 define <4 x i32> @mul_v4i32(<4 x i32> %x, <4 x i32> %y) {
 497   %a = mul <4 x i32> %x, %y
 498   ret <4 x i32> %a
 499 }
 500
 501 ; CHECK-LABEL: neg_v4i32:
 502 ; NO-SIMD128-NOT: i32x4
 503 ; SIMD128-NEXT: .functype neg_v4i32 (v128) -> (v128){{$}}
 504 ; SIMD128-NEXT: i32x4.neg $push[[R:[0-9]+]]=, $0{{$}}
 505 ; SIMD128-NEXT: return $pop[[R]]{{$}}
     ; Negation written as (0 - %x) using an all-zero <4 x i32> constant.
 506 define <4 x i32> @neg_v4i32(<4 x i32> %x) {
 507   %a = sub <4 x i32> <i32 0, i32 0, i32 0, i32 0>, %x
 508   ret <4 x i32> %a
 509 }
 510
 511 ; CHECK-LABEL: shl_v4i32:
 512 ; NO-SIMD128-NOT: i32x4
 513 ; SIMD128-NEXT: .functype shl_v4i32 (v128, i32) -> (v128){{$}}
 514 ; SIMD128-NEXT: i32x4.shl $push[[R:[0-9]+]]=, $0, $1{{$}}
 515 ; SIMD128-NEXT: return $pop[[R]]{{$}}
     ; Left shift of every lane by one scalar amount: %x is inserted into lane 0
     ; and the all-zero shuffle mask copies that lane to all 4 lanes.
 516 define <4 x i32> @shl_v4i32(<4 x i32> %v, i32 %x) {
 517   %t = insertelement <4 x i32> undef, i32 %x, i32 0
 518   %s = shufflevector <4 x i32> %t, <4 x i32> undef,
 519     <4 x i32> <i32 0, i32 0, i32 0, i32 0>
 520   %a = shl <4 x i32> %v, %s
 521   ret <4 x i32> %a
 522 }
 523
 524 ; CHECK-LABEL: shl_const_v4i32:
 525 ; NO-SIMD128-NOT: i32x4
 526 ; SIMD128-NEXT: .functype shl_const_v4i32 (v128) -> (v128){{$}}
 527 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 5
 528 ; SIMD128-NEXT: i32x4.shl $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
 529 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 530 define <4 x i32> @shl_const_v4i32(<4 x i32> %v) {
 531   %a = shl <4 x i32> %v, <i32 5, i32 5, i32 5, i32 5>
 532   ret <4 x i32> %a
 533 }
 534
 535 ; CHECK-LABEL: shl_vec_v4i32:
 536 ; NO-SIMD128-NOT: i32x4
 537 ; SIMD128-NEXT: .functype shl_vec_v4i32 (v128, v128) -> (v128){{$}}
 538 ; SIMD128-NEXT: i32x4.extract_lane $push[[L0:[0-9]+]]=, $0, 0{{$}}
 539 ; SIMD128-NEXT: i32x4.extract_lane $push[[L1:[0-9]+]]=, $1, 0{{$}}
 540 ; SIMD128-NEXT: i32.shl $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
 541 ; SIMD128-NEXT: i32x4.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
 542 ; Skip 2 lanes
 543 ; SIMD128:      i32x4.extract_lane $push[[L4:[0-9]+]]=, $0, 3{{$}}
 544 ; SIMD128-NEXT: i32x4.extract_lane $push[[L5:[0-9]+]]=, $1, 3{{$}}
 545 ; SIMD128-NEXT: i32.shl $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
 546 ; SIMD128-NEXT: i32x4.replace_lane $push[[R:[0-9]+]]=, $pop[[L7:[0-9]+]], 3, $pop[[L6]]{{$}}
 547 ; SIMD128-NEXT: return $pop[[R]]{{$}}
     ; Left shift where %x supplies a separate shift amount for each lane.
 548 define <4 x i32> @shl_vec_v4i32(<4 x i32> %v, <4 x i32> %x) {
 549   %a = shl <4 x i32> %v, %x
 550   ret <4 x i32> %a
 551 }
 552
 553 ; CHECK-LABEL: shr_s_v4i32:
 554 ; NO-SIMD128-NOT: i32x4
 555 ; SIMD128-NEXT: .functype shr_s_v4i32 (v128, i32) -> (v128){{$}}
 556 ; SIMD128-NEXT: i32x4.shr_s $push[[R:[0-9]+]]=, $0, $1{{$}}
 557 ; SIMD128-NEXT: return $pop[[R]]{{$}}
     ; Arithmetic right shift (ashr) of every lane by one scalar amount: %x is
     ; inserted into lane 0 and the all-zero shuffle mask copies it to all lanes.
 558 define <4 x i32> @shr_s_v4i32(<4 x i32> %v, i32 %x) {
 559   %t = insertelement <4 x i32> undef, i32 %x, i32 0
 560   %s = shufflevector <4 x i32> %t, <4 x i32> undef,
 561     <4 x i32> <i32 0, i32 0, i32 0, i32 0>
 562   %a = ashr <4 x i32> %v, %s
 563   ret <4 x i32> %a
 564 }
 565
 566 ; CHECK-LABEL: shr_s_vec_v4i32:
 567 ; NO-SIMD128-NOT: i32x4
 568 ; SIMD128-NEXT: .functype shr_s_vec_v4i32 (v128, v128) -> (v128){{$}}
 569 ; SIMD128-NEXT: i32x4.extract_lane $push[[L0:[0-9]+]]=, $0, 0{{$}}
 570 ; SIMD128-NEXT: i32x4.extract_lane $push[[L1:[0-9]+]]=, $1, 0{{$}}
 571 ; SIMD128-NEXT: i32.shr_s $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
 572 ; SIMD128-NEXT: i32x4.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
 573 ; Skip 2 lanes
 574 ; SIMD128:      i32x4.extract_lane $push[[L4:[0-9]+]]=, $0, 3{{$}}
 575 ; SIMD128-NEXT: i32x4.extract_lane $push[[L5:[0-9]+]]=, $1, 3{{$}}
 576 ; SIMD128-NEXT: i32.shr_s $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
 577 ; SIMD128-NEXT: i32x4.replace_lane $push[[R:[0-9]+]]=, $pop[[L7:[0-9]+]], 3, $pop[[L6]]{{$}}
 578 ; SIMD128-NEXT: return $pop[[R]]{{$}}
     ; Arithmetic right shift where %x supplies a separate shift amount for each lane.
 579 define <4 x i32> @shr_s_vec_v4i32(<4 x i32> %v, <4 x i32> %x) {
 580   %a = ashr <4 x i32> %v, %x
 581   ret <4 x i32> %a
 582 }
 583
 584 ; CHECK-LABEL: shr_u_v4i32:
 585 ; NO-SIMD128-NOT: i32x4
 586 ; SIMD128-NEXT: .functype shr_u_v4i32 (v128, i32) -> (v128){{$}}
 587 ; SIMD128-NEXT: i32x4.shr_u $push[[R:[0-9]+]]=, $0, $1{{$}}
 588 ; SIMD128-NEXT: return $pop[[R]]{{$}}
     ; Logical right shift (lshr) of every lane by one scalar amount: %x is
     ; inserted into lane 0 and the all-zero shuffle mask copies it to all lanes.
 589 define <4 x i32> @shr_u_v4i32(<4 x i32> %v, i32 %x) {
 590   %t = insertelement <4 x i32> undef, i32 %x, i32 0
 591   %s = shufflevector <4 x i32> %t, <4 x i32> undef,
 592     <4 x i32> <i32 0, i32 0, i32 0, i32 0>
 593   %a = lshr <4 x i32> %v, %s
 594   ret <4 x i32> %a
 595 }
 596
 597 ; CHECK-LABEL: shr_u_vec_v4i32:
 598 ; NO-SIMD128-NOT: i32x4
 599 ; SIMD128-NEXT: .functype shr_u_vec_v4i32 (v128, v128) -> (v128){{$}}
 600 ; SIMD128-NEXT: i32x4.extract_lane $push[[L0:[0-9]+]]=, $0, 0{{$}}
 601 ; SIMD128-NEXT: i32x4.extract_lane $push[[L1:[0-9]+]]=, $1, 0{{$}}
 602 ; SIMD128-NEXT: i32.shr_u $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
 603 ; SIMD128-NEXT: i32x4.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
 604 ; Skip 2 lanes
 605 ; SIMD128:      i32x4.extract_lane $push[[L4:[0-9]+]]=, $0, 3{{$}}
 606 ; SIMD128-NEXT: i32x4.extract_lane $push[[L5:[0-9]+]]=, $1, 3{{$}}
 607 ; SIMD128-NEXT: i32.shr_u $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
 608 ; SIMD128-NEXT: i32x4.replace_lane $push[[R:[0-9]+]]=, $pop[[L7:[0-9]+]], 3, $pop[[L6]]{{$}}
 609 ; SIMD128-NEXT: return $pop[[R]]{{$}}
     ; Logical right shift where %x supplies a separate shift amount for each lane.
 610 define <4 x i32> @shr_u_vec_v4i32(<4 x i32> %v, <4 x i32> %x) {
 611   %a = lshr <4 x i32> %v, %x
 612   ret <4 x i32> %a
 613 }
 614
 615 ; CHECK-LABEL: and_v4i32:
 616 ; NO-SIMD128-NOT: v128
 617 ; SIMD128-NEXT: .functype and_v4i32 (v128, v128) -> (v128){{$}}
 618 ; SIMD128-NEXT: v128.and $push[[R:[0-9]+]]=, $0, $1{{$}}
 619 ; SIMD128-NEXT: return $pop[[R]]{{$}}
     ; Bitwise AND of two <4 x i32> vectors.
 620 define <4 x i32> @and_v4i32(<4 x i32> %x, <4 x i32> %y) {
 621   %a = and <4 x i32> %x, %y
 622   ret <4 x i32> %a
 623 }
 624
 625 ; CHECK-LABEL: or_v4i32:
 626 ; NO-SIMD128-NOT: v128
 627 ; SIMD128-NEXT: .functype or_v4i32 (v128, v128) -> (v128){{$}}
 628 ; SIMD128-NEXT: v128.or $push[[R:[0-9]+]]=, $0, $1{{$}}
 629 ; SIMD128-NEXT: return $pop[[R]]{{$}}
     ; Bitwise OR of two <4 x i32> vectors.
 630 define <4 x i32> @or_v4i32(<4 x i32> %x, <4 x i32> %y) {
 631   %a = or <4 x i32> %x, %y
 632   ret <4 x i32> %a
 633 }
 634
 635 ; CHECK-LABEL: xor_v4i32:
 636 ; NO-SIMD128-NOT: v128
 637 ; SIMD128-NEXT: .functype xor_v4i32 (v128, v128) -> (v128){{$}}
 638 ; SIMD128-NEXT: v128.xor $push[[R:[0-9]+]]=, $0, $1{{$}}
 639 ; SIMD128-NEXT: return $pop[[R]]{{$}}
     ; Bitwise XOR of two <4 x i32> vectors.
 640 define <4 x i32> @xor_v4i32(<4 x i32> %x, <4 x i32> %y) {
 641   %a = xor <4 x i32> %x, %y
 642   ret <4 x i32> %a
 643 }
 644
 645 ; CHECK-LABEL: not_v4i32:
 646 ; NO-SIMD128-NOT: v128
 647 ; SIMD128-NEXT: .functype not_v4i32 (v128) -> (v128){{$}}
 648 ; SIMD128-NEXT: v128.not $push[[R:[0-9]+]]=, $0{{$}}
 649 ; SIMD128-NEXT: return $pop[[R]]{{$}}
     ; Bitwise NOT written as xor of %x with an all-ones <4 x i32> constant.
 650 define <4 x i32> @not_v4i32(<4 x i32> %x) {
 651   %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
 652   ret <4 x i32> %a
 653 }
 654
 655 ; CHECK-LABEL: bitselect_v4i32:
 656 ; NO-SIMD128-NOT: v128
 657 ; SIMD128-NEXT: .functype bitselect_v4i32 (v128, v128, v128) -> (v128){{$}}
 658 ; SIMD128-SLOW-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $0{{$}}
 659 ; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}}
 660 ; SIMD128-FAST-NEXT: v128.not
 661 ; SIMD128-FAST-NEXT: v128.and
 662 ; SIMD128-FAST-NEXT: v128.and
 663 ; SIMD128-FAST-NEXT: v128.or
 664 ; SIMD128-FAST-NEXT: return
     ; Bit select written out as (~%c & %v2) | (%c & %v1). The inverted-mask
     ; term is the first operand of the final `or`, and the all-ones constant is
     ; the first operand of the xor that forms ~%c.
 665 define <4 x i32> @bitselect_v4i32(<4 x i32> %c, <4 x i32> %v1, <4 x i32> %v2) {
 666   %masked_v1 = and <4 x i32> %c, %v1
 667   %inv_mask = xor <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, %c
 668   %masked_v2 = and <4 x i32> %inv_mask, %v2
 669   %a = or <4 x i32> %masked_v2, %masked_v1
 670   ret <4 x i32> %a
 671 }
 672
 673 ; ==============================================================================
 674 ; 2 x i64
 675 ; ==============================================================================
 676 ; CHECK-LABEL: add_v2i64:
 677 ; NO-SIMD128-NOT: i64x2
 678 ; SIMD128-VM-NOT: i64x2
 679 ; SIMD128-NEXT: .functype add_v2i64 (v128, v128) -> (v128){{$}}
 680 ; SIMD128-NEXT: i64x2.add $push[[R:[0-9]+]]=, $0, $1{{$}}
 681 ; SIMD128-NEXT: return $pop[[R]]{{$}}
     ; Lane-wise add of two <2 x i64> vectors.
 682 define <2 x i64> @add_v2i64(<2 x i64> %x, <2 x i64> %y) {
 683   %a = add <2 x i64> %x, %y
 684   ret <2 x i64> %a
 685 }
 686
 687 ; CHECK-LABEL: sub_v2i64:
 688 ; NO-SIMD128-NOT: i64x2
 689 ; SIMD128-VM-NOT: i64x2
 690 ; SIMD128-NEXT: .functype sub_v2i64 (v128, v128) -> (v128){{$}}
 691 ; SIMD128-NEXT: i64x2.sub $push[[R:[0-9]+]]=, $0, $1{{$}}
 692 ; SIMD128-NEXT: return $pop[[R]]{{$}}
     ; Lane-wise subtract (%x - %y) of two <2 x i64> vectors.
 693 define <2 x i64> @sub_v2i64(<2 x i64> %x, <2 x i64> %y) {
 694   %a = sub <2 x i64> %x, %y
 695   ret <2 x i64> %a
 696 }
 697
 698 ; i64x2.mul is not in the spec, so expect a scalar i64.mul on extracted lanes
 699 ; CHECK-LABEL: mul_v2i64:
 700 ; NO-SIMD128-NOT: i64x2
 701 ; SIMD128-VM-NOT: i64x2
 702 ; SIMD128-NOT: i64x2.mul
 703 ; SIMD128: i64x2.extract_lane
 704 ; SIMD128: i64.mul
 705 define <2 x i64> @mul_v2i64(<2 x i64> %x, <2 x i64> %y) { ; lane-wise product of two <2 x i64> vectors
 706   %a = mul <2 x i64> %x, %y ; the checks above expect per-lane extract_lane + i64.mul rather than a vector multiply
 707   ret <2 x i64> %a
 708 }
 709
 710 ; CHECK-LABEL: neg_v2i64:
 711 ; NO-SIMD128-NOT: i64x2
 712 ; SIMD128-NEXT: .functype neg_v2i64 (v128) -> (v128){{$}}
 713 ; SIMD128-NEXT: i64x2.neg $push[[R:[0-9]+]]=, $0{{$}}
 714 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 715 define <2 x i64> @neg_v2i64(<2 x i64> %x) { ; lane-wise integer negation of a <2 x i64> vector
 716   %a = sub <2 x i64> <i64 0, i64 0>, %x ; negation written as 0 - x; the checks above expect a single i64x2.neg
 717   ret <2 x i64> %a
 718 }
 719
 720 ; CHECK-LABEL: shl_v2i64:
 721 ; NO-SIMD128-NOT: i64x2
 722 ; SIMD128-NEXT: .functype shl_v2i64 (v128, i32) -> (v128){{$}}
 723 ; SIMD128-NEXT: i64x2.shl $push[[R:[0-9]+]]=, $0, $1{{$}}
 724 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 725 define <2 x i64> @shl_v2i64(<2 x i64> %v, i32 %x) { ; shift both i64 lanes left by one scalar i32 amount
 726   %x2 = zext i32 %x to i64 ; widen the shift count to the lane type (zero-extended)
 727   %t = insertelement <2 x i64> undef, i64 %x2, i32 0 ; place the count in lane 0 ...
 728   %s = shufflevector <2 x i64> %t, <2 x i64> undef, <2 x i32> <i32 0, i32 0> ; ... and splat lane 0 into both lanes
 729   %a = shl <2 x i64> %v, %s ; the checks above expect i64x2.shl taking the original i32 directly
 730   ret <2 x i64> %a
 731 }
 732
 733 ; CHECK-LABEL: shl_sext_v2i64:
 734 ; NO-SIMD128-NOT: i64x2
 735 ; SIMD128-NEXT: .functype shl_sext_v2i64 (v128, i32) -> (v128){{$}}
 736 ; SIMD128-NEXT: i64x2.shl $push[[R:[0-9]+]]=, $0, $1{{$}}
 737 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 738 define <2 x i64> @shl_sext_v2i64(<2 x i64> %v, i32 %x) { ; same as shl_v2i64, but the count is sign-extended
 739   %x2 = sext i32 %x to i64 ; widen the shift count to the lane type (sign-extended)
 740   %t = insertelement <2 x i64> undef, i64 %x2, i32 0 ; place the count in lane 0 ...
 741   %s = shufflevector <2 x i64> %t, <2 x i64> undef, <2 x i32> <i32 0, i32 0> ; ... and splat lane 0 into both lanes
 742   %a = shl <2 x i64> %v, %s ; the checks above expect the same single i64x2.shl as the zext variant
 743   ret <2 x i64> %a
 744 }
 745
 746 ; CHECK-LABEL: shl_noext_v2i64:
 747 ; NO-SIMD128-NOT: i64x2
 748 ; SIMD128-NEXT: .functype shl_noext_v2i64 (v128, i64) -> (v128){{$}}
 749 ; SIMD128-NEXT: i32.wrap_i64 $push[[L0:[0-9]+]]=, $1{{$}}
 750 ; SIMD128-NEXT: i64x2.shl $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
 751 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 752 define <2 x i64> @shl_noext_v2i64(<2 x i64> %v, i64 %x) { ; shift both i64 lanes left by a scalar i64 amount (no extension step)
 753   %t = insertelement <2 x i64> undef, i64 %x, i32 0 ; place the count in lane 0 ...
 754   %s = shufflevector <2 x i64> %t, <2 x i64> undef, <2 x i32> <i32 0, i32 0> ; ... and splat lane 0 into both lanes
 755   %a = shl <2 x i64> %v, %s ; the checks above expect i32.wrap_i64 on the count before i64x2.shl
 756   ret <2 x i64> %a
 757 }
 758
 759 ; CHECK-LABEL: shl_const_v2i64:
 760 ; NO-SIMD128-NOT: i64x2
 761 ; SIMD128-NEXT: .functype shl_const_v2i64 (v128) -> (v128){{$}}
 762 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 5{{$}}
 763 ; SIMD128-NEXT: i64x2.shl $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
 764 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 765 define <2 x i64> @shl_const_v2i64(<2 x i64> %v) { ; shift both i64 lanes left by the constant 5
 766   %a = shl <2 x i64> %v, <i64 5, i64 5> ; constant splat count; the checks above expect i32.const 5 feeding i64x2.shl
 767   ret <2 x i64> %a
 768 }
 769
 770 ; CHECK-LABEL: shl_vec_v2i64:
 771 ; NO-SIMD128-NOT: i64x2
 772 ; SIMD128-NEXT: .functype shl_vec_v2i64 (v128, v128) -> (v128){{$}}
 773 ; SIMD128-NEXT: i64x2.extract_lane $push[[L0:[0-9]+]]=, $0, 0{{$}}
 774 ; SIMD128-NEXT: i64x2.extract_lane $push[[L1:[0-9]+]]=, $1, 0{{$}}
 775 ; SIMD128-NEXT: i64.shl $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
 776 ; SIMD128-NEXT: i64x2.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
 777 ; SIMD128-NEXT: i64x2.extract_lane $push[[L4:[0-9]+]]=, $0, 1{{$}}
 778 ; SIMD128-NEXT: i64x2.extract_lane $push[[L5:[0-9]+]]=, $1, 1{{$}}
 779 ; SIMD128-NEXT: i64.shl $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
 780 ; SIMD128-NEXT: i64x2.replace_lane $push[[R:[0-9]+]]=, $pop[[L3]], 1, $pop[[L6]]{{$}}
 781 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 782 define <2 x i64> @shl_vec_v2i64(<2 x i64> %v, <2 x i64> %x) { ; shift each i64 lane left by its own per-lane amount
 783   %a = shl <2 x i64> %v, %x ; count is not a splat; the checks above expect extract_lane / i64.shl / splat + replace_lane
 784   ret <2 x i64> %a
 785 }
 786
 787 ; CHECK-LABEL: shr_s_v2i64:
 788 ; NO-SIMD128-NOT: i64x2
 789 ; SIMD128-NEXT: .functype shr_s_v2i64 (v128, i32) -> (v128){{$}}
 790 ; SIMD128-NEXT: i64x2.shr_s $push[[R:[0-9]+]]=, $0, $1{{$}}
 791 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 792 define <2 x i64> @shr_s_v2i64(<2 x i64> %v, i32 %x) { ; arithmetic right shift of both i64 lanes by one scalar i32 amount
 793   %x2 = zext i32 %x to i64 ; widen the shift count to the lane type (zero-extended)
 794   %t = insertelement <2 x i64> undef, i64 %x2, i32 0 ; place the count in lane 0 ...
 795   %s = shufflevector <2 x i64> %t, <2 x i64> undef, <2 x i32> <i32 0, i32 0> ; ... and splat lane 0 into both lanes
 796   %a = ashr <2 x i64> %v, %s ; the checks above expect i64x2.shr_s taking the original i32 directly
 797   ret <2 x i64> %a
 798 }
 799
 800 ; CHECK-LABEL: shr_s_sext_v2i64:
 801 ; NO-SIMD128-NOT: i64x2
 802 ; SIMD128-NEXT: .functype shr_s_sext_v2i64 (v128, i32) -> (v128){{$}}
 803 ; SIMD128-NEXT: i64x2.shr_s $push[[R:[0-9]+]]=, $0, $1{{$}}
 804 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 805 define <2 x i64> @shr_s_sext_v2i64(<2 x i64> %v, i32 %x) { ; same as shr_s_v2i64, but the count is sign-extended
 806   %x2 = sext i32 %x to i64 ; widen the shift count to the lane type (sign-extended)
 807   %t = insertelement <2 x i64> undef, i64 %x2, i32 0 ; place the count in lane 0 ...
 808   %s = shufflevector <2 x i64> %t, <2 x i64> undef, <2 x i32> <i32 0, i32 0> ; ... and splat lane 0 into both lanes
 809   %a = ashr <2 x i64> %v, %s ; the checks above expect the same single i64x2.shr_s as the zext variant
 810   ret <2 x i64> %a
 811 }
 812
 813 ; CHECK-LABEL: shr_s_noext_v2i64:
 814 ; NO-SIMD128-NOT: i64x2
 815 ; SIMD128-NEXT: .functype shr_s_noext_v2i64 (v128, i64) -> (v128){{$}}
 816 ; SIMD128-NEXT: i32.wrap_i64 $push[[L0:[0-9]+]]=, $1{{$}}
 817 ; SIMD128-NEXT: i64x2.shr_s $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
 818 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 819 define <2 x i64> @shr_s_noext_v2i64(<2 x i64> %v, i64 %x) { ; arithmetic right shift of both lanes by a scalar i64 amount (no extension step)
 820   %t = insertelement <2 x i64> undef, i64 %x, i32 0 ; place the count in lane 0 ...
 821   %s = shufflevector <2 x i64> %t, <2 x i64> undef, <2 x i32> <i32 0, i32 0> ; ... and splat lane 0 into both lanes
 822   %a = ashr <2 x i64> %v, %s ; the checks above expect i32.wrap_i64 on the count before i64x2.shr_s
 823   ret <2 x i64> %a
 824 }
 825
 826 ; CHECK-LABEL: shr_s_const_v2i64:
 827 ; NO-SIMD128-NOT: i64x2
 828 ; SIMD128-NEXT: .functype shr_s_const_v2i64 (v128) -> (v128){{$}}
 829 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 5{{$}}
 830 ; SIMD128-NEXT: i64x2.shr_s $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
 831 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 832 define <2 x i64> @shr_s_const_v2i64(<2 x i64> %v) { ; arithmetic right shift of both i64 lanes by the constant 5
 833   %a = ashr <2 x i64> %v, <i64 5, i64 5> ; constant splat count; the checks above expect i32.const 5 feeding i64x2.shr_s
 834   ret <2 x i64> %a
 835 }
 836
 837 ; CHECK-LABEL: shr_s_vec_v2i64:
 838 ; NO-SIMD128-NOT: i64x2
 839 ; SIMD128-NEXT: .functype shr_s_vec_v2i64 (v128, v128) -> (v128){{$}}
 840 ; SIMD128-NEXT: i64x2.extract_lane $push[[L0:[0-9]+]]=, $0, 0{{$}}
 841 ; SIMD128-NEXT: i64x2.extract_lane $push[[L1:[0-9]+]]=, $1, 0{{$}}
 842 ; SIMD128-NEXT: i64.shr_s $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
 843 ; SIMD128-NEXT: i64x2.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
 844 ; SIMD128-NEXT: i64x2.extract_lane $push[[L4:[0-9]+]]=, $0, 1{{$}}
 845 ; SIMD128-NEXT: i64x2.extract_lane $push[[L5:[0-9]+]]=, $1, 1{{$}}
 846 ; SIMD128-NEXT: i64.shr_s $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
 847 ; SIMD128-NEXT: i64x2.replace_lane $push[[R:[0-9]+]]=, $pop[[L3]], 1, $pop[[L6]]{{$}}
 848 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 849 define <2 x i64> @shr_s_vec_v2i64(<2 x i64> %v, <2 x i64> %x) { ; arithmetic right shift of each i64 lane by its own per-lane amount
 850   %a = ashr <2 x i64> %v, %x ; count is not a splat; the checks above expect extract_lane / i64.shr_s / splat + replace_lane
 851   ret <2 x i64> %a
 852 }
 853
 854 ; CHECK-LABEL: shr_u_v2i64:
 855 ; NO-SIMD128-NOT: i64x2
 856 ; SIMD128-NEXT: .functype shr_u_v2i64 (v128, i32) -> (v128){{$}}
 857 ; SIMD128-NEXT: i64x2.shr_u $push[[R:[0-9]+]]=, $0, $1{{$}}
 858 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 859 define <2 x i64> @shr_u_v2i64(<2 x i64> %v, i32 %x) { ; logical right shift of both i64 lanes by one scalar i32 amount
 860   %x2 = zext i32 %x to i64 ; widen the shift count to the lane type (zero-extended)
 861   %t = insertelement <2 x i64> undef, i64 %x2, i32 0 ; place the count in lane 0 ...
 862   %s = shufflevector <2 x i64> %t, <2 x i64> undef, <2 x i32> <i32 0, i32 0> ; ... and splat lane 0 into both lanes
 863   %a = lshr <2 x i64> %v, %s ; the checks above expect i64x2.shr_u taking the original i32 directly
 864   ret <2 x i64> %a
 865 }
 866
 867 ; CHECK-LABEL: shr_u_sext_v2i64:
 868 ; NO-SIMD128-NOT: i64x2
 869 ; SIMD128-NEXT: .functype shr_u_sext_v2i64 (v128, i32) -> (v128){{$}}
 870 ; SIMD128-NEXT: i64x2.shr_u $push[[R:[0-9]+]]=, $0, $1{{$}}
 871 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 872 define <2 x i64> @shr_u_sext_v2i64(<2 x i64> %v, i32 %x) { ; same as shr_u_v2i64, but the count is sign-extended
 873   %x2 = sext i32 %x to i64 ; widen the shift count to the lane type (sign-extended)
 874   %t = insertelement <2 x i64> undef, i64 %x2, i32 0 ; place the count in lane 0 ...
 875   %s = shufflevector <2 x i64> %t, <2 x i64> undef, <2 x i32> <i32 0, i32 0> ; ... and splat lane 0 into both lanes
 876   %a = lshr <2 x i64> %v, %s ; the checks above expect the same single i64x2.shr_u as the zext variant
 877   ret <2 x i64> %a
 878 }
 879
 880 ; CHECK-LABEL: shr_u_noext_v2i64:
 881 ; NO-SIMD128-NOT: i64x2
 882 ; SIMD128-NEXT: .functype shr_u_noext_v2i64 (v128, i64) -> (v128){{$}}
 883 ; SIMD128-NEXT: i32.wrap_i64 $push[[L0:[0-9]+]]=, $1{{$}}
 884 ; SIMD128-NEXT: i64x2.shr_u $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
 885 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 886 define <2 x i64> @shr_u_noext_v2i64(<2 x i64> %v, i64 %x) { ; logical right shift of both lanes by a scalar i64 amount (no extension step)
 887   %t = insertelement <2 x i64> undef, i64 %x, i32 0 ; place the count in lane 0 ...
 888   %s = shufflevector <2 x i64> %t, <2 x i64> undef, <2 x i32> <i32 0, i32 0> ; ... and splat lane 0 into both lanes
 889   %a = lshr <2 x i64> %v, %s ; the checks above expect i32.wrap_i64 on the count before i64x2.shr_u
 890   ret <2 x i64> %a
 891 }
 892
 893 ; CHECK-LABEL: shr_u_const_v2i64:
 894 ; NO-SIMD128-NOT: i64x2
 895 ; SIMD128-NEXT: .functype shr_u_const_v2i64 (v128) -> (v128){{$}}
 896 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 5{{$}}
 897 ; SIMD128-NEXT: i64x2.shr_u $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}}
 898 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 899 define <2 x i64> @shr_u_const_v2i64(<2 x i64> %v) { ; logical right shift of both i64 lanes by the constant 5
 900   %a = lshr <2 x i64> %v, <i64 5, i64 5> ; constant splat count; the checks above expect i32.const 5 feeding i64x2.shr_u
 901   ret <2 x i64> %a
 902 }
 903
 904 ; CHECK-LABEL: shr_u_vec_v2i64:
 905 ; NO-SIMD128-NOT: i64x2
 906 ; SIMD128-NEXT: .functype shr_u_vec_v2i64 (v128, v128) -> (v128){{$}}
 907 ; SIMD128-NEXT: i64x2.extract_lane $push[[L0:[0-9]+]]=, $0, 0{{$}}
 908 ; SIMD128-NEXT: i64x2.extract_lane $push[[L1:[0-9]+]]=, $1, 0{{$}}
 909 ; SIMD128-NEXT: i64.shr_u $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
 910 ; SIMD128-NEXT: i64x2.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
 911 ; SIMD128-NEXT: i64x2.extract_lane $push[[L4:[0-9]+]]=, $0, 1{{$}}
 912 ; SIMD128-NEXT: i64x2.extract_lane $push[[L5:[0-9]+]]=, $1, 1{{$}}
 913 ; SIMD128-NEXT: i64.shr_u $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
 914 ; SIMD128-NEXT: i64x2.replace_lane $push[[R:[0-9]+]]=, $pop[[L3]], 1, $pop[[L6]]{{$}}
 915 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 916 define <2 x i64> @shr_u_vec_v2i64(<2 x i64> %v, <2 x i64> %x) { ; logical right shift of each i64 lane by its own per-lane amount
 917   %a = lshr <2 x i64> %v, %x ; count is not a splat; the checks above expect extract_lane / i64.shr_u / splat + replace_lane
 918   ret <2 x i64> %a
 919 }
 920
 921 ; CHECK-LABEL: and_v2i64:
 922 ; NO-SIMD128-NOT: v128
 923 ; SIMD128-VM-NOT: v128
 924 ; SIMD128-NEXT: .functype and_v2i64 (v128, v128) -> (v128){{$}}
 925 ; SIMD128-NEXT: v128.and $push[[R:[0-9]+]]=, $0, $1{{$}}
 926 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 927 define <2 x i64> @and_v2i64(<2 x i64> %x, <2 x i64> %y) { ; bitwise AND of two <2 x i64> vectors
 928   %a = and <2 x i64> %x, %y ; the checks above expect the lane-agnostic v128.and
 929   ret <2 x i64> %a
 930 }
 931
 932 ; CHECK-LABEL: or_v2i64:
 933 ; NO-SIMD128-NOT: v128
 934 ; SIMD128-VM-NOT: v128
 935 ; SIMD128-NEXT: .functype or_v2i64 (v128, v128) -> (v128){{$}}
 936 ; SIMD128-NEXT: v128.or $push[[R:[0-9]+]]=, $0, $1{{$}}
 937 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 938 define <2 x i64> @or_v2i64(<2 x i64> %x, <2 x i64> %y) { ; bitwise OR of two <2 x i64> vectors
 939   %a = or <2 x i64> %x, %y ; the checks above expect the lane-agnostic v128.or
 940   ret <2 x i64> %a
 941 }
 942
 943 ; CHECK-LABEL: xor_v2i64:
 944 ; NO-SIMD128-NOT: v128
 945 ; SIMD128-VM-NOT: v128
 946 ; SIMD128-NEXT: .functype xor_v2i64 (v128, v128) -> (v128){{$}}
 947 ; SIMD128-NEXT: v128.xor $push[[R:[0-9]+]]=, $0, $1{{$}}
 948 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 949 define <2 x i64> @xor_v2i64(<2 x i64> %x, <2 x i64> %y) { ; bitwise XOR of two <2 x i64> vectors
 950   %a = xor <2 x i64> %x, %y ; the checks above expect the lane-agnostic v128.xor
 951   ret <2 x i64> %a
 952 }
 953
 954 ; CHECK-LABEL: not_v2i64:
 955 ; NO-SIMD128-NOT: v128
 956 ; SIMD128-VM-NOT: v128
 957 ; SIMD128-NEXT: .functype not_v2i64 (v128) -> (v128){{$}}
 958 ; SIMD128-NEXT: v128.not $push[[R:[0-9]+]]=, $0{{$}}
 959 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 960 define <2 x i64> @not_v2i64(<2 x i64> %x) { ; bitwise NOT of a <2 x i64> vector
 961   %a = xor <2 x i64> %x, <i64 -1, i64 -1> ; xor with all-ones inverts every bit; the checks above expect v128.not
 962   ret <2 x i64> %a
 963 }
 964
 965 ; CHECK-LABEL: bitselect_v2i64:
 966 ; NO-SIMD128-NOT: v128
 967 ; SIMD128-VM-NOT: v128
 968 ; SIMD128-NEXT: .functype bitselect_v2i64 (v128, v128, v128) -> (v128){{$}}
 969 ; SIMD128-SLOW-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $0{{$}}
 970 ; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}}
 971 ; SIMD128-FAST-NEXT: v128.not
 972 ; SIMD128-FAST-NEXT: v128.and
 973 ; SIMD128-FAST-NEXT: v128.and
 974 ; SIMD128-FAST-NEXT: v128.or
 975 ; SIMD128-FAST-NEXT: return
 976 define <2 x i64> @bitselect_v2i64(<2 x i64> %c, <2 x i64> %v1, <2 x i64> %v2) { ; per-bit select: (v1 & c) | (v2 & ~c)
 977   %masked_v1 = and <2 x i64> %v1, %c ; bits of %v1 where mask %c is set (mask is the right operand here)
 978   %inv_mask = xor <2 x i64> <i64 -1, i64 -1>, %c ; ~%c
 979   %masked_v2 = and <2 x i64> %v2, %inv_mask ; bits of %v2 where mask %c is clear
 980   %a = or <2 x i64> %masked_v2, %masked_v1 ; merge the two disjoint halves
 981   ret <2 x i64> %a
 982 }
 983
 984 ; ==============================================================================
 985 ; 4 x float
 986 ; ==============================================================================
 987 ; CHECK-LABEL: neg_v4f32:
 988 ; NO-SIMD128-NOT: f32x4
 989 ; SIMD128-NEXT: .functype neg_v4f32 (v128) -> (v128){{$}}
 990 ; SIMD128-NEXT: f32x4.neg $push[[R:[0-9]+]]=, $0{{$}}
 991 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 992 define <4 x float> @neg_v4f32(<4 x float> %x) { ; lane-wise float negation written as 0.0 - x
 993   ; nsz (no signed zeros) makes 0.0 - x semantically equivalent to flipping the sign bit
 994   %a = fsub nsz <4 x float> <float 0.0, float 0.0, float 0.0, float 0.0>, %x ; the checks above expect a single f32x4.neg
 995   ret <4 x float> %a
 996 }
 997
 998 ; CHECK-LABEL: abs_v4f32:
 999 ; NO-SIMD128-NOT: f32x4
1000 ; SIMD128-NEXT: .functype abs_v4f32 (v128) -> (v128){{$}}
1001 ; SIMD128-NEXT: f32x4.abs $push[[R:[0-9]+]]=, $0{{$}}
1002 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1003 declare <4 x float> @llvm.fabs.v4f32(<4 x float>) nounwind readnone ; fabs intrinsic, <4 x float> overload
1004 define <4 x float> @abs_v4f32(<4 x float> %x) { ; lane-wise absolute value via the fabs intrinsic
1005   %a = call <4 x float> @llvm.fabs.v4f32(<4 x float> %x) ; the checks above expect f32x4.abs
1006   ret <4 x float> %a
1007 }
1008
1009 ; CHECK-LABEL: min_unordered_v4f32:
1010 ; NO-SIMD128-NOT: f32x4
1011 ; SIMD128-NEXT: .functype min_unordered_v4f32 (v128) -> (v128){{$}}
1012 ; SIMD128-NEXT: f32.const $push[[L0:[0-9]+]]=, 0x1.4p2
1013 ; SIMD128-NEXT: f32x4.splat $push[[L1:[0-9]+]]=, $pop[[L0]]
1014 ; SIMD128-NEXT: f32x4.min $push[[R:[0-9]+]]=, $0, $pop[[L1]]{{$}}
1015 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1016 define <4 x float> @min_unordered_v4f32(<4 x float> %x) { ; min(%x, 5.0) spelled as an unordered compare + select
1017   %cmps = fcmp ule <4 x float> %x, <float 5., float 5., float 5., float 5.> ; ule: true when %x <= 5.0 or the operands are unordered
1018   %a = select <4 x i1> %cmps, <4 x float> %x,
1019     <4 x float> <float 5., float 5., float 5., float 5.> ; the checks above expect the pair to become one f32x4.min against a 5.0 splat
1020   ret <4 x float> %a
1021 }
1022
1023 ; CHECK-LABEL: max_unordered_v4f32:
1024 ; NO-SIMD128-NOT: f32x4
1025 ; SIMD128-NEXT: .functype max_unordered_v4f32 (v128) -> (v128){{$}}
1026 ; SIMD128-NEXT: f32.const $push[[L0:[0-9]+]]=, 0x1.4p2
1027 ; SIMD128-NEXT: f32x4.splat $push[[L1:[0-9]+]]=, $pop[[L0]]
1028 ; SIMD128-NEXT: f32x4.max $push[[R:[0-9]+]]=, $0, $pop[[L1]]{{$}}
1029 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1030 define <4 x float> @max_unordered_v4f32(<4 x float> %x) { ; max(%x, 5.0) spelled as an unordered compare + select
1031   %cmps = fcmp uge <4 x float> %x, <float 5., float 5., float 5., float 5.> ; uge: true when %x >= 5.0 or the operands are unordered
1032   %a = select <4 x i1> %cmps, <4 x float> %x,
1033     <4 x float> <float 5., float 5., float 5., float 5.> ; the checks above expect the pair to become one f32x4.max against a 5.0 splat
1034   ret <4 x float> %a
1035 }
1036
1037 ; CHECK-LABEL: min_ordered_v4f32:
1038 ; NO-SIMD128-NOT: f32x4
1039 ; SIMD128-NEXT: .functype min_ordered_v4f32 (v128) -> (v128){{$}}
1040 ; SIMD128-NEXT: f32.const $push[[L0:[0-9]+]]=, 0x1.4p2
1041 ; SIMD128-NEXT: f32x4.splat $push[[L1:[0-9]+]]=, $pop[[L0]]
1042 ; SIMD128-NEXT: f32x4.min $push[[R:[0-9]+]]=, $0, $pop[[L1]]{{$}}
1043 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1044 define <4 x float> @min_ordered_v4f32(<4 x float> %x) { ; min(5.0, %x) spelled as an ordered compare + select, constant on the left
1045   %cmps = fcmp ole <4 x float> <float 5., float 5., float 5., float 5.>, %x ; ole: true only when ordered and 5.0 <= %x
1046   %a = select <4 x i1> %cmps,
1047     <4 x float> <float 5., float 5., float 5., float 5.>, <4 x float> %x ; the checks above expect one f32x4.min with %x as the first operand
1048   ret <4 x float> %a
1049 }
1050
1051 ; CHECK-LABEL: max_ordered_v4f32:
1052 ; NO-SIMD128-NOT: f32x4
1053 ; SIMD128-NEXT: .functype max_ordered_v4f32 (v128) -> (v128){{$}}
1054 ; SIMD128-NEXT: f32.const $push[[L0:[0-9]+]]=, 0x1.4p2
1055 ; SIMD128-NEXT: f32x4.splat $push[[L1:[0-9]+]]=, $pop[[L0]]
1056 ; SIMD128-NEXT: f32x4.max $push[[R:[0-9]+]]=, $0, $pop[[L1]]{{$}}
1057 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1058 define <4 x float> @max_ordered_v4f32(<4 x float> %x) { ; max(5.0, %x) spelled as an ordered compare + select, constant on the left
1059   %cmps = fcmp oge <4 x float> <float 5., float 5., float 5., float 5.>, %x ; oge: true only when ordered and 5.0 >= %x
1060   %a = select <4 x i1> %cmps,
1061     <4 x float> <float 5., float 5., float 5., float 5.>, <4 x float> %x ; the checks above expect one f32x4.max with %x as the first operand
1062   ret <4 x float> %a
1063 }
1064
1065 ; CHECK-LABEL: min_intrinsic_v4f32:
1066 ; NO-SIMD128-NOT: f32x4
1067 ; SIMD128-NEXT: .functype min_intrinsic_v4f32 (v128, v128) -> (v128){{$}}
1068 ; SIMD128-NEXT: f32x4.min $push[[R:[0-9]+]]=, $0, $1{{$}}
1069 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1070 declare <4 x float> @llvm.minimum.v4f32(<4 x float>, <4 x float>) ; llvm.minimum intrinsic, <4 x float> overload
1071 define <4 x float> @min_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) { ; lane-wise minimum via llvm.minimum
1072   %a = call <4 x float> @llvm.minimum.v4f32(<4 x float> %x, <4 x float> %y) ; the checks above expect f32x4.min
1073   ret <4 x float> %a
1074 }
1075
1076 ; CHECK-LABEL: minnum_intrinsic_v4f32:
1077 ; NO-SIMD128-NOT: f32x4
1078 ; SIMD128-NEXT: .functype minnum_intrinsic_v4f32 (v128, v128) -> (v128){{$}}
1079 ; SIMD128-NEXT: f32x4.min $push[[R:[0-9]+]]=, $0, $1{{$}}
1080 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1081 declare <4 x float> @llvm.minnum.v4f32(<4 x float>, <4 x float>) ; llvm.minnum intrinsic, <4 x float> overload
1082 define <4 x float> @minnum_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) { ; lane-wise minimum via llvm.minnum with the nnan flag
1083   %a = call nnan <4 x float> @llvm.minnum.v4f32(<4 x float> %x, <4 x float> %y) ; nnan = no-NaNs fast-math flag; the checks above expect f32x4.min
1084   ret <4 x float> %a
1085 }
1086
1087 ; CHECK-LABEL: max_intrinsic_v4f32:
1088 ; NO-SIMD128-NOT: f32x4
1089 ; SIMD128-NEXT: .functype max_intrinsic_v4f32 (v128, v128) -> (v128){{$}}
1090 ; SIMD128-NEXT: f32x4.max $push[[R:[0-9]+]]=, $0, $1{{$}}
1091 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1092 declare <4 x float> @llvm.maximum.v4f32(<4 x float>, <4 x float>) ; llvm.maximum intrinsic, <4 x float> overload
1093 define <4 x float> @max_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) { ; lane-wise maximum via llvm.maximum
1094   %a = call <4 x float> @llvm.maximum.v4f32(<4 x float> %x, <4 x float> %y) ; the checks above expect f32x4.max
1095   ret <4 x float> %a
1096 }
1097
1098 ; CHECK-LABEL: maxnum_intrinsic_v4f32:
1099 ; NO-SIMD128-NOT: f32x4
1100 ; SIMD128-NEXT: .functype maxnum_intrinsic_v4f32 (v128, v128) -> (v128){{$}}
1101 ; SIMD128-NEXT: f32x4.max $push[[R:[0-9]+]]=, $0, $1{{$}}
1102 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1103 declare <4 x float> @llvm.maxnum.v4f32(<4 x float>, <4 x float>) ; llvm.maxnum intrinsic, <4 x float> overload
1104 define <4 x float> @maxnum_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) { ; lane-wise maximum via llvm.maxnum with the nnan flag
1105   %a = call nnan <4 x float> @llvm.maxnum.v4f32(<4 x float> %x, <4 x float> %y) ; nnan = no-NaNs fast-math flag; the checks above expect f32x4.max
1106   ret <4 x float> %a
1107 }
1108
1109 ; CHECK-LABEL: min_const_intrinsic_v4f32:
1110 ; NO-SIMD128-NOT: f32x4
1111 ; SIMD128-NEXT: .functype min_const_intrinsic_v4f32 () -> (v128){{$}}
1112 ; SIMD128-NEXT: f32.const $push[[L:[0-9]+]]=, 0x1.4p2{{$}}
1113 ; SIMD128-NEXT: f32x4.splat $push[[R:[0-9]+]]=, $pop[[L]]{{$}}
1114 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1115 define <4 x float> @min_const_intrinsic_v4f32() { ; llvm.minimum of two constant splats (42.0 and 5.0)
1116   %a = call <4 x float> @llvm.minimum.v4f32( ; the checks above expect the call folded away to a splat of 0x1.4p2 (= 5.0)
1117     <4 x float> <float 42., float 42., float 42., float 42.>,
1118     <4 x float> <float 5., float 5., float 5., float 5.>
1119   )
1120   ret <4 x float> %a
1121 }
1122
1123 ; CHECK-LABEL: max_const_intrinsic_v4f32:
1124 ; NO-SIMD128-NOT: f32x4
1125 ; SIMD128-NEXT: .functype max_const_intrinsic_v4f32 () -> (v128){{$}}
1126 ; SIMD128-NEXT: f32.const $push[[L:[0-9]+]]=, 0x1.5p5{{$}}
1127 ; SIMD128-NEXT: f32x4.splat $push[[R:[0-9]+]]=, $pop[[L]]{{$}}
1128 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1129 define <4 x float> @max_const_intrinsic_v4f32() { ; llvm.maximum of two constant splats (42.0 and 5.0)
1130   %a = call <4 x float> @llvm.maximum.v4f32( ; the checks above expect the call folded away to a splat of 0x1.5p5 (= 42.0)
1131     <4 x float> <float 42., float 42., float 42., float 42.>,
1132     <4 x float> <float 5., float 5., float 5., float 5.>
1133   )
1134   ret <4 x float> %a
1135 }
1136
1137 ; CHECK-LABEL: add_v4f32:
1138 ; NO-SIMD128-NOT: f32x4
1139 ; SIMD128-NEXT: .functype add_v4f32 (v128, v128) -> (v128){{$}}
1140 ; SIMD128-NEXT: f32x4.add $push[[R:[0-9]+]]=, $0, $1{{$}}
1141 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1142 define <4 x float> @add_v4f32(<4 x float> %x, <4 x float> %y) { ; lane-wise float sum of two <4 x float> vectors
1143   %a = fadd <4 x float> %x, %y
1144   ret <4 x float> %a
1145 }
1146
1147 ; CHECK-LABEL: sub_v4f32:
1148 ; NO-SIMD128-NOT: f32x4
1149 ; SIMD128-NEXT: .functype sub_v4f32 (v128, v128) -> (v128){{$}}
1150 ; SIMD128-NEXT: f32x4.sub $push[[R:[0-9]+]]=, $0, $1{{$}}
1151 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1152 define <4 x float> @sub_v4f32(<4 x float> %x, <4 x float> %y) { ; lane-wise float difference %x - %y
1153   %a = fsub <4 x float> %x, %y
1154   ret <4 x float> %a
1155 }
1156
1157 ; CHECK-LABEL: div_v4f32:
1158 ; NO-SIMD128-NOT: f32x4
1159 ; SIMD128-VM-NOT: f32x4.div
1160 ; SIMD128-NEXT: .functype div_v4f32 (v128, v128) -> (v128){{$}}
1161 ; SIMD128-NEXT: f32x4.div $push[[R:[0-9]+]]=, $0, $1{{$}}
1162 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1163 define <4 x float> @div_v4f32(<4 x float> %x, <4 x float> %y) { ; lane-wise float quotient %x / %y
1164   %a = fdiv <4 x float> %x, %y
1165   ret <4 x float> %a
1166 }
1167
1168 ; CHECK-LABEL: mul_v4f32:
1169 ; NO-SIMD128-NOT: f32x4
1170 ; SIMD128-NEXT: .functype mul_v4f32 (v128, v128) -> (v128){{$}}
1171 ; SIMD128-NEXT: f32x4.mul $push[[R:[0-9]+]]=, $0, $1{{$}}
1172 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1173 define <4 x float> @mul_v4f32(<4 x float> %x, <4 x float> %y) { ; lane-wise float product of two <4 x float> vectors
1174   %a = fmul <4 x float> %x, %y
1175   ret <4 x float> %a
1176 }
1177
1178 ; CHECK-LABEL: sqrt_v4f32:
1179 ; NO-SIMD128-NOT: f32x4
1180 ; SIMD128-VM-NOT: f32x4.sqrt
1181 ; SIMD128-NEXT: .functype sqrt_v4f32 (v128) -> (v128){{$}}
1182 ; SIMD128-NEXT: f32x4.sqrt $push[[R:[0-9]+]]=, $0{{$}}
1183 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1184 declare <4 x float> @llvm.sqrt.v4f32(<4 x float> %x) ; sqrt intrinsic, <4 x float> overload
1185 define <4 x float> @sqrt_v4f32(<4 x float> %x) { ; lane-wise square root via the sqrt intrinsic
1186   %a = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %x) ; the checks above expect f32x4.sqrt
1187   ret <4 x float> %a
1188 }
1189
1190 ; ==============================================================================
1191 ; 2 x double
1192 ; ==============================================================================
1193 ; CHECK-LABEL: neg_v2f64:
1194 ; NO-SIMD128-NOT: f64x2
1195 ; SIMD128-NEXT: .functype neg_v2f64 (v128) -> (v128){{$}}
1196 ; SIMD128-NEXT: f64x2.neg $push[[R:[0-9]+]]=, $0{{$}}
1197 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1198 define <2 x double> @neg_v2f64(<2 x double> %x) { ; lane-wise double negation written as 0.0 - x
1199   ; nsz (no signed zeros) makes 0.0 - x semantically equivalent to flipping the sign bit
1200   %a = fsub nsz <2 x double> <double 0., double 0.>, %x ; the checks above expect a single f64x2.neg
1201   ret <2 x double> %a
1202 }
1203
1204 ; CHECK-LABEL: abs_v2f64:
1205 ; NO-SIMD128-NOT: f64x2
1206 ; SIMD128-NEXT: .functype abs_v2f64 (v128) -> (v128){{$}}
1207 ; SIMD128-NEXT: f64x2.abs $push[[R:[0-9]+]]=, $0{{$}}
1208 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1209 declare <2 x double> @llvm.fabs.v2f64(<2 x double>) nounwind readnone ; fabs intrinsic, <2 x double> overload
1210 define <2 x double> @abs_v2f64(<2 x double> %x) { ; lane-wise absolute value via the fabs intrinsic
1211   %a = call <2 x double> @llvm.fabs.v2f64(<2 x double> %x) ; the checks above expect f64x2.abs
1212   ret <2 x double> %a
1213 }
1214
1215 ; CHECK-LABEL: min_unordered_v2f64:
1216 ; NO-SIMD128-NOT: f64x2
1217 ; SIMD128-NEXT: .functype min_unordered_v2f64 (v128) -> (v128){{$}}
1218 ; SIMD128-NEXT: f64.const $push[[L0:[0-9]+]]=, 0x1.4p2
1219 ; SIMD128-NEXT: f64x2.splat $push[[L1:[0-9]+]]=, $pop[[L0]]
1220 ; SIMD128-NEXT: f64x2.min $push[[R:[0-9]+]]=, $0, $pop[[L1]]{{$}}
1221 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1222 define <2 x double> @min_unordered_v2f64(<2 x double> %x) { ; min(%x, 5.0) spelled as an unordered compare + select
1223   %cmps = fcmp ule <2 x double> %x, <double 5., double 5.> ; ule: true when %x <= 5.0 or the operands are unordered
1224   %a = select <2 x i1> %cmps, <2 x double> %x,
1225     <2 x double> <double 5., double 5.> ; the checks above expect the pair to become one f64x2.min against a 5.0 splat
1226   ret <2 x double> %a
1227 }
1228
1229 ; CHECK-LABEL: max_unordered_v2f64:
1230 ; NO-SIMD128-NOT: f64x2
1231 ; SIMD128-NEXT: .functype max_unordered_v2f64 (v128) -> (v128){{$}}
1232 ; SIMD128-NEXT: f64.const $push[[L0:[0-9]+]]=, 0x1.4p2
1233 ; SIMD128-NEXT: f64x2.splat $push[[L1:[0-9]+]]=, $pop[[L0]]
1234 ; SIMD128-NEXT: f64x2.max $push[[R:[0-9]+]]=, $0, $pop[[L1]]{{$}}
1235 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1236 define <2 x double> @max_unordered_v2f64(<2 x double> %x) { ; max(%x, 5.0) spelled as an unordered compare + select
1237   %cmps = fcmp uge <2 x double> %x, <double 5., double 5.> ; uge: true when %x >= 5.0 or the operands are unordered
1238   %a = select <2 x i1> %cmps, <2 x double> %x,
1239     <2 x double> <double 5., double 5.> ; the checks above expect the pair to become one f64x2.max against a 5.0 splat
1240   ret <2 x double> %a
1241 }
1242
1243 ; CHECK-LABEL: min_ordered_v2f64:
1244 ; NO-SIMD128-NOT: f64x2
1245 ; SIMD128-NEXT: .functype min_ordered_v2f64 (v128) -> (v128){{$}}
1246 ; SIMD128-NEXT: f64.const $push[[L0:[0-9]+]]=, 0x1.4p2
1247 ; SIMD128-NEXT: f64x2.splat $push[[L1:[0-9]+]]=, $pop[[L0]]
1248 ; SIMD128-NEXT: f64x2.min $push[[R:[0-9]+]]=, $0, $pop[[L1]]{{$}}
1249 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1250 define <2 x double> @min_ordered_v2f64(<2 x double> %x) { ; min(5.0, %x) spelled as an ordered compare + select, constant on the left
1251   %cmps = fcmp ole <2 x double> <double 5., double 5.>, %x ; ole: true only when ordered and 5.0 <= %x
1252   %a = select <2 x i1> %cmps, <2 x double> <double 5., double 5.>,
1253     <2 x double> %x ; the checks above expect one f64x2.min with %x as the first operand
1254   ret <2 x double> %a
1255 }
1256
1257 ; CHECK-LABEL: max_ordered_v2f64:
1258 ; NO-SIMD128-NOT: f64x2
1259 ; SIMD128-NEXT: .functype max_ordered_v2f64 (v128) -> (v128){{$}}
1260 ; SIMD128-NEXT: f64.const $push[[L0:[0-9]+]]=, 0x1.4p2
1261 ; SIMD128-NEXT: f64x2.splat $push[[L1:[0-9]+]]=, $pop[[L0]]
1262 ; SIMD128-NEXT: f64x2.max $push[[R:[0-9]+]]=, $0, $pop[[L1]]{{$}}
1263 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1264 define <2 x double> @max_ordered_v2f64(<2 x double> %x) { ; max(5.0, %x) spelled as an ordered compare + select, constant on the left
1265   %cmps = fcmp oge <2 x double> <double 5., double 5.>, %x ; oge: true only when ordered and 5.0 >= %x
1266   %a = select <2 x i1> %cmps, <2 x double> <double 5., double 5.>,
1267     <2 x double> %x ; the checks above expect one f64x2.max with %x as the first operand
1268   ret <2 x double> %a
1269 }
1270
1271 ; CHECK-LABEL: min_intrinsic_v2f64:
1272 ; NO-SIMD128-NOT: f64x2
1273 ; SIMD128-NEXT: .functype min_intrinsic_v2f64 (v128, v128) -> (v128){{$}}
1274 ; SIMD128-NEXT: f64x2.min $push[[R:[0-9]+]]=, $0, $1{{$}}
1275 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1276 declare <2 x double> @llvm.minimum.v2f64(<2 x double>, <2 x double>) ; llvm.minimum intrinsic, <2 x double> overload
1277 define <2 x double> @min_intrinsic_v2f64(<2 x double> %x, <2 x double> %y) { ; lane-wise minimum via llvm.minimum
1278   %a = call <2 x double> @llvm.minimum.v2f64(<2 x double> %x, <2 x double> %y) ; the checks above expect f64x2.min
1279   ret <2 x double> %a
1280 }
1281
1282 ; CHECK-LABEL: max_intrinsic_v2f64:
1283 ; NO-SIMD128-NOT: f64x2
1284 ; SIMD128-NEXT: .functype max_intrinsic_v2f64 (v128, v128) -> (v128){{$}}
1285 ; SIMD128-NEXT: f64x2.max $push[[R:[0-9]+]]=, $0, $1{{$}}
1286 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1287 declare <2 x double> @llvm.maximum.v2f64(<2 x double>, <2 x double>) ; llvm.maximum intrinsic, <2 x double> overload
1288 define <2 x double> @max_intrinsic_v2f64(<2 x double> %x, <2 x double> %y) { ; lane-wise maximum via llvm.maximum
1289   %a = call <2 x double> @llvm.maximum.v2f64(<2 x double> %x, <2 x double> %y) ; the checks above expect f64x2.max
1290   ret <2 x double> %a
1291 }
1292
1293 ; CHECK-LABEL: min_const_intrinsic_v2f64:
1294 ; NO-SIMD128-NOT: f64x2
1295 ; SIMD128-NEXT: .functype min_const_intrinsic_v2f64 () -> (v128){{$}}
1296 ; SIMD128-NEXT: f64.const $push[[L:[0-9]+]]=, 0x1.4p2{{$}}
1297 ; SIMD128-NEXT: f64x2.splat $push[[R:[0-9]+]]=, $pop[[L]]{{$}}
1298 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1299 define <2 x double> @min_const_intrinsic_v2f64() { ; llvm.minimum of two constant splats (42.0 and 5.0)
1300   %a = call <2 x double> @llvm.minimum.v2f64( ; the checks above expect the call folded away to a splat of 0x1.4p2 (= 5.0)
1301     <2 x double> <double 42., double 42.>,
1302     <2 x double> <double 5., double 5.>
1303   )
1304   ret <2 x double> %a
1305 }
1306
1307 ; CHECK-LABEL: max_const_intrinsic_v2f64:
1308 ; NO-SIMD128-NOT: f64x2
1309 ; SIMD128-NEXT: .functype max_const_intrinsic_v2f64 () -> (v128){{$}}
1310 ; SIMD128-NEXT: f64.const $push[[L:[0-9]+]]=, 0x1.5p5{{$}}
1311 ; SIMD128-NEXT: f64x2.splat $push[[R:[0-9]+]]=, $pop[[L]]{{$}}
1312 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1313 define <2 x double> @max_const_intrinsic_v2f64() { ; llvm.maximum of two constant splats (42.0 and 5.0)
1314   %a = call <2 x double> @llvm.maximum.v2f64( ; the checks above expect the call folded away to a splat of 0x1.5p5 (= 42.0)
1315     <2 x double> <double 42., double 42.>,
1316     <2 x double> <double 5., double 5.>
1317   )
1318   ret <2 x double> %a
1319 }
1320
1321 ; CHECK-LABEL: add_v2f64:
1322 ; NO-SIMD128-NOT: f64x2
1323 ; SIMD128-VM-NOT: f64x2
1324 ; SIMD128-NEXT: .functype add_v2f64 (v128, v128) -> (v128){{$}}
1325 ; SIMD128-NEXT: f64x2.add $push[[R:[0-9]+]]=, $0, $1{{$}}
1326 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1327 define <2 x double> @add_v2f64(<2 x double> %x, <2 x double> %y) { ; lane-wise double sum of two <2 x double> vectors
1328   %a = fadd <2 x double> %x, %y
1329   ret <2 x double> %a
1330 }
1331
1332 ; CHECK-LABEL: sub_v2f64:
1333 ; NO-SIMD128-NOT: f64x2
1334 ; SIMD128-VM-NOT: f64x2
1335 ; SIMD128-NEXT: .functype sub_v2f64 (v128, v128) -> (v128){{$}}
1336 ; SIMD128-NEXT: f64x2.sub $push[[R:[0-9]+]]=, $0, $1{{$}}
1337 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1338 define <2 x double> @sub_v2f64(<2 x double> %x, <2 x double> %y) { ; lane-wise double difference %x - %y
1339   %a = fsub <2 x double> %x, %y
1340   ret <2 x double> %a
1341 }
1342
1343 ; CHECK-LABEL: div_v2f64:
1344 ; NO-SIMD128-NOT: f64x2
1345 ; SIMD128-VM-NOT: f64x2
1346 ; SIMD128-NEXT: .functype div_v2f64 (v128, v128) -> (v128){{$}}
1347 ; SIMD128-NEXT: f64x2.div $push[[R:[0-9]+]]=, $0, $1{{$}}
1348 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1349 define <2 x double> @div_v2f64(<2 x double> %x, <2 x double> %y) { ; lane-wise double quotient %x / %y
1350   %a = fdiv <2 x double> %x, %y
1351   ret <2 x double> %a
1352 }
1353
1354 ; CHECK-LABEL: mul_v2f64:
1355 ; NO-SIMD128-NOT: f64x2
1356 ; SIMD128-VM-NOT: f64x2
1357 ; SIMD128-NEXT: .functype mul_v2f64 (v128, v128) -> (v128){{$}}
1358 ; SIMD128-NEXT: f64x2.mul $push[[R:[0-9]+]]=, $0, $1{{$}}
1359 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1360 define <2 x double> @mul_v2f64(<2 x double> %x, <2 x double> %y) { ; lane-wise double product of two <2 x double> vectors
1361   %a = fmul <2 x double> %x, %y
1362   ret <2 x double> %a
1363 }
1364
1365 ; CHECK-LABEL: sqrt_v2f64:
1366 ; NO-SIMD128-NOT: f64x2
1367 ; SIMD128-NEXT: .functype sqrt_v2f64 (v128) -> (v128){{$}}
1368 ; SIMD128-NEXT: f64x2.sqrt $push[[R:[0-9]+]]=, $0{{$}}
1369 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1370 declare <2 x double> @llvm.sqrt.v2f64(<2 x double> %x) ; sqrt intrinsic, <2 x double> overload
1371 define <2 x double> @sqrt_v2f64(<2 x double> %x) { ; lane-wise square root via the sqrt intrinsic
1372   %a = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %x) ; the checks above expect f64x2.sqrt
1373   ret <2 x double> %a
1374 }