test/CodeGen/WebAssembly/simd-offset.ll

   1 ; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-keep-registers -wasm-disable-explicit-locals -mattr=+unimplemented-simd128 | FileCheck %s --check-prefixes CHECK,SIMD128
   2 ; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-keep-registers -wasm-disable-explicit-locals -mattr=+simd128 | FileCheck %s --check-prefixes CHECK,SIMD128-VM
   3 ; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-keep-registers -wasm-disable-explicit-locals | FileCheck %s --check-prefixes CHECK,NO-SIMD128
   4
   5 ; Test SIMD loads, splat loads, and stores, covering both folded and unfolded constant address offsets
   6
   7 target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
   8 target triple = "wasm32-unknown-unknown"
   9
  10 ; ==============================================================================
  11 ; 16 x i8
  12 ; ==============================================================================
  13 ; CHECK-LABEL: load_v16i8:
  14 ; NO-SIMD128-NOT: v128
  15 ; SIMD128-NEXT: .functype load_v16i8 (i32) -> (v128){{$}}
  16 ; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($0){{$}}
  17 ; SIMD128-NEXT: return $pop[[R]]{{$}}
  18 define <16 x i8> @load_v16i8(<16 x i8>* %p) {
  19   %v = load <16 x i8>, <16 x i8>* %p
  20   ret <16 x i8> %v
  21 }
  22
  23 ; CHECK-LABEL: load_splat_v16i8:
  24 ; SIMD128-VM-NOT: v8x16.load_splat
  25 ; NO-SIMD128-NOT: v128
  26 ; SIMD128-NEXT: .functype load_splat_v16i8 (i32) -> (v128){{$}}
  27 ; SIMD128-NEXT: v8x16.load_splat $push[[R:[0-9]+]]=, 0($0){{$}}
  28 ; SIMD128-NEXT: return $pop[[R]]{{$}}
  29 define <16 x i8> @load_splat_v16i8(i8* %p) {
  30   %e = load i8, i8* %p
  31   %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  32   %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  33   ret <16 x i8> %v2
  34 }
  35
  36 ; CHECK-LABEL: load_v16i8_with_folded_offset:
  37 ; NO-SIMD128-NOT: v128
  38 ; SIMD128-NEXT: .functype load_v16i8_with_folded_offset (i32) -> (v128){{$}}
  39 ; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 16($0){{$}}
  40 ; SIMD128-NEXT: return $pop[[R]]{{$}}
  41 define <16 x i8> @load_v16i8_with_folded_offset(<16 x i8>* %p) {
  42   %q = ptrtoint <16 x i8>* %p to i32
  43   %r = add nuw i32 %q, 16
  44   %s = inttoptr i32 %r to <16 x i8>*
  45   %v = load <16 x i8>, <16 x i8>* %s
  46   ret <16 x i8> %v
  47 }
  48
  49 ; CHECK-LABEL: load_splat_v16i8_with_folded_offset:
  50 ; NO-SIMD128-NOT: v128
  51 ; SIMD128-NEXT: .functype load_splat_v16i8_with_folded_offset (i32) -> (v128){{$}}
  52 ; SIMD128-NEXT: v8x16.load_splat $push[[R:[0-9]+]]=, 16($0){{$}}
  53 ; SIMD128-NEXT: return $pop[[R]]{{$}}
  54 define <16 x i8> @load_splat_v16i8_with_folded_offset(i8* %p) {
  55   %q = ptrtoint i8* %p to i32
  56   %r = add nuw i32 %q, 16
  57   %s = inttoptr i32 %r to i8*
  58   %e = load i8, i8* %s
  59   %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  60   %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  61   ret <16 x i8> %v2
  62 }
  63
  64 ; CHECK-LABEL: load_v16i8_with_folded_gep_offset:
  65 ; NO-SIMD128-NOT: v128
  66 ; SIMD128-NEXT: .functype load_v16i8_with_folded_gep_offset (i32) -> (v128){{$}}
  67 ; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 16($0){{$}}
  68 ; SIMD128-NEXT: return $pop[[R]]{{$}}
  69 define <16 x i8> @load_v16i8_with_folded_gep_offset(<16 x i8>* %p) {
  70   %s = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 1
  71   %v = load <16 x i8>, <16 x i8>* %s
  72   ret <16 x i8> %v
  73 }
  74
  75 ; CHECK-LABEL: load_splat_v16i8_with_folded_gep_offset:
  76 ; NO-SIMD128-NOT: v128
  77 ; SIMD128-NEXT: .functype load_splat_v16i8_with_folded_gep_offset (i32) -> (v128){{$}}
  78 ; SIMD128-NEXT: v8x16.load_splat $push[[R:[0-9]+]]=, 1($0){{$}}
  79 ; SIMD128-NEXT: return $pop[[R]]{{$}}
  80 define <16 x i8> @load_splat_v16i8_with_folded_gep_offset(i8* %p) {
  81   %s = getelementptr inbounds i8, i8* %p, i32 1
  82   %e = load i8, i8* %s
  83   %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  84   %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  85   ret <16 x i8> %v2
  86 }
  87
  88 ; CHECK-LABEL: load_v16i8_with_unfolded_gep_negative_offset:
  89 ; NO-SIMD128-NOT: v128
  90 ; SIMD128-NEXT: .functype load_v16i8_with_unfolded_gep_negative_offset (i32) -> (v128){{$}}
  91 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}}
  92 ; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
  93 ; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
  94 ; SIMD128-NEXT: return $pop[[R]]{{$}}
  95 define <16 x i8> @load_v16i8_with_unfolded_gep_negative_offset(<16 x i8>* %p) {
  96   %s = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 -1
  97   %v = load <16 x i8>, <16 x i8>* %s
  98   ret <16 x i8> %v
  99 }
 100
 101 ; CHECK-LABEL: load_splat_v16i8_with_unfolded_gep_negative_offset:
 102 ; NO-SIMD128-NOT: v128
 103 ; SIMD128-NEXT: .functype load_splat_v16i8_with_unfolded_gep_negative_offset (i32) -> (v128){{$}}
 104 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -1{{$}}
 105 ; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
 106 ; SIMD128-NEXT: v8x16.load_splat $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
 107 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 108 define <16 x i8> @load_splat_v16i8_with_unfolded_gep_negative_offset(i8* %p) {
 109   %s = getelementptr inbounds i8, i8* %p, i32 -1
 110   %e = load i8, i8* %s
 111   %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
 112   %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
 113   ret <16 x i8> %v2
 114 }
 115
 116 ; CHECK-LABEL: load_v16i8_with_unfolded_offset:
 117 ; NO-SIMD128-NOT: v128
 118 ; SIMD128-NEXT: .functype load_v16i8_with_unfolded_offset (i32) -> (v128){{$}}
 119 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
 120 ; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
 121 ; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
 122 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 123 define <16 x i8> @load_v16i8_with_unfolded_offset(<16 x i8>* %p) {
 124   %q = ptrtoint <16 x i8>* %p to i32
 125   %r = add nsw i32 %q, 16
 126   %s = inttoptr i32 %r to <16 x i8>*
 127   %v = load <16 x i8>, <16 x i8>* %s
 128   ret <16 x i8> %v
 129 }
 130
 131 ; CHECK-LABEL: load_splat_v16i8_with_unfolded_offset:
 132 ; NO-SIMD128-NOT: v128
 133 ; SIMD128-NEXT: .functype load_splat_v16i8_with_unfolded_offset (i32) -> (v128){{$}}
 134 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
 135 ; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
 136 ; SIMD128-NEXT: v8x16.load_splat $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
 137 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 138 define <16 x i8> @load_splat_v16i8_with_unfolded_offset(i8* %p) {
 139   %q = ptrtoint i8* %p to i32
 140   %r = add nsw i32 %q, 16
 141   %s = inttoptr i32 %r to i8*
 142   %e = load i8, i8* %s
 143   %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
 144   %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
 145   ret <16 x i8> %v2
 146 }
 147
 148 ; CHECK-LABEL: load_v16i8_with_unfolded_gep_offset:
 149 ; NO-SIMD128-NOT: v128
 150 ; SIMD128-NEXT: .functype load_v16i8_with_unfolded_gep_offset (i32) -> (v128){{$}}
 151 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
 152 ; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
 153 ; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
 154 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 155 define <16 x i8> @load_v16i8_with_unfolded_gep_offset(<16 x i8>* %p) {
 156   %s = getelementptr <16 x i8>, <16 x i8>* %p, i32 1
 157   %v = load <16 x i8>, <16 x i8>* %s
 158   ret <16 x i8> %v
 159 }
 160
 161 ; CHECK-LABEL: load_splat_v16i8_with_unfolded_gep_offset:
 162 ; NO-SIMD128-NOT: v128
 163 ; SIMD128-NEXT: .functype load_splat_v16i8_with_unfolded_gep_offset (i32) -> (v128){{$}}
 164 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 1{{$}}
 165 ; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
 166 ; SIMD128-NEXT: v8x16.load_splat $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
 167 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 168 define <16 x i8> @load_splat_v16i8_with_unfolded_gep_offset(i8* %p) {
 169   %s = getelementptr i8, i8* %p, i32 1
 170   %e = load i8, i8* %s
 171   %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
 172   %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
 173   ret <16 x i8> %v2
 174 }
 175
 176 ; CHECK-LABEL: load_v16i8_from_numeric_address:
 177 ; NO-SIMD128-NOT: v128
 178 ; SIMD128-NEXT: .functype load_v16i8_from_numeric_address () -> (v128){{$}}
 179 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
 180 ; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}}
 181 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 182 define <16 x i8> @load_v16i8_from_numeric_address() {
 183   %s = inttoptr i32 32 to <16 x i8>*
 184   %v = load <16 x i8>, <16 x i8>* %s
 185   ret <16 x i8> %v
 186 }
 187
 188 ; CHECK-LABEL: load_splat_v16i8_from_numeric_address:
 189 ; NO-SIMD128-NOT: v128
 190 ; SIMD128-NEXT: .functype load_splat_v16i8_from_numeric_address () -> (v128){{$}}
 191 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
 192 ; SIMD128-NEXT: v8x16.load_splat $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}}
 193 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 194 define <16 x i8> @load_splat_v16i8_from_numeric_address() {
 195   %s = inttoptr i32 32 to i8*
 196   %e = load i8, i8* %s
 197   %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
 198   %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
 199   ret <16 x i8> %v2
 200 }
 201
 202 ; CHECK-LABEL: load_v16i8_from_global_address:
 203 ; NO-SIMD128-NOT: v128
 204 ; SIMD128-NEXT: .functype load_v16i8_from_global_address () -> (v128){{$}}
 205 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
 206 ; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, gv_v16i8($pop[[L0]]){{$}}
 207 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 208 @gv_v16i8 = global <16 x i8> <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
 209 define <16 x i8> @load_v16i8_from_global_address() {
 210   %v = load <16 x i8>, <16 x i8>* @gv_v16i8
 211   ret <16 x i8> %v
 212 }
 213
 214 ; CHECK-LABEL: load_splat_v16i8_from_global_address:
 215 ; NO-SIMD128-NOT: v128
 216 ; SIMD128-NEXT: .functype load_splat_v16i8_from_global_address () -> (v128){{$}}
 217 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
 218 ; SIMD128-NEXT: v8x16.load_splat $push[[R:[0-9]+]]=, gv_i8($pop[[L0]]){{$}}
 219 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 220 @gv_i8 = global i8 42
 221 define <16 x i8> @load_splat_v16i8_from_global_address() {
 222   %e = load i8, i8* @gv_i8
 223   %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
 224   %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
 225   ret <16 x i8> %v2
 226 }
 227
 228 ; CHECK-LABEL: store_v16i8:
 229 ; NO-SIMD128-NOT: v128
 230 ; SIMD128-NEXT: .functype store_v16i8 (v128, i32) -> (){{$}}
 231 ; SIMD128-NEXT: v128.store 0($1), $0{{$}}
 232 define void @store_v16i8(<16 x i8> %v, <16 x i8>* %p) {
 233   store <16 x i8> %v , <16 x i8>* %p
 234   ret void
 235 }
 236
 237 ; CHECK-LABEL: store_v16i8_with_folded_offset:
 238 ; NO-SIMD128-NOT: v128
 239 ; SIMD128-NEXT: .functype store_v16i8_with_folded_offset (v128, i32) -> (){{$}}
 240 ; SIMD128-NEXT: v128.store 16($1), $0{{$}}
 241 define void @store_v16i8_with_folded_offset(<16 x i8> %v, <16 x i8>* %p) {
 242   %q = ptrtoint <16 x i8>* %p to i32
 243   %r = add nuw i32 %q, 16
 244   %s = inttoptr i32 %r to <16 x i8>*
 245   store <16 x i8> %v , <16 x i8>* %s
 246   ret void
 247 }
 248
 249 ; CHECK-LABEL: store_v16i8_with_folded_gep_offset:
 250 ; NO-SIMD128-NOT: v128
 251 ; SIMD128-NEXT: .functype store_v16i8_with_folded_gep_offset (v128, i32) -> (){{$}}
 252 ; SIMD128-NEXT: v128.store 16($1), $0{{$}}
 253 define void @store_v16i8_with_folded_gep_offset(<16 x i8> %v, <16 x i8>* %p) {
 254   %s = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 1
 255   store <16 x i8> %v , <16 x i8>* %s
 256   ret void
 257 }
 258
 259 ; CHECK-LABEL: store_v16i8_with_unfolded_gep_negative_offset:
 260 ; NO-SIMD128-NOT: v128
 261 ; SIMD128-NEXT: .functype store_v16i8_with_unfolded_gep_negative_offset (v128, i32) -> (){{$}}
 262 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}}
 263 ; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
 264 ; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}}
 265 define void @store_v16i8_with_unfolded_gep_negative_offset(<16 x i8> %v, <16 x i8>* %p) {
 266   %s = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 -1
 267   store <16 x i8> %v , <16 x i8>* %s
 268   ret void
 269 }
 270
 271 ; CHECK-LABEL: store_v16i8_with_unfolded_offset:
 272 ; NO-SIMD128-NOT: v128
 273 ; SIMD128-NEXT: .functype store_v16i8_with_unfolded_offset (v128, i32) -> (){{$}}
 274 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
 275 ; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
 276 ; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}}
 277 define void @store_v16i8_with_unfolded_offset(<16 x i8> %v, <16 x i8>* %p) {
       ; Store counterpart of load_v16i8_with_unfolded_offset: the +16 is added
       ; with `nsw` but not `nuw`, and the checks above expect it to remain an
       ; explicit i32.add instead of becoming the store's offset immediate.
       ; (Previously this body duplicated the negative-GEP test verbatim.)
       %q = ptrtoint <16 x i8>* %p to i32
       %r = add nsw i32 %q, 16
 278   %s = inttoptr i32 %r to <16 x i8>*
 279   store <16 x i8> %v , <16 x i8>* %s
 280   ret void
 281 }
 282
 283 ; CHECK-LABEL: store_v16i8_with_unfolded_gep_offset:
 284 ; NO-SIMD128-NOT: v128
 285 ; SIMD128-NEXT: .functype store_v16i8_with_unfolded_gep_offset (v128, i32) -> (){{$}}
 286 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
 287 ; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
 288 ; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}}
 289 define void @store_v16i8_with_unfolded_gep_offset(<16 x i8> %v, <16 x i8>* %p) {
 290   %s = getelementptr <16 x i8>, <16 x i8>* %p, i32 1
 291   store <16 x i8> %v , <16 x i8>* %s
 292   ret void
 293 }
 294
 295 ; CHECK-LABEL: store_v16i8_to_numeric_address:
 296 ; NO-SIMD128-NOT: v128
 297 ; SIMD128-NEXT: .functype store_v16i8_to_numeric_address (v128) -> (){{$}}
 298 ; SIMD128-NEXT: i32.const $push[[R:[0-9]+]]=, 0{{$}}
 299 ; SIMD128-NEXT: v128.store 32($pop[[R]]), $0{{$}}
 300 define void @store_v16i8_to_numeric_address(<16 x i8> %v) {
 301   %s = inttoptr i32 32 to <16 x i8>*
 302   store <16 x i8> %v , <16 x i8>* %s
 303   ret void
 304 }
 305
 306 ; CHECK-LABEL: store_v16i8_to_global_address:
 307 ; NO-SIMD128-NOT: v128
 308 ; SIMD128-NEXT: .functype store_v16i8_to_global_address (v128) -> (){{$}}
 309 ; SIMD128-NEXT: i32.const $push[[R:[0-9]+]]=, 0{{$}}
 310 ; SIMD128-NEXT: v128.store gv_v16i8($pop[[R]]), $0{{$}}
 311 define void @store_v16i8_to_global_address(<16 x i8> %v) {
 312   store <16 x i8> %v , <16 x i8>* @gv_v16i8
 313   ret void
 314 }
 315
 316 ; ==============================================================================
 317 ; 8 x i16
 318 ; ==============================================================================
 319 ; CHECK-LABEL: load_v8i16:
 320 ; NO-SIMD128-NOT: v128
 321 ; SIMD128-NEXT: .functype load_v8i16 (i32) -> (v128){{$}}
 322 ; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($0){{$}}
 323 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 324 define <8 x i16> @load_v8i16(<8 x i16>* %p) {
 325   %v = load <8 x i16>, <8 x i16>* %p
 326   ret <8 x i16> %v
 327 }
 328
 329 ; CHECK-LABEL: load_splat_v8i16:
 330 ; NO-SIMD128-NOT: v128
 331 ; SIMD128-NEXT: .functype load_splat_v8i16 (i32) -> (v128){{$}}
 332 ; SIMD128-NEXT: v16x8.load_splat $push[[R:[0-9]+]]=, 0($0){{$}}
 333 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 334 define <8 x i16> @load_splat_v8i16(i16* %p) {
 335   %e = load i16, i16* %p
 336   %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
 337   %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
 338   ret <8 x i16> %v2
 339 }
 340
 341 ; CHECK-LABEL: load_v8i16_with_folded_offset:
 342 ; NO-SIMD128-NOT: v128
 343 ; SIMD128-NEXT: .functype load_v8i16_with_folded_offset (i32) -> (v128){{$}}
 344 ; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 16($0){{$}}
 345 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 346 define <8 x i16> @load_v8i16_with_folded_offset(<8 x i16>* %p) {
 347   %q = ptrtoint <8 x i16>* %p to i32
 348   %r = add nuw i32 %q, 16
 349   %s = inttoptr i32 %r to <8 x i16>*
 350   %v = load <8 x i16>, <8 x i16>* %s
 351   ret <8 x i16> %v
 352 }
 353
 354 ; CHECK-LABEL: load_splat_v8i16_with_folded_offset:
 355 ; NO-SIMD128-NOT: v128
 356 ; SIMD128-NEXT: .functype load_splat_v8i16_with_folded_offset (i32) -> (v128){{$}}
 357 ; SIMD128-NEXT: v16x8.load_splat $push[[R:[0-9]+]]=, 16($0){{$}}
 358 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 359 define <8 x i16> @load_splat_v8i16_with_folded_offset(i16* %p) {
 360   %q = ptrtoint i16* %p to i32
 361   %r = add nuw i32 %q, 16
 362   %s = inttoptr i32 %r to i16*
 363   %e = load i16, i16* %s
 364   %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
 365   %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
 366   ret <8 x i16> %v2
 367 }
 368
 369 ; CHECK-LABEL: load_v8i16_with_folded_gep_offset:
 370 ; NO-SIMD128-NOT: v128
 371 ; SIMD128-NEXT: .functype load_v8i16_with_folded_gep_offset (i32) -> (v128){{$}}
 372 ; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 16($0){{$}}
 373 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 374 define <8 x i16> @load_v8i16_with_folded_gep_offset(<8 x i16>* %p) {
 375   %s = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 1
 376   %v = load <8 x i16>, <8 x i16>* %s
 377   ret <8 x i16> %v
 378 }
 379
 380 ; CHECK-LABEL: load_splat_v8i16_with_folded_gep_offset:
 381 ; NO-SIMD128-NOT: v128
 382 ; SIMD128-NEXT: .functype load_splat_v8i16_with_folded_gep_offset (i32) -> (v128){{$}}
 383 ; SIMD128-NEXT: v16x8.load_splat $push[[R:[0-9]+]]=, 2($0){{$}}
 384 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 385 define <8 x i16> @load_splat_v8i16_with_folded_gep_offset(i16* %p) {
 386   %s = getelementptr inbounds i16, i16* %p, i32 1
 387   %e = load i16, i16* %s
 388   %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
 389   %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
 390   ret <8 x i16> %v2
 391 }
 392
 393 ; CHECK-LABEL: load_v8i16_with_unfolded_gep_negative_offset:
 394 ; NO-SIMD128-NOT: v128
 395 ; SIMD128-NEXT: .functype load_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128){{$}}
 396 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}}
 397 ; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
 398 ; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
 399 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 400 define <8 x i16> @load_v8i16_with_unfolded_gep_negative_offset(<8 x i16>* %p) {
 401   %s = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 -1
 402   %v = load <8 x i16>, <8 x i16>* %s
 403   ret <8 x i16> %v
 404 }
 405
 406 ; CHECK-LABEL: load_splat_v8i16_with_unfolded_gep_negative_offset:
 407 ; NO-SIMD128-NOT: v128
 408 ; SIMD128-NEXT: .functype load_splat_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128){{$}}
 409 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -2{{$}}
 410 ; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
 411 ; SIMD128-NEXT: v16x8.load_splat $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
 412 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 413 define <8 x i16> @load_splat_v8i16_with_unfolded_gep_negative_offset(i16* %p) {
 414   %s = getelementptr inbounds i16, i16* %p, i32 -1
 415   %e = load i16, i16* %s
 416   %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
 417   %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
 418   ret <8 x i16> %v2
 419 }
 420
 421 ; CHECK-LABEL: load_v8i16_with_unfolded_offset:
 422 ; NO-SIMD128-NOT: v128
 423 ; SIMD128-NEXT: .functype load_v8i16_with_unfolded_offset (i32) -> (v128){{$}}
 424 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
 425 ; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
 426 ; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
 427 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 428 define <8 x i16> @load_v8i16_with_unfolded_offset(<8 x i16>* %p) {
       ; The +16 is added with `nsw` but not `nuw`; the checks above expect it
       ; to stay an explicit i32.add rather than become the load's offset
       ; immediate. The load result is captured as R (not a rebinding of L0)
       ; so each FileCheck variable names exactly one value.
 429   %q = ptrtoint <8 x i16>* %p to i32
 430   %r = add nsw i32 %q, 16
 431   %s = inttoptr i32 %r to <8 x i16>*
 432   %v = load <8 x i16>, <8 x i16>* %s
 433   ret <8 x i16> %v
 434 }
 435
 436 ; CHECK-LABEL: load_splat_v8i16_with_unfolded_offset:
 437 ; NO-SIMD128-NOT: v128
 438 ; SIMD128-NEXT: .functype load_splat_v8i16_with_unfolded_offset (i32) -> (v128){{$}}
 439 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
 440 ; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
 441 ; SIMD128-NEXT: v16x8.load_splat $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
 442 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 443 define <8 x i16> @load_splat_v8i16_with_unfolded_offset(i16* %p) {
       ; Scalar i16 load at %p + 16 (added with `nsw` only), splatted to all
       ; eight lanes. The checks above expect the add to stay explicit and the
       ; splat load to use a zero offset immediate. The result is captured as R
       ; (not a rebinding of L0) to match the sibling tests.
 444   %q = ptrtoint i16* %p to i32
 445   %r = add nsw i32 %q, 16
 446   %s = inttoptr i32 %r to i16*
 447   %e = load i16, i16* %s
 448   %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
 449   %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
 450   ret <8 x i16> %v2
 451 }
 452
 453 ; CHECK-LABEL: load_v8i16_with_unfolded_gep_offset:
 454 ; NO-SIMD128-NOT: v128
 455 ; SIMD128-NEXT: .functype load_v8i16_with_unfolded_gep_offset (i32) -> (v128){{$}}
 456 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
 457 ; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
 458 ; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
 459 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 460 define <8 x i16> @load_v8i16_with_unfolded_gep_offset(<8 x i16>* %p) {
 461   %s = getelementptr <8 x i16>, <8 x i16>* %p, i32 1
 462   %v = load <8 x i16>, <8 x i16>* %s
 463   ret <8 x i16> %v
 464 }
 465
 466 ; CHECK-LABEL: load_splat_v8i16_with_unfolded_gep_offset:
 467 ; NO-SIMD128-NOT: v128
 468 ; SIMD128-NEXT: .functype load_splat_v8i16_with_unfolded_gep_offset (i32) -> (v128){{$}}
 469 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 2{{$}}
 470 ; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
 471 ; SIMD128-NEXT: v16x8.load_splat $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
 472 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 473 define <8 x i16> @load_splat_v8i16_with_unfolded_gep_offset(i16* %p) {
 474   %s = getelementptr i16, i16* %p, i32 1
 475   %e = load i16, i16* %s
 476   %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
 477   %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
 478   ret <8 x i16> %v2
 479 }
 480
 481 ; CHECK-LABEL: load_v8i16_from_numeric_address:
 482 ; NO-SIMD128-NOT: v128
 483 ; SIMD128-NEXT: .functype load_v8i16_from_numeric_address () -> (v128){{$}}
 484 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
 485 ; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}}
 486 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 487 define <8 x i16> @load_v8i16_from_numeric_address() {
 488   %s = inttoptr i32 32 to <8 x i16>*
 489   %v = load <8 x i16>, <8 x i16>* %s
 490   ret <8 x i16> %v
 491 }
 492
 493 ; CHECK-LABEL: load_splat_v8i16_from_numeric_address:
 494 ; NO-SIMD128-NOT: v128
 495 ; SIMD128-NEXT: .functype load_splat_v8i16_from_numeric_address () -> (v128){{$}}
 496 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
 497 ; SIMD128-NEXT: v16x8.load_splat $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}}
 498 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 499 define <8 x i16> @load_splat_v8i16_from_numeric_address() {
 500   %s = inttoptr i32 32 to i16*
 501   %e = load i16, i16* %s
 502   %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
 503   %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
 504   ret <8 x i16> %v2
 505 }
 506
 507 ; CHECK-LABEL: load_v8i16_from_global_address:
 508 ; NO-SIMD128-NOT: v128
 509 ; SIMD128-NEXT: .functype load_v8i16_from_global_address () -> (v128){{$}}
 510 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
 511 ; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, gv_v8i16($pop[[L0]]){{$}}
 512 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 513 @gv_v8i16 = global <8 x i16> <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>
 514 define <8 x i16> @load_v8i16_from_global_address() {
 515   %v = load <8 x i16>, <8 x i16>* @gv_v8i16
 516   ret <8 x i16> %v
 517 }
 518
 519 ; CHECK-LABEL: load_splat_v8i16_from_global_address:
 520 ; NO-SIMD128-NOT: v128
 521 ; SIMD128-NEXT: .functype load_splat_v8i16_from_global_address () -> (v128){{$}}
 522 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
 523 ; SIMD128-NEXT: v16x8.load_splat $push[[R:[0-9]+]]=, gv_i16($pop[[L0]]){{$}}
 524 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 525 @gv_i16 = global i16 42
 526 define <8 x i16> @load_splat_v8i16_from_global_address() {
 527   %e = load i16, i16* @gv_i16
 528   %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
 529   %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
 530   ret <8 x i16> %v2
 531 }
 532
 533 ; CHECK-LABEL: store_v8i16:
 534 ; NO-SIMD128-NOT: v128
 535 ; SIMD128-NEXT: .functype store_v8i16 (v128, i32) -> (){{$}}
 536 ; SIMD128-NEXT: v128.store 0($1), $0{{$}}
 537 define void @store_v8i16(<8 x i16> %v, <8 x i16>* %p) {
 538   store <8 x i16> %v , <8 x i16>* %p
 539   ret void
 540 }
 541
 542 ; CHECK-LABEL: store_v8i16_with_folded_offset:
 543 ; NO-SIMD128-NOT: v128
 544 ; SIMD128-NEXT: .functype store_v8i16_with_folded_offset (v128, i32) -> (){{$}}
 545 ; SIMD128-NEXT: v128.store 16($1), $0{{$}}
 546 define void @store_v8i16_with_folded_offset(<8 x i16> %v, <8 x i16>* %p) {
 547   %q = ptrtoint <8 x i16>* %p to i32
 548   %r = add nuw i32 %q, 16
 549   %s = inttoptr i32 %r to <8 x i16>*
 550   store <8 x i16> %v , <8 x i16>* %s
 551   ret void
 552 }
 553
 554 ; CHECK-LABEL: store_v8i16_with_folded_gep_offset:
 555 ; NO-SIMD128-NOT: v128
 556 ; SIMD128-NEXT: .functype store_v8i16_with_folded_gep_offset (v128, i32) -> (){{$}}
 557 ; SIMD128-NEXT: v128.store 16($1), $0{{$}}
 558 define void @store_v8i16_with_folded_gep_offset(<8 x i16> %v, <8 x i16>* %p) {
 559   %s = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 1
 560   store <8 x i16> %v , <8 x i16>* %s
 561   ret void
 562 }
 563
 564 ; CHECK-LABEL: store_v8i16_with_unfolded_gep_negative_offset:
 565 ; NO-SIMD128-NOT: v128
 566 ; SIMD128-NEXT: .functype store_v8i16_with_unfolded_gep_negative_offset (v128, i32) -> (){{$}}
 567 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}}
 568 ; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
 569 ; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}}
 570 define void @store_v8i16_with_unfolded_gep_negative_offset(<8 x i16> %v, <8 x i16>* %p) {
 571   %s = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 -1
 572   store <8 x i16> %v , <8 x i16>* %s
 573   ret void
 574 }
 575
 576 ; CHECK-LABEL: store_v8i16_with_unfolded_offset:
 577 ; NO-SIMD128-NOT: v128
 578 ; SIMD128-NEXT: .functype store_v8i16_with_unfolded_offset (v128, i32) -> (){{$}}
 579 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
 580 ; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
 581 ; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}}
 582 define void @store_v8i16_with_unfolded_offset(<8 x i16> %v, <8 x i16>* %p) {
       ; Store counterpart of load_v8i16_with_unfolded_offset: the +16 is added
       ; with `nsw` but not `nuw`, and the checks above expect it to remain an
       ; explicit i32.add instead of becoming the store's offset immediate.
       ; (Previously this body duplicated the negative-GEP test verbatim.)
       %q = ptrtoint <8 x i16>* %p to i32
       %r = add nsw i32 %q, 16
 583   %s = inttoptr i32 %r to <8 x i16>*
 584   store <8 x i16> %v , <8 x i16>* %s
 585   ret void
 586 }
 587
 588 ; CHECK-LABEL: store_v8i16_with_unfolded_gep_offset:
 589 ; NO-SIMD128-NOT: v128
 590 ; SIMD128-NEXT: .functype store_v8i16_with_unfolded_gep_offset (v128, i32) -> (){{$}}
 591 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
 592 ; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
 593 ; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}}
 594 define void @store_v8i16_with_unfolded_gep_offset(<8 x i16> %v, <8 x i16>* %p) {
 595   %s = getelementptr <8 x i16>, <8 x i16>* %p, i32 1
 596   store <8 x i16> %v , <8 x i16>* %s
 597   ret void
 598 }
 599
 600 ; CHECK-LABEL: store_v8i16_to_numeric_address:
 601 ; NO-SIMD128-NOT: v128
 602 ; SIMD128-NEXT: .functype store_v8i16_to_numeric_address (v128) -> (){{$}}
 603 ; SIMD128-NEXT: i32.const $push[[R:[0-9]+]]=, 0{{$}}
 604 ; SIMD128-NEXT: v128.store 32($pop[[R]]), $0{{$}}
 605 define void @store_v8i16_to_numeric_address(<8 x i16> %v) {
       ; Store to the constant address 32: the checks above expect a zero base
       ; register with 32 carried as the store's offset immediate. The capture
       ; is named R to match store_v16i8_to_numeric_address.
 606   %s = inttoptr i32 32 to <8 x i16>*
 607   store <8 x i16> %v , <8 x i16>* %s
 608   ret void
 609 }
 610
 611 ; CHECK-LABEL: store_v8i16_to_global_address:
 612 ; NO-SIMD128-NOT: v128
 613 ; SIMD128-NEXT: .functype store_v8i16_to_global_address (v128) -> (){{$}}
 614 ; SIMD128-NEXT: i32.const $push[[R:[0-9]+]]=, 0{{$}}
 615 ; SIMD128-NEXT: v128.store gv_v8i16($pop[[R]]), $0{{$}}
 616 define void @store_v8i16_to_global_address(<8 x i16> %v) {
 617   store <8 x i16> %v , <8 x i16>* @gv_v8i16
 618   ret void
 619 }
 620
 621 ; ==============================================================================
 622 ; 4 x i32
 623 ; ==============================================================================
 624 ; CHECK-LABEL: load_v4i32:
 625 ; NO-SIMD128-NOT: v128
 626 ; SIMD128-NEXT: .functype load_v4i32 (i32) -> (v128){{$}}
 627 ; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($0){{$}}
 628 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 629 define <4 x i32> @load_v4i32(<4 x i32>* %p) {
 630   %v = load <4 x i32>, <4 x i32>* %p
 631   ret <4 x i32> %v
 632 }
 633
 634 ; CHECK-LABEL: load_splat_v4i32:
 635 ; NO-SIMD128-NOT: v128
 636 ; SIMD128-NEXT: .functype load_splat_v4i32 (i32) -> (v128){{$}}
 637 ; SIMD128-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, 0($0){{$}}
     ; SIMD128-NEXT: return $pop[[R]]{{$}}
 638 define <4 x i32> @load_splat_v4i32(i32* %addr) {
       ; Scalar i32 load from %addr splatted to all four lanes. The checks
       ; above pin the full instruction (zero offset immediate, base in $0) and
       ; the return, matching load_splat_v16i8 and load_splat_v8i16, instead of
       ; matching only the bare mnemonic.
 639   %e = load i32, i32* %addr, align 4
 640   %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
 641   %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
 642   ret <4 x i32> %v2
 643 }
 644
 645 ; CHECK-LABEL: load_v4i32_with_folded_offset:
 646 ; NO-SIMD128-NOT: v128
 647 ; SIMD128-NEXT: .functype load_v4i32_with_folded_offset (i32) -> (v128){{$}}
 648 ; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 16($0){{$}}
 649 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 650 define <4 x i32> @load_v4i32_with_folded_offset(<4 x i32>* %p) {
 651   %q = ptrtoint <4 x i32>* %p to i32
 652   %r = add nuw i32 %q, 16
 653   %s = inttoptr i32 %r to <4 x i32>*
 654   %v = load <4 x i32>, <4 x i32>* %s
 655   ret <4 x i32> %v
 656 }
 657
 658 ; CHECK-LABEL: load_splat_v4i32_with_folded_offset:
 659 ; NO-SIMD128-NOT: v128
 660 ; SIMD128-NEXT: .functype load_splat_v4i32_with_folded_offset (i32) -> (v128){{$}}
 661 ; SIMD128-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, 16($0){{$}}
 662 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 663 define <4 x i32> @load_splat_v4i32_with_folded_offset(i32* %p) {
 664   %q = ptrtoint i32* %p to i32
 665   %r = add nuw i32 %q, 16
 666   %s = inttoptr i32 %r to i32*
 667   %e = load i32, i32* %s
 668   %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
 669   %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
 670   ret <4 x i32> %v2
 671 }
 672
 673 ; CHECK-LABEL: load_v4i32_with_folded_gep_offset:
 674 ; NO-SIMD128-NOT: v128
 675 ; SIMD128-NEXT: .functype load_v4i32_with_folded_gep_offset (i32) -> (v128){{$}}
 676 ; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 16($0){{$}}
 677 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 678 define <4 x i32> @load_v4i32_with_folded_gep_offset(<4 x i32>* %p) {
 679   %s = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 1
 680   %v = load <4 x i32>, <4 x i32>* %s
 681   ret <4 x i32> %v
 682 }
 683
 684 ; CHECK-LABEL: load_splat_v4i32_with_folded_gep_offset:
 685 ; NO-SIMD128-NOT: v128
 686 ; SIMD128-NEXT: .functype load_splat_v4i32_with_folded_gep_offset (i32) -> (v128){{$}}
 687 ; SIMD128-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, 4($0){{$}}
 688 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 689 define <4 x i32> @load_splat_v4i32_with_folded_gep_offset(i32* %p) {
       ; Scalar i32 load one element past %p via an inbounds gep, broadcast to all
       ; four lanes. The expected output above uses offset immediate 4 (one i32).
 690   %s = getelementptr inbounds i32, i32* %p, i32 1
 691   %e = load i32, i32* %s
 692   %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
 693   %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer  ; zero mask: every lane reads lane 0
 694   ret <4 x i32> %v2
 695 }
 696
 697 ; CHECK-LABEL: load_v4i32_with_unfolded_gep_negative_offset:
 698 ; NO-SIMD128-NOT: v128
 699 ; SIMD128-NEXT: .functype load_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128){{$}}
 700 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}}
 701 ; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
 702 ; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
 703 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 704 define <4 x i32> @load_v4i32_with_unfolded_gep_negative_offset(<4 x i32>* %p) {
       ; Vector load one element before %p (gep index -1). The expected output
       ; above keeps the -16 as an explicit i32.const/i32.add and leaves the
       ; load's offset immediate at 0.
 705   %s = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 -1
 706   %v = load <4 x i32>, <4 x i32>* %s
 707   ret <4 x i32> %v
 708 }
 709
 710 ; CHECK-LABEL: load_splat_v4i32_with_unfolded_gep_negative_offset:
 711 ; NO-SIMD128-NOT: v128
 712 ; SIMD128-NEXT: .functype load_splat_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128){{$}}
 713 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -4{{$}}
 714 ; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
 715 ; SIMD128-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
 716 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 717 define <4 x i32> @load_splat_v4i32_with_unfolded_gep_negative_offset(i32* %p) {
       ; Scalar i32 load one element before %p (gep index -1), broadcast to all
       ; four lanes. The expected output above adds -4 explicitly and uses a zero
       ; offset immediate on the splat load.
 718   %s = getelementptr inbounds i32, i32* %p, i32 -1
 719   %e = load i32, i32* %s
 720   %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
 721   %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer  ; zero mask: every lane reads lane 0
 722   ret <4 x i32> %v2
 723 }
 724
 725 ; CHECK-LABEL: load_v4i32_with_unfolded_offset:
 726 ; NO-SIMD128-NOT: v128
 727 ; SIMD128-NEXT: .functype load_v4i32_with_unfolded_offset (i32) -> (v128){{$}}
 728 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
 729 ; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
 730 ; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
 731 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 732 define <4 x i32> @load_v4i32_with_unfolded_offset(<4 x i32>* %p) {
       ; Same address computation as load_v4i32_with_folded_offset, except the add
       ; is flagged nsw instead of nuw. The expected output above keeps an explicit
       ; i32.const 16 / i32.add and a zero offset immediate.
 733   %q = ptrtoint <4 x i32>* %p to i32
 734   %r = add nsw i32 %q, 16  ; nsw only; presumably unsigned wrap is still possible - TODO confirm
 735   %s = inttoptr i32 %r to <4 x i32>*
 736   %v = load <4 x i32>, <4 x i32>* %s
 737   ret <4 x i32> %v
 738 }
 739
 740 ; CHECK-LABEL: load_splat_v4i32_with_unfolded_offset:
 741 ; NO-SIMD128-NOT: v128
 742 ; SIMD128-NEXT: .functype load_splat_v4i32_with_unfolded_offset (i32) -> (v128){{$}}
 743 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
 744 ; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
 745 ; SIMD128-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
 746 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 747 define <4 x i32> @load_splat_v4i32_with_unfolded_offset(i32* %p) {
       ; Scalar i32 load from %p + 16 where the integer add is flagged nsw (not
       ; nuw), broadcast to all four lanes. The expected output above keeps the
       ; add explicit and uses a zero offset immediate on the splat load.
 748   %q = ptrtoint i32* %p to i32
 749   %r = add nsw i32 %q, 16
 750   %s = inttoptr i32 %r to i32*
 751   %e = load i32, i32* %s
 752   %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
 753   %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer  ; zero mask: every lane reads lane 0
 754   ret <4 x i32> %v2
 755 }
 756
 757 ; CHECK-LABEL: load_v4i32_with_unfolded_gep_offset:
 758 ; NO-SIMD128-NOT: v128
 759 ; SIMD128-NEXT: .functype load_v4i32_with_unfolded_gep_offset (i32) -> (v128){{$}}
 760 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
 761 ; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
 762 ; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
 763 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 764 define <4 x i32> @load_v4i32_with_unfolded_gep_offset(<4 x i32>* %p) {
       ; Vector load one element past %p, but the gep has no inbounds flag. The
       ; expected output above keeps the 16-byte step as an explicit i32.add.
 765   %s = getelementptr <4 x i32>, <4 x i32>* %p, i32 1
 766   %v = load <4 x i32>, <4 x i32>* %s
 767   ret <4 x i32> %v
 768 }
 769
 770 ; CHECK-LABEL: load_splat_v4i32_with_unfolded_gep_offset:
 771 ; NO-SIMD128-NOT: v128
 772 ; SIMD128-NEXT: .functype load_splat_v4i32_with_unfolded_gep_offset (i32) -> (v128){{$}}
 773 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 4{{$}}
 774 ; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
 775 ; SIMD128-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
 776 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 777 define <4 x i32> @load_splat_v4i32_with_unfolded_gep_offset(i32* %p) {
       ; Scalar i32 load one element past %p through a gep without inbounds,
       ; broadcast to all four lanes. The expected output above adds 4 explicitly
       ; and uses a zero offset immediate.
 778   %s = getelementptr i32, i32* %p, i32 1
 779   %e = load i32, i32* %s
 780   %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
 781   %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer  ; zero mask: every lane reads lane 0
 782   ret <4 x i32> %v2
 783 }
 784
 785 ; CHECK-LABEL: load_v4i32_from_numeric_address:
 786 ; NO-SIMD128-NOT: v128
 787 ; SIMD128-NEXT: .functype load_v4i32_from_numeric_address () -> (v128){{$}}
 788 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
 789 ; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}}
 790 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 791 define <4 x i32> @load_v4i32_from_numeric_address() {
       ; Vector load from the constant address 32. The expected output above
       ; materialises a zero base and uses 32 as the load's offset immediate.
 792   %s = inttoptr i32 32 to <4 x i32>*
 793   %v = load <4 x i32>, <4 x i32>* %s
 794   ret <4 x i32> %v
 795 }
 796
 797 ; CHECK-LABEL: load_splat_v4i32_from_numeric_address:
 798 ; NO-SIMD128-NOT: v128
 799 ; SIMD128-NEXT: .functype load_splat_v4i32_from_numeric_address () -> (v128){{$}}
 800 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
 801 ; SIMD128-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}}
 802 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 803 define <4 x i32> @load_splat_v4i32_from_numeric_address() {
       ; Scalar i32 load from the constant address 32, broadcast to all four
       ; lanes. The expected output above uses a zero base with offset immediate 32.
 804   %s = inttoptr i32 32 to i32*
 805   %e = load i32, i32* %s
 806   %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
 807   %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer  ; zero mask: every lane reads lane 0
 808   ret <4 x i32> %v2
 809 }
 810
 811 ; CHECK-LABEL: load_v4i32_from_global_address:
 812 ; NO-SIMD128-NOT: v128
 813 ; SIMD128-NEXT: .functype load_v4i32_from_global_address () -> (v128){{$}}
 814 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
 815 ; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, gv_v4i32($pop[[L0]]){{$}}
 816 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 817 @gv_v4i32 = global <4 x i32> <i32 42, i32 42, i32 42, i32 42>  ; also the target of store_v4i32_to_global_address
 818 define <4 x i32> @load_v4i32_from_global_address() {
       ; Vector load straight from the global. The expected output above uses the
       ; symbol name as the load's offset on a zero base.
 819   %v = load <4 x i32>, <4 x i32>* @gv_v4i32
 820   ret <4 x i32> %v
 821 }
 822
 823 ; CHECK-LABEL: load_splat_v4i32_from_global_address:
 824 ; NO-SIMD128-NOT: v128
 825 ; SIMD128-NEXT: .functype load_splat_v4i32_from_global_address () -> (v128){{$}}
 826 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
 827 ; SIMD128-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, gv_i32($pop[[L0]]){{$}}
 828 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 829 @gv_i32 = global i32 42  ; scalar source for the splat below
 830 define <4 x i32> @load_splat_v4i32_from_global_address() {
       ; Scalar i32 load from the global, broadcast to all four lanes. The
       ; expected output above uses the symbol name as the splat load's offset on
       ; a zero base.
 831   %e = load i32, i32* @gv_i32
 832   %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
 833   %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer  ; zero mask: every lane reads lane 0
 834   ret <4 x i32> %v2
 835 }
 836
 837 ; CHECK-LABEL: store_v4i32:
 838 ; NO-SIMD128-NOT: v128
 839 ; SIMD128-NEXT: .functype store_v4i32 (v128, i32) -> (){{$}}
 840 ; SIMD128-NEXT: v128.store 0($1), $0{{$}}
 841 define void @store_v4i32(<4 x i32> %v, <4 x i32>* %p) {
       ; Baseline store: writes the whole vector %v through %p with no address
       ; arithmetic. The expected output above uses offset immediate 0.
 842   store <4 x i32> %v , <4 x i32>* %p
 843   ret void
 844 }
 845
 846 ; CHECK-LABEL: store_v4i32_with_folded_offset:
 847 ; NO-SIMD128-NOT: v128
 848 ; SIMD128-NEXT: .functype store_v4i32_with_folded_offset (v128, i32) -> (){{$}}
 849 ; SIMD128-NEXT: v128.store 16($1), $0{{$}}
 850 define void @store_v4i32_with_folded_offset(<4 x i32> %v, <4 x i32>* %p) {
       ; Vector store to %p + 16, with the address built in integer arithmetic.
       ; The expected output above carries the 16 as the store's offset immediate.
 851   %q = ptrtoint <4 x i32>* %p to i32
 852   %r = add nuw i32 %q, 16  ; nuw: the add is flagged as not wrapping unsigned
 853   %s = inttoptr i32 %r to <4 x i32>*
 854   store <4 x i32> %v , <4 x i32>* %s
 855   ret void
 856 }
 857
 858 ; CHECK-LABEL: store_v4i32_with_folded_gep_offset:
 859 ; NO-SIMD128-NOT: v128
 860 ; SIMD128-NEXT: .functype store_v4i32_with_folded_gep_offset (v128, i32) -> (){{$}}
 861 ; SIMD128-NEXT: v128.store 16($1), $0{{$}}
 862 define void @store_v4i32_with_folded_gep_offset(<4 x i32> %v, <4 x i32>* %p) {
       ; Vector store one element past %p via an inbounds gep. The expected output
       ; above carries the 16-byte step as the store's offset immediate.
 863   %s = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 1
 864   store <4 x i32> %v , <4 x i32>* %s
 865   ret void
 866 }
 867
 868 ; CHECK-LABEL: store_v4i32_with_unfolded_gep_negative_offset:
 869 ; NO-SIMD128-NOT: v128
 870 ; SIMD128-NEXT: .functype store_v4i32_with_unfolded_gep_negative_offset (v128, i32) -> (){{$}}
 871 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}}
 872 ; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
 873 ; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}}
 874 define void @store_v4i32_with_unfolded_gep_negative_offset(<4 x i32> %v, <4 x i32>* %p) {
       ; Vector store one element before %p (gep index -1). The expected output
       ; above adds -16 explicitly and leaves the store's offset immediate at 0.
 875   %s = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 -1
 876   store <4 x i32> %v , <4 x i32>* %s
 877   ret void
 878 }
 879
 880 ; CHECK-LABEL: store_v4i32_with_unfolded_offset:
 881 ; NO-SIMD128-NOT: v128
 882 ; SIMD128-NEXT: .functype store_v4i32_with_unfolded_offset (v128, i32) -> (){{$}}
 883 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
 884 ; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
 885 ; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}}
 886 define void @store_v4i32_with_unfolded_offset(<4 x i32> %v, <4 x i32>* %p) {
       ; Vector store to %p + 16 where the integer add is flagged nsw (not nuw).
       ; This is the store counterpart of load_v4i32_with_unfolded_offset: the
       ; expected output keeps an explicit i32.const 16 / i32.add and a zero
       ; offset immediate. Using the ptrtoint/add/inttoptr shape keeps this test
       ; distinct from store_v4i32_with_unfolded_gep_negative_offset.
 887   %q = ptrtoint <4 x i32>* %p to i32
       %r = add nsw i32 %q, 16  ; nsw only, no nuw flag
       %s = inttoptr i32 %r to <4 x i32>*
 888   store <4 x i32> %v , <4 x i32>* %s
 889   ret void
 890 }
 891
 892 ; CHECK-LABEL: store_v4i32_with_unfolded_gep_offset:
 893 ; NO-SIMD128-NOT: v128
 894 ; SIMD128-NEXT: .functype store_v4i32_with_unfolded_gep_offset (v128, i32) -> (){{$}}
 895 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
 896 ; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
 897 ; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}}
 898 define void @store_v4i32_with_unfolded_gep_offset(<4 x i32> %v, <4 x i32>* %p) {
       ; Vector store one element past %p through a gep without inbounds. The
       ; expected output above keeps the 16-byte step as an explicit i32.add.
 899   %s = getelementptr <4 x i32>, <4 x i32>* %p, i32 1
 900   store <4 x i32> %v , <4 x i32>* %s
 901   ret void
 902 }
 903
 904 ; CHECK-LABEL: store_v4i32_to_numeric_address:
 905 ; NO-SIMD128-NOT: v128
 906 ; SIMD128-NEXT: .functype store_v4i32_to_numeric_address (v128) -> (){{$}}
 907 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
 908 ; SIMD128-NEXT: v128.store 32($pop[[L0]]), $0{{$}}
 909 define void @store_v4i32_to_numeric_address(<4 x i32> %v) {
       ; Vector store to the constant address 32. The expected output above
       ; materialises a zero base and uses 32 as the store's offset immediate.
 910   %s = inttoptr i32 32 to <4 x i32>*
 911   store <4 x i32> %v , <4 x i32>* %s
 912   ret void
 913 }
 914
 915 ; CHECK-LABEL: store_v4i32_to_global_address:
 916 ; NO-SIMD128-NOT: v128
 917 ; SIMD128-NEXT: .functype store_v4i32_to_global_address (v128) -> (){{$}}
 918 ; SIMD128-NEXT: i32.const $push[[R:[0-9]+]]=, 0{{$}}
 919 ; SIMD128-NEXT: v128.store gv_v4i32($pop[[R]]), $0{{$}}
 920 define void @store_v4i32_to_global_address(<4 x i32> %v) {
       ; Vector store straight to @gv_v4i32 (declared next to
       ; load_v4i32_from_global_address). The expected output above uses the
       ; symbol name as the store's offset on a zero base.
 921   store <4 x i32> %v , <4 x i32>* @gv_v4i32
 922   ret void
 923 }
 924
 925 ; ==============================================================================
 926 ; 2 x i64
 927 ; ==============================================================================
 928 ; CHECK-LABEL: load_v2i64:
 929 ; NO-SIMD128-NOT: v128
 930 ; SIMD128-VM-NOT: v128
 931 ; SIMD128-NEXT: .functype load_v2i64 (i32) -> (v128){{$}}
 932 ; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($0){{$}}
 933 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 934 define <2 x i64> @load_v2i64(<2 x i64>* %p) {
       ; Baseline load: reads a whole <2 x i64> through %p with no address
       ; arithmetic. Unlike the i32 tests, the directives above also forbid any
       ; v128 use for the run that passes only -mattr=+simd128 (see top of file).
 935   %v = load <2 x i64>, <2 x i64>* %p
 936   ret <2 x i64> %v
 937 }
 938
 939 ; CHECK-LABEL: load_splat_v2i64:
 940 ; NO-SIMD128-NOT: v128
 941 ; SIMD128-VM-NOT: v128
 942 ; SIMD128-NEXT: .functype load_splat_v2i64 (i32) -> (v128){{$}}
 943 ; SIMD128-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, 0($0){{$}}
 944 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 945 define <2 x i64> @load_splat_v2i64(i64* %p) {
       ; Scalar i64 load through %p, broadcast to both lanes. The expected output
       ; above shows one splat load with offset immediate 0.
 946   %e = load i64, i64* %p
 947   %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
 948   %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer  ; zero mask: every lane reads lane 0
 949   ret <2 x i64> %v2
 950 }
 951
 952 ; CHECK-LABEL: load_v2i64_with_folded_offset:
 953 ; NO-SIMD128-NOT: v128
 954 ; SIMD128-VM-NOT: v128
 955 ; SIMD128-NEXT: .functype load_v2i64_with_folded_offset (i32) -> (v128){{$}}
 956 ; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 16($0){{$}}
 957 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 958 define <2 x i64> @load_v2i64_with_folded_offset(<2 x i64>* %p) {
       ; Vector load from %p + 16, with the address built in integer arithmetic.
       ; The expected output above carries the 16 as the load's offset immediate.
 959   %q = ptrtoint <2 x i64>* %p to i32
 960   %r = add nuw i32 %q, 16  ; nuw: the add is flagged as not wrapping unsigned
 961   %s = inttoptr i32 %r to <2 x i64>*
 962   %v = load <2 x i64>, <2 x i64>* %s
 963   ret <2 x i64> %v
 964 }
 965
 966 ; CHECK-LABEL: load_splat_v2i64_with_folded_offset:
 967 ; NO-SIMD128-NOT: v128
 968 ; SIMD128-VM-NOT: v128
 969 ; SIMD128-NEXT: .functype load_splat_v2i64_with_folded_offset (i32) -> (v128){{$}}
 970 ; SIMD128-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, 16($0){{$}}
 971 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 972 define <2 x i64> @load_splat_v2i64_with_folded_offset(i64* %p) {
       ; Scalar i64 load from %p + 16 (integer add flagged nuw), broadcast to both
       ; lanes. The expected output above shows one splat load with offset
       ; immediate 16.
 973   %q = ptrtoint i64* %p to i32
 974   %r = add nuw i32 %q, 16
 975   %s = inttoptr i32 %r to i64*
 976   %e = load i64, i64* %s
 977   %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
 978   %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer  ; zero mask: every lane reads lane 0
 979   ret <2 x i64> %v2
 980 }
 981
 982 ; CHECK-LABEL: load_v2i64_with_folded_gep_offset:
 983 ; NO-SIMD128-NOT: v128
 984 ; SIMD128-VM-NOT: v128
 985 ; SIMD128-NEXT: .functype load_v2i64_with_folded_gep_offset (i32) -> (v128){{$}}
 986 ; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 16($0){{$}}
 987 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 988 define <2 x i64> @load_v2i64_with_folded_gep_offset(<2 x i64>* %p) {
       ; Vector load one element past %p via an inbounds gep. The expected output
       ; above carries the 16-byte step as the load's offset immediate.
 989   %s = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 1
 990   %v = load <2 x i64>, <2 x i64>* %s
 991   ret <2 x i64> %v
 992 }
 993
 994 ; CHECK-LABEL: load_splat_v2i64_with_folded_gep_offset:
 995 ; NO-SIMD128-NOT: v128
 996 ; SIMD128-VM-NOT: v128
 997 ; SIMD128-NEXT: .functype load_splat_v2i64_with_folded_gep_offset (i32) -> (v128){{$}}
 998 ; SIMD128-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, 8($0){{$}}
 999 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1000 define <2 x i64> @load_splat_v2i64_with_folded_gep_offset(i64* %p) {
       ; Scalar i64 load one element past %p via an inbounds gep, broadcast to
       ; both lanes. The expected output above uses offset immediate 8 (one i64).
1001   %s = getelementptr inbounds i64, i64* %p, i32 1
1002   %e = load i64, i64* %s
1003   %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
1004   %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer  ; zero mask: every lane reads lane 0
1005   ret <2 x i64> %v2
1006 }
1007
1008 ; CHECK-LABEL: load_v2i64_with_unfolded_gep_negative_offset:
1009 ; NO-SIMD128-NOT: v128
1010 ; SIMD128-VM-NOT: v128
1011 ; SIMD128-NEXT: .functype load_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128){{$}}
1012 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}}
1013 ; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1014 ; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
1015 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1016 define <2 x i64> @load_v2i64_with_unfolded_gep_negative_offset(<2 x i64>* %p) {
       ; Vector load one element before %p (gep index -1). The expected output
       ; above adds -16 explicitly and leaves the load's offset immediate at 0.
1017   %s = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 -1
1018   %v = load <2 x i64>, <2 x i64>* %s
1019   ret <2 x i64> %v
1020 }
1021
1022 ; CHECK-LABEL: load_splat_v2i64_with_unfolded_gep_negative_offset:
1023 ; NO-SIMD128-NOT: v128
1024 ; SIMD128-VM-NOT: v128
1025 ; SIMD128-NEXT: .functype load_splat_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128){{$}}
1026 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -8{{$}}
1027 ; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1028 ; SIMD128-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
1029 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1030 define <2 x i64> @load_splat_v2i64_with_unfolded_gep_negative_offset(i64* %p) {
       ; Scalar i64 load one element before %p (gep index -1), broadcast to both
       ; lanes. The expected output above adds -8 explicitly and uses a zero
       ; offset immediate on the splat load.
1031   %s = getelementptr inbounds i64, i64* %p, i32 -1
1032   %e = load i64, i64* %s
1033   %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
1034   %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer  ; zero mask: every lane reads lane 0
1035   ret <2 x i64> %v2
1036 }
1037
1038 ; CHECK-LABEL: load_v2i64_with_unfolded_offset:
1039 ; NO-SIMD128-NOT: v128
1040 ; SIMD128-VM-NOT: v128
1041 ; SIMD128-NEXT: .functype load_v2i64_with_unfolded_offset (i32) -> (v128){{$}}
1042 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
1043 ; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1044 ; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
1045 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1046 define <2 x i64> @load_v2i64_with_unfolded_offset(<2 x i64>* %p) {
       ; Same address computation as load_v2i64_with_folded_offset, except the add
       ; is flagged nsw instead of nuw. The expected output above keeps an explicit
       ; i32.const 16 / i32.add and a zero offset immediate.
1047   %q = ptrtoint <2 x i64>* %p to i32
1048   %r = add nsw i32 %q, 16  ; nsw only; presumably unsigned wrap is still possible - TODO confirm
1049   %s = inttoptr i32 %r to <2 x i64>*
1050   %v = load <2 x i64>, <2 x i64>* %s
1051   ret <2 x i64> %v
1052 }
1053
1054 ; CHECK-LABEL: load_splat_v2i64_with_unfolded_offset:
1055 ; NO-SIMD128-NOT: v128
1056 ; SIMD128-VM-NOT: v128
1057 ; SIMD128-NEXT: .functype load_splat_v2i64_with_unfolded_offset (i32) -> (v128){{$}}
1058 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
1059 ; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1060 ; SIMD128-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
1061 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1062 define <2 x i64> @load_splat_v2i64_with_unfolded_offset(i64* %p) {
       ; Scalar i64 load from %p + 16 where the integer add is flagged nsw (not
       ; nuw), broadcast to both lanes. The expected output above keeps the add
       ; explicit and uses a zero offset immediate on the splat load.
1063   %q = ptrtoint i64* %p to i32
1064   %r = add nsw i32 %q, 16
1065   %s = inttoptr i32 %r to i64*
1066   %e = load i64, i64* %s
1067   %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
1068   %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer  ; zero mask: every lane reads lane 0
1069   ret <2 x i64> %v2
1070 }
1071
1072 ; CHECK-LABEL: load_v2i64_with_unfolded_gep_offset:
1073 ; NO-SIMD128-NOT: v128
1074 ; SIMD128-VM-NOT: v128
1075 ; SIMD128-NEXT: .functype load_v2i64_with_unfolded_gep_offset (i32) -> (v128){{$}}
1076 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
1077 ; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1078 ; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
1079 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1080 define <2 x i64> @load_v2i64_with_unfolded_gep_offset(<2 x i64>* %p) {
       ; Vector load one element past %p, but the gep has no inbounds flag. The
       ; expected output above keeps the 16-byte step as an explicit i32.add.
1081   %s = getelementptr <2 x i64>, <2 x i64>* %p, i32 1
1082   %v = load <2 x i64>, <2 x i64>* %s
1083   ret <2 x i64> %v
1084 }
1085
1086 ; CHECK-LABEL: load_splat_v2i64_with_unfolded_gep_offset:
1087 ; NO-SIMD128-NOT: v128
1088 ; SIMD128-VM-NOT: v128
1089 ; SIMD128-NEXT: .functype load_splat_v2i64_with_unfolded_gep_offset (i32) -> (v128){{$}}
1090 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 8{{$}}
1091 ; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1092 ; SIMD128-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
1093 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1094 define <2 x i64> @load_splat_v2i64_with_unfolded_gep_offset(i64* %p) {
       ; Scalar i64 load one element past %p through a gep without inbounds,
       ; broadcast to both lanes. The expected output above adds 8 explicitly and
       ; uses a zero offset immediate.
1095   %s = getelementptr i64, i64* %p, i32 1
1096   %e = load i64, i64* %s
1097   %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
1098   %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer  ; zero mask: every lane reads lane 0
1099   ret <2 x i64> %v2
1100 }
1101
1102 ; CHECK-LABEL: load_v2i64_from_numeric_address:
1103 ; NO-SIMD128-NOT: v128
1104 ; SIMD128-VM-NOT: v128
1105 ; SIMD128-NEXT: .functype load_v2i64_from_numeric_address () -> (v128){{$}}
1106 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
1107 ; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}}
1108 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1109 define <2 x i64> @load_v2i64_from_numeric_address() {
       ; Vector load from the constant address 32. The expected output above
       ; materialises a zero base and uses 32 as the load's offset immediate.
1110   %s = inttoptr i32 32 to <2 x i64>*
1111   %v = load <2 x i64>, <2 x i64>* %s
1112   ret <2 x i64> %v
1113 }
1114
1115 ; CHECK-LABEL: load_splat_v2i64_from_numeric_address:
1116 ; NO-SIMD128-NOT: v128
1117 ; SIMD128-VM-NOT: v128
1118 ; SIMD128-NEXT: .functype load_splat_v2i64_from_numeric_address () -> (v128){{$}}
1119 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
1120 ; SIMD128-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}}
1121 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1122 define <2 x i64> @load_splat_v2i64_from_numeric_address() {
       ; Scalar i64 load from the constant address 32, broadcast to both lanes.
       ; The expected output above uses a zero base with offset immediate 32.
1123   %s = inttoptr i32 32 to i64*
1124   %e = load i64, i64* %s
1125   %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
1126   %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer  ; zero mask: every lane reads lane 0
1127   ret <2 x i64> %v2
1128 }
1129
1130 ; CHECK-LABEL: load_v2i64_from_global_address:
1131 ; NO-SIMD128-NOT: v128
1132 ; SIMD128-VM-NOT: v128
1133 ; SIMD128-NEXT: .functype load_v2i64_from_global_address () -> (v128){{$}}
1134 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
1135 ; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, gv_v2i64($pop[[L0]]){{$}}
1136 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1137 @gv_v2i64 = global <2 x i64> <i64 42, i64 42>  ; also the target of store_v2i64_to_global_address
1138 define <2 x i64> @load_v2i64_from_global_address() {
       ; Vector load straight from the global. The expected output above uses the
       ; symbol name as the load's offset on a zero base.
1139   %v = load <2 x i64>, <2 x i64>* @gv_v2i64
1140   ret <2 x i64> %v
1141 }
1142
1143 ; CHECK-LABEL: load_splat_v2i64_from_global_address:
1144 ; NO-SIMD128-NOT: v128
1145 ; SIMD128-VM-NOT: v128
1146 ; SIMD128-NEXT: .functype load_splat_v2i64_from_global_address () -> (v128){{$}}
1147 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
1148 ; SIMD128-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, gv_i64($pop[[L0]]){{$}}
1149 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1150 @gv_i64 = global i64 42  ; scalar source for the splat below
1151 define <2 x i64> @load_splat_v2i64_from_global_address() {
       ; Scalar i64 load from the global, broadcast to both lanes. The expected
       ; output above uses the symbol name as the splat load's offset on a zero
       ; base.
1152   %e = load i64, i64* @gv_i64
1153   %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
1154   %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer  ; zero mask: every lane reads lane 0
1155   ret <2 x i64> %v2
1156 }
1157
1158 ; CHECK-LABEL: store_v2i64:
1159 ; NO-SIMD128-NOT: v128
1160 ; SIMD128-VM-NOT: v128
1161 ; SIMD128-NEXT: .functype store_v2i64 (v128, i32) -> (){{$}}
1162 ; SIMD128-NEXT: v128.store 0($1), $0{{$}}
1163 define void @store_v2i64(<2 x i64> %v, <2 x i64>* %p) {
       ; Baseline store: writes the whole vector %v through %p with no address
       ; arithmetic. The expected output above uses offset immediate 0.
1164   store <2 x i64> %v , <2 x i64>* %p
1165   ret void
1166 }
1167
1168 ; CHECK-LABEL: store_v2i64_with_folded_offset:
1169 ; NO-SIMD128-NOT: v128
1170 ; SIMD128-VM-NOT: v128
1171 ; SIMD128-NEXT: .functype store_v2i64_with_folded_offset (v128, i32) -> (){{$}}
1172 ; SIMD128-NEXT: v128.store 16($1), $0{{$}}
1173 define void @store_v2i64_with_folded_offset(<2 x i64> %v, <2 x i64>* %p) {
       ; Vector store to %p + 16, with the address built in integer arithmetic.
       ; The expected output above carries the 16 as the store's offset immediate.
1174   %q = ptrtoint <2 x i64>* %p to i32
1175   %r = add nuw i32 %q, 16  ; nuw: the add is flagged as not wrapping unsigned
1176   %s = inttoptr i32 %r to <2 x i64>*
1177   store <2 x i64> %v , <2 x i64>* %s
1178   ret void
1179 }
1180
1181 ; CHECK-LABEL: store_v2i64_with_folded_gep_offset:
1182 ; NO-SIMD128-NOT: v128
1183 ; SIMD128-VM-NOT: v128
1184 ; SIMD128-NEXT: .functype store_v2i64_with_folded_gep_offset (v128, i32) -> (){{$}}
1185 ; SIMD128-NEXT: v128.store 16($1), $0{{$}}
1186 define void @store_v2i64_with_folded_gep_offset(<2 x i64> %v, <2 x i64>* %p) {
       ; Vector store one element past %p via an inbounds gep. The expected output
       ; above carries the 16-byte step as the store's offset immediate.
1187   %s = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 1
1188   store <2 x i64> %v , <2 x i64>* %s
1189   ret void
1190 }
1191
1192 ; CHECK-LABEL: store_v2i64_with_unfolded_gep_negative_offset:
1193 ; NO-SIMD128-NOT: v128
1194 ; SIMD128-VM-NOT: v128
1195 ; SIMD128-NEXT: .functype store_v2i64_with_unfolded_gep_negative_offset (v128, i32) -> (){{$}}
1196 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}}
1197 ; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
1198 ; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}}
1199 define void @store_v2i64_with_unfolded_gep_negative_offset(<2 x i64> %v, <2 x i64>* %p) {
       ; Vector store one element before %p (gep index -1). The expected output
       ; above adds -16 explicitly and leaves the store's offset immediate at 0.
1200   %s = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 -1
1201   store <2 x i64> %v , <2 x i64>* %s
1202   ret void
1203 }
1204
1205 ; CHECK-LABEL: store_v2i64_with_unfolded_offset:
1206 ; NO-SIMD128-NOT: v128
1207 ; SIMD128-VM-NOT: v128
1208 ; SIMD128-NEXT: .functype store_v2i64_with_unfolded_offset (v128, i32) -> (){{$}}
1209 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
1210 ; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
1211 ; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}}
1212 define void @store_v2i64_with_unfolded_offset(<2 x i64> %v, <2 x i64>* %p) {
       ; Vector store to %p + 16 where the integer add is flagged nsw (not nuw).
       ; This is the store counterpart of load_v2i64_with_unfolded_offset: the
       ; expected output keeps an explicit i32.const 16 / i32.add and a zero
       ; offset immediate. Using the ptrtoint/add/inttoptr shape keeps this test
       ; distinct from store_v2i64_with_unfolded_gep_negative_offset.
1213   %q = ptrtoint <2 x i64>* %p to i32
       %r = add nsw i32 %q, 16  ; nsw only, no nuw flag
       %s = inttoptr i32 %r to <2 x i64>*
1214   store <2 x i64> %v , <2 x i64>* %s
1215   ret void
1216 }
1217
1218 ; CHECK-LABEL: store_v2i64_with_unfolded_gep_offset:
1219 ; NO-SIMD128-NOT: v128
1220 ; SIMD128-VM-NOT: v128
1221 ; SIMD128-NEXT: .functype store_v2i64_with_unfolded_gep_offset (v128, i32) -> (){{$}}
1222 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
1223 ; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
1224 ; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}}
1225 define void @store_v2i64_with_unfolded_gep_offset(<2 x i64> %v, <2 x i64>* %p) {
       ; Vector store one element past %p through a gep without inbounds. The
       ; expected output above keeps the 16-byte step as an explicit i32.add.
1226   %s = getelementptr <2 x i64>, <2 x i64>* %p, i32 1
1227   store <2 x i64> %v , <2 x i64>* %s
1228   ret void
1229 }
1230
1231 ; CHECK-LABEL: store_v2i64_to_numeric_address:
1232 ; NO-SIMD128-NOT: v128
1233 ; SIMD128-VM-NOT: v128
1234 ; SIMD128-NEXT: .functype store_v2i64_to_numeric_address (v128) -> (){{$}}
1235 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
1236 ; SIMD128-NEXT: v128.store 32($pop[[L0]]), $0{{$}}
1237 define void @store_v2i64_to_numeric_address(<2 x i64> %v) {
       ; Vector store to the constant address 32. The expected output above
       ; materialises a zero base and uses 32 as the store's offset immediate.
1238   %s = inttoptr i32 32 to <2 x i64>*
1239   store <2 x i64> %v , <2 x i64>* %s
1240   ret void
1241 }
1242
1243 ; CHECK-LABEL: store_v2i64_to_global_address:
1244 ; NO-SIMD128-NOT: v128
1245 ; SIMD128-VM-NOT: v128
1246 ; SIMD128-NEXT: .functype store_v2i64_to_global_address (v128) -> (){{$}}
1247 ; SIMD128-NEXT: i32.const $push[[R:[0-9]+]]=, 0{{$}}
1248 ; SIMD128-NEXT: v128.store gv_v2i64($pop[[R]]), $0{{$}}
1249 define void @store_v2i64_to_global_address(<2 x i64> %v) {
       ; Vector store straight to @gv_v2i64 (declared next to
       ; load_v2i64_from_global_address). The expected output above uses the
       ; symbol name as the store's offset on a zero base.
1250   store <2 x i64> %v , <2 x i64>* @gv_v2i64
1251   ret void
1252 }
1253
1254 ; ==============================================================================
1255 ; 4 x float
1256 ; ==============================================================================
1257 ; CHECK-LABEL: load_v4f32:
1258 ; NO-SIMD128-NOT: v128
1259 ; SIMD128-NEXT: .functype load_v4f32 (i32) -> (v128){{$}}
1260 ; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($0){{$}}
1261 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1262 define <4 x float> @load_v4f32(<4 x float>* %p) {
       ; Baseline load: reads a whole <4 x float> through %p with no address
       ; arithmetic. The expected output above uses offset immediate 0.
1263   %v = load <4 x float>, <4 x float>* %p
1264   ret <4 x float> %v
1265 }
1266
1267 ; CHECK-LABEL: load_splat_v4f32:
1268 ; NO-SIMD128-NOT: v128
1269 ; SIMD128-NEXT: .functype load_splat_v4f32 (i32) -> (v128){{$}}
1270 ; SIMD128-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, 0($0){{$}}
1271 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1272 define <4 x float> @load_splat_v4f32(float* %p) {
       ; Scalar float load through %p, broadcast to all four lanes. The expected
       ; output above shows one splat load with offset immediate 0.
1273   %e = load float, float* %p
1274   %v1 = insertelement <4 x float> undef, float %e, i32 0
1275   %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer  ; zero mask: every lane reads lane 0
1276   ret <4 x float> %v2
1277 }
1278
1279 ; CHECK-LABEL: load_v4f32_with_folded_offset:
1280 ; NO-SIMD128-NOT: v128
1281 ; SIMD128-NEXT: .functype load_v4f32_with_folded_offset (i32) -> (v128){{$}}
1282 ; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 16($0){{$}}
1283 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1284 define <4 x float> @load_v4f32_with_folded_offset(<4 x float>* %p) {
       ; Vector load from %p + 16, with the address built in integer arithmetic.
       ; The expected output above carries the 16 as the load's offset immediate.
1285   %q = ptrtoint <4 x float>* %p to i32
1286   %r = add nuw i32 %q, 16  ; nuw: the add is flagged as not wrapping unsigned
1287   %s = inttoptr i32 %r to <4 x float>*
1288   %v = load <4 x float>, <4 x float>* %s
1289   ret <4 x float> %v
1290 }
1291
1292 ; CHECK-LABEL: load_splat_v4f32_with_folded_offset:
1293 ; NO-SIMD128-NOT: v128
1294 ; SIMD128-NEXT: .functype load_splat_v4f32_with_folded_offset (i32) -> (v128){{$}}
1295 ; SIMD128-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, 16($0){{$}}
1296 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1297 define <4 x float> @load_splat_v4f32_with_folded_offset(float* %p) {
       ; Scalar float load from %p + 16 (integer add flagged nuw), broadcast to
       ; all four lanes. The expected output above shows one splat load with
       ; offset immediate 16.
1298   %q = ptrtoint float* %p to i32
1299   %r = add nuw i32 %q, 16
1300   %s = inttoptr i32 %r to float*
1301   %e = load float, float* %s
1302   %v1 = insertelement <4 x float> undef, float %e, i32 0
1303   %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer  ; zero mask: every lane reads lane 0
1304   ret <4 x float> %v2
1305 }
1306
1307 ; CHECK-LABEL: load_v4f32_with_folded_gep_offset:
1308 ; NO-SIMD128-NOT: v128
1309 ; SIMD128-NEXT: .functype load_v4f32_with_folded_gep_offset (i32) -> (v128){{$}}
1310 ; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 16($0){{$}}
1311 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1312 define <4 x float> @load_v4f32_with_folded_gep_offset(<4 x float>* %p) {
       ; Vector load one element past %p via an inbounds gep. The expected output
       ; above carries the 16-byte step as the load's offset immediate.
1313   %s = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 1
1314   %v = load <4 x float>, <4 x float>* %s
1315   ret <4 x float> %v
1316 }
1317
1318 ; CHECK-LABEL: load_splat_v4f32_with_folded_gep_offset:
1319 ; NO-SIMD128-NOT: v128
1320 ; SIMD128-NEXT: .functype load_splat_v4f32_with_folded_gep_offset (i32) -> (v128){{$}}
1321 ; SIMD128-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, 4($0){{$}}
1322 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1323 define <4 x float> @load_splat_v4f32_with_folded_gep_offset(float* %p) {
       ; Scalar float load one element past %p via an inbounds gep, broadcast to
       ; all four lanes. The expected output above uses offset immediate 4 (one
       ; float).
1324   %s = getelementptr inbounds float, float* %p, i32 1
1325   %e = load float, float* %s
1326   %v1 = insertelement <4 x float> undef, float %e, i32 0
1327   %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer  ; zero mask: every lane reads lane 0
1328   ret <4 x float> %v2
1329 }
1330
1331 ; CHECK-LABEL: load_v4f32_with_unfolded_gep_negative_offset:
1332 ; NO-SIMD128-NOT: v128
1333 ; SIMD128-NEXT: .functype load_v4f32_with_unfolded_gep_negative_offset (i32) -> (v128){{$}}
1334 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}}
1335 ; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1336 ; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
1337 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1338 define <4 x float> @load_v4f32_with_unfolded_gep_negative_offset(<4 x float>* %p) {
       ; Vector load one element before %p (gep index -1). The expected output
       ; above adds -16 explicitly and leaves the load's offset immediate at 0.
1339   %s = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 -1
1340   %v = load <4 x float>, <4 x float>* %s
1341   ret <4 x float> %v
1342 }
1343
1344 ; CHECK-LABEL: load_splat_v4f32_with_unfolded_gep_negative_offset:
1345 ; NO-SIMD128-NOT: v128
1346 ; SIMD128-NEXT: .functype load_splat_v4f32_with_unfolded_gep_negative_offset (i32) -> (v128){{$}}
1347 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -4{{$}}
1348 ; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1349 ; SIMD128-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
1350 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1351 define <4 x float> @load_splat_v4f32_with_unfolded_gep_negative_offset(float* %p) {
       ; Scalar float load one element before %p (gep index -1), broadcast to all
       ; four lanes. The expected output above adds -4 explicitly and uses a zero
       ; offset immediate on the splat load.
1352   %s = getelementptr inbounds float, float* %p, i32 -1
1353   %e = load float, float* %s
1354   %v1 = insertelement <4 x float> undef, float %e, i32 0
1355   %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer  ; zero mask: every lane reads lane 0
1356   ret <4 x float> %v2
1357 }
1358
1359 ; CHECK-LABEL: load_v4f32_with_unfolded_offset:
1360 ; NO-SIMD128-NOT: v128
1361 ; SIMD128-NEXT: .functype load_v4f32_with_unfolded_offset (i32) -> (v128){{$}}
1362 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
1363 ; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1364 ; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
1365 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1366 define <4 x float> @load_v4f32_with_unfolded_offset(<4 x float>* %p) {
       ; Same address computation as load_v4f32_with_folded_offset, except the add
       ; is flagged nsw instead of nuw. The expected output above keeps an explicit
       ; i32.const 16 / i32.add and a zero offset immediate.
1367   %q = ptrtoint <4 x float>* %p to i32
1368   %r = add nsw i32 %q, 16  ; nsw only; presumably unsigned wrap is still possible - TODO confirm
1369   %s = inttoptr i32 %r to <4 x float>*
1370   %v = load <4 x float>, <4 x float>* %s
1371   ret <4 x float> %v
1372 }
1373
1374 ; CHECK-LABEL: load_splat_v4f32_with_unfolded_offset:
1375 ; NO-SIMD128-NOT: v128
1376 ; SIMD128-NEXT: .functype load_splat_v4f32_with_unfolded_offset (i32) -> (v128){{$}}
1377 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
1378 ; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1379 ; SIMD128-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
1380 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1381 define <4 x float> @load_splat_v4f32_with_unfolded_offset(float* %p) {
       ; Scalar float load from %p + 16 where the integer add is flagged nsw (not
       ; nuw), broadcast to all four lanes. The expected output above keeps the
       ; add explicit and uses a zero offset immediate on the splat load.
1382   %q = ptrtoint float* %p to i32
1383   %r = add nsw i32 %q, 16
1384   %s = inttoptr i32 %r to float*
1385   %e = load float, float* %s
1386   %v1 = insertelement <4 x float> undef, float %e, i32 0
1387   %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer  ; zero mask: every lane reads lane 0
1388   ret <4 x float> %v2
1389 }
1390
; Vector load through a GEP of +1 vector that is NOT marked inbounds. With
; unimplemented-simd128 the 16-byte step is expected to stay an explicit
; i32.add, with v128.load using offset 0.
1391 ; CHECK-LABEL: load_v4f32_with_unfolded_gep_offset:
1392 ; NO-SIMD128-NOT: v128
1393 ; SIMD128-NEXT: .functype load_v4f32_with_unfolded_gep_offset (i32) -> (v128){{$}}
1394 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
1395 ; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1396 ; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
1397 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1398 define <4 x float> @load_v4f32_with_unfolded_gep_offset(<4 x float>* %p) {
1399   %s = getelementptr <4 x float>, <4 x float>* %p, i32 1
1400   %v = load <4 x float>, <4 x float>* %s
1401   ret <4 x float> %v
1402 }
1403
; Scalar float load through a non-inbounds GEP of +1 element, broadcast to four
; lanes. With unimplemented-simd128 the 4-byte step is expected to stay an
; explicit i32.add, with v32x4.load_splat using offset 0.
1404 ; CHECK-LABEL: load_splat_v4f32_with_unfolded_gep_offset:
1405 ; NO-SIMD128-NOT: v128
1406 ; SIMD128-NEXT: .functype load_splat_v4f32_with_unfolded_gep_offset (i32) -> (v128){{$}}
1407 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 4{{$}}
1408 ; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1409 ; SIMD128-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
1410 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1411 define <4 x float> @load_splat_v4f32_with_unfolded_gep_offset(float* %p) {
1412   %s = getelementptr float, float* %p, i32 1
1413   %e = load float, float* %s
1414   %v1 = insertelement <4 x float> undef, float %e, i32 0
1415   %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
1416   ret <4 x float> %v2
1417 }
1418
; Vector load from the constant address 32. With unimplemented-simd128 the
; constant is expected to appear as the load's offset (32) on a zero base.
1419 ; CHECK-LABEL: load_v4f32_from_numeric_address:
1420 ; NO-SIMD128-NOT: v128
1421 ; SIMD128-NEXT: .functype load_v4f32_from_numeric_address () -> (v128){{$}}
1422 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
1423 ; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}}
1424 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1425 define <4 x float> @load_v4f32_from_numeric_address() {
1426   %s = inttoptr i32 32 to <4 x float>*
1427   %v = load <4 x float>, <4 x float>* %s
1428   ret <4 x float> %v
1429 }
1430
; Scalar float load from the constant address 32, broadcast to four lanes.
; With unimplemented-simd128 this is expected to become v32x4.load_splat with
; offset 32 on a zero base.
1431 ; CHECK-LABEL: load_splat_v4f32_from_numeric_address:
1432 ; NO-SIMD128-NOT: v128
1433 ; SIMD128-NEXT: .functype load_splat_v4f32_from_numeric_address () -> (v128){{$}}
1434 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
1435 ; SIMD128-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}}
1436 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1437 define <4 x float> @load_splat_v4f32_from_numeric_address() {
1438   %s = inttoptr i32 32 to float*
1439   %e = load float, float* %s
1440   %v1 = insertelement <4 x float> undef, float %e, i32 0
1441   %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
1442   ret <4 x float> %v2
1443 }
1444
; Vector load directly from the global @gv_v4f32 (declared below, between the
; directives and the function). With unimplemented-simd128 the symbol is
; expected to appear as the load's offset on a zero base.
1445 ; CHECK-LABEL: load_v4f32_from_global_address:
1446 ; NO-SIMD128-NOT: v128
1447 ; SIMD128-NEXT: .functype load_v4f32_from_global_address () -> (v128){{$}}
1448 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
1449 ; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, gv_v4f32($pop[[L0]]){{$}}
1450 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1451 @gv_v4f32 = global <4 x float> <float 42., float 42., float 42., float 42.>
1452 define <4 x float> @load_v4f32_from_global_address() {
1453   %v = load <4 x float>, <4 x float>* @gv_v4f32
1454   ret <4 x float> %v
1455 }
1456
; Scalar float load from the global @gv_f32 (declared below), broadcast to four
; lanes. With unimplemented-simd128 this is expected to become
; v32x4.load_splat with the symbol as offset on a zero base.
1457 ; CHECK-LABEL: load_splat_v4f32_from_global_address:
1458 ; NO-SIMD128-NOT: v128
1459 ; SIMD128-NEXT: .functype load_splat_v4f32_from_global_address () -> (v128){{$}}
1460 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
1461 ; SIMD128-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, gv_f32($pop[[L0]]){{$}}
1462 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1463 @gv_f32 = global float 42.
1464 define <4 x float> @load_splat_v4f32_from_global_address() {
1465   %e = load float, float* @gv_f32
1466   %v1 = insertelement <4 x float> undef, float %e, i32 0
1467   %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
1468   ret <4 x float> %v2
1469 }
1470
; Baseline vector store of %v to %p with no address arithmetic. With
; unimplemented-simd128 a single v128.store at offset 0 is expected.
1471 ; CHECK-LABEL: store_v4f32:
1472 ; NO-SIMD128-NOT: v128
1473 ; SIMD128-NEXT: .functype store_v4f32 (v128, i32) -> (){{$}}
1474 ; SIMD128-NEXT: v128.store 0($1), $0{{$}}
1475 define void @store_v4f32(<4 x float> %v, <4 x float>* %p) {
1476   store <4 x float> %v , <4 x float>* %p
1477   ret void
1478 }
1479
; Vector store to %p + 16, where the address is built with ptrtoint /
; `add nuw` / inttoptr. With unimplemented-simd128 the constant is expected to
; be folded into the store as offset 16, with no separate add.
1480 ; CHECK-LABEL: store_v4f32_with_folded_offset:
1481 ; NO-SIMD128-NOT: v128
1482 ; SIMD128-NEXT: .functype store_v4f32_with_folded_offset (v128, i32) -> (){{$}}
1483 ; SIMD128-NEXT: v128.store 16($1), $0{{$}}
1484 define void @store_v4f32_with_folded_offset(<4 x float> %v, <4 x float>* %p) {
1485   %q = ptrtoint <4 x float>* %p to i32
1486   %r = add nuw i32 %q, 16
1487   %s = inttoptr i32 %r to <4 x float>*
1488   store <4 x float> %v , <4 x float>* %s
1489   ret void
1490 }
1491
; Vector store through an inbounds GEP of +1 vector. With
; unimplemented-simd128 the 16-byte step is expected to be folded into the
; store as offset 16.
1492 ; CHECK-LABEL: store_v4f32_with_folded_gep_offset:
1493 ; NO-SIMD128-NOT: v128
1494 ; SIMD128-NEXT: .functype store_v4f32_with_folded_gep_offset (v128, i32) -> (){{$}}
1495 ; SIMD128-NEXT: v128.store 16($1), $0{{$}}
1496 define void @store_v4f32_with_folded_gep_offset(<4 x float> %v, <4 x float>* %p) {
1497   %s = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 1
1498   store <4 x float> %v , <4 x float>* %s
1499   ret void
1500 }
1501
; Vector store through an inbounds GEP of -1 vector. With
; unimplemented-simd128 the negative step is expected NOT to be folded: an
; explicit i32.add of -16 feeds v128.store at offset 0.
1502 ; CHECK-LABEL: store_v4f32_with_unfolded_gep_negative_offset:
1503 ; NO-SIMD128-NOT: v128
1504 ; SIMD128-NEXT: .functype store_v4f32_with_unfolded_gep_negative_offset (v128, i32) -> (){{$}}
1505 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}}
1506 ; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
1507 ; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}}
1508 define void @store_v4f32_with_unfolded_gep_negative_offset(<4 x float> %v, <4 x float>* %p) {
1509   %s = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 -1
1510   store <4 x float> %v , <4 x float>* %s
1511   ret void
1512 }
1513
; Vector store to %p + 16, where the address is built with ptrtoint /
; `add nsw` / inttoptr (the store counterpart of
; load_v4f32_with_unfolded_offset). With unimplemented-simd128 the add is
; expected to stay an explicit i32.add feeding v128.store at offset 0.
; This case previously duplicated the negative-GEP test body and never
; exercised the `add nsw` path.
1514 ; CHECK-LABEL: store_v4f32_with_unfolded_offset:
1515 ; NO-SIMD128-NOT: v128
1516 ; SIMD128-NEXT: .functype store_v4f32_with_unfolded_offset (v128, i32) -> (){{$}}
1517 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
1518 ; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
1519 ; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}}
1520 define void @store_v4f32_with_unfolded_offset(<4 x float> %v, <4 x float>* %p) {
1521   %q = ptrtoint <4 x float>* %p to i32
       %r = add nsw i32 %q, 16
       %s = inttoptr i32 %r to <4 x float>*
1522   store <4 x float> %v , <4 x float>* %s
1523   ret void
1524 }
1525
; Vector store through a GEP of +1 vector that is NOT marked inbounds. With
; unimplemented-simd128 the 16-byte step is expected to stay an explicit
; i32.add, with v128.store using offset 0.
1526 ; CHECK-LABEL: store_v4f32_with_unfolded_gep_offset:
1527 ; NO-SIMD128-NOT: v128
1528 ; SIMD128-NEXT: .functype store_v4f32_with_unfolded_gep_offset (v128, i32) -> (){{$}}
1529 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
1530 ; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
1531 ; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}}
1532 define void @store_v4f32_with_unfolded_gep_offset(<4 x float> %v, <4 x float>* %p) {
1533   %s = getelementptr <4 x float>, <4 x float>* %p, i32 1
1534   store <4 x float> %v , <4 x float>* %s
1535   ret void
1536 }
1537
; Vector store to the constant address 32. With unimplemented-simd128 the
; constant is expected to appear as the store's offset (32) on a zero base.
1538 ; CHECK-LABEL: store_v4f32_to_numeric_address:
1539 ; NO-SIMD128-NOT: v128
1540 ; SIMD128-NEXT: .functype store_v4f32_to_numeric_address (v128) -> (){{$}}
1541 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
1542 ; SIMD128-NEXT: v128.store 32($pop[[L0]]), $0{{$}}
1543 define void @store_v4f32_to_numeric_address(<4 x float> %v) {
1544   %s = inttoptr i32 32 to <4 x float>*
1545   store <4 x float> %v , <4 x float>* %s
1546   ret void
1547 }
1548
; Vector store directly to the global @gv_v4f32. With unimplemented-simd128
; the symbol is expected to appear as the store's offset on a zero base.
1549 ; CHECK-LABEL: store_v4f32_to_global_address:
1550 ; NO-SIMD128-NOT: v128
1551 ; SIMD128-NEXT: .functype store_v4f32_to_global_address (v128) -> (){{$}}
1552 ; SIMD128-NEXT: i32.const $push[[R:[0-9]+]]=, 0{{$}}
1553 ; SIMD128-NEXT: v128.store gv_v4f32($pop[[R]]), $0{{$}}
1554 define void @store_v4f32_to_global_address(<4 x float> %v) {
1555   store <4 x float> %v , <4 x float>* @gv_v4f32
1556   ret void
1557 }
1558
1559 ; ==============================================================================
1560 ; 2 x double
1561 ; ==============================================================================
; Baseline <2 x double> load from %p. With unimplemented-simd128 a single
; v128.load at offset 0 is expected; with plain simd128 (and with no SIMD
; feature) no v128 may appear in this function.
1562 ; CHECK-LABEL: load_v2f64:
1563 ; NO-SIMD128-NOT: v128
1564 ; SIMD128-VM-NOT: v128
1565 ; SIMD128-NEXT: .functype load_v2f64 (i32) -> (v128){{$}}
1566 ; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($0){{$}}
1567 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1568 define <2 x double> @load_v2f64(<2 x double>* %p) {
1569   %v = load <2 x double>, <2 x double>* %p
1570   ret <2 x double> %v
1571 }
1572
; Scalar double load from %p broadcast to both lanes. With
; unimplemented-simd128 a single v64x2.load_splat at offset 0 is expected;
; with plain simd128 no v128 may appear in this function.
1573 ; CHECK-LABEL: load_splat_v2f64:
1574 ; NO-SIMD128-NOT: v128
1575 ; SIMD128-VM-NOT: v128
1576 ; SIMD128-NEXT: .functype load_splat_v2f64 (i32) -> (v128){{$}}
1577 ; SIMD128-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, 0($0){{$}}
1578 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1579 define <2 x double> @load_splat_v2f64(double* %p) {
1580   %e = load double, double* %p
1581   %v1 = insertelement <2 x double> undef, double %e, i32 0
1582   %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
1583   ret <2 x double> %v2
1584 }
1585
; <2 x double> load from %p + 16 built with ptrtoint / `add nuw` / inttoptr.
; With unimplemented-simd128 the constant is expected to be folded into
; v128.load as offset 16; with plain simd128 no v128 may appear.
1586 ; CHECK-LABEL: load_v2f64_with_folded_offset:
1587 ; NO-SIMD128-NOT: v128
1588 ; SIMD128-VM-NOT: v128
1589 ; SIMD128-NEXT: .functype load_v2f64_with_folded_offset (i32) -> (v128){{$}}
1590 ; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 16($0){{$}}
1591 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1592 define <2 x double> @load_v2f64_with_folded_offset(<2 x double>* %p) {
1593   %q = ptrtoint <2 x double>* %p to i32
1594   %r = add nuw i32 %q, 16
1595   %s = inttoptr i32 %r to <2 x double>*
1596   %v = load <2 x double>, <2 x double>* %s
1597   ret <2 x double> %v
1598 }
1599
; Scalar double load from %p + 16 (ptrtoint / `add nuw` / inttoptr), broadcast
; to both lanes. With unimplemented-simd128 the constant is expected to be
; folded into v64x2.load_splat as offset 16; with plain simd128 no v128 may
; appear.
1600 ; CHECK-LABEL: load_splat_v2f64_with_folded_offset:
1601 ; NO-SIMD128-NOT: v128
1602 ; SIMD128-VM-NOT: v128
1603 ; SIMD128-NEXT: .functype load_splat_v2f64_with_folded_offset (i32) -> (v128){{$}}
1604 ; SIMD128-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, 16($0){{$}}
1605 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1606 define <2 x double> @load_splat_v2f64_with_folded_offset(double* %p) {
1607   %q = ptrtoint double* %p to i32
1608   %r = add nuw i32 %q, 16
1609   %s = inttoptr i32 %r to double*
1610   %e = load double, double* %s
1611   %v1 = insertelement <2 x double> undef, double %e, i32 0
1612   %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
1613   ret <2 x double> %v2
1614 }
1615
; <2 x double> load through an inbounds GEP of +1 vector. With
; unimplemented-simd128 the 16-byte step is expected to be folded into
; v128.load as offset 16; with plain simd128 no v128 may appear.
1616 ; CHECK-LABEL: load_v2f64_with_folded_gep_offset:
1617 ; NO-SIMD128-NOT: v128
1618 ; SIMD128-VM-NOT: v128
1619 ; SIMD128-NEXT: .functype load_v2f64_with_folded_gep_offset (i32) -> (v128){{$}}
1620 ; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 16($0){{$}}
1621 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1622 define <2 x double> @load_v2f64_with_folded_gep_offset(<2 x double>* %p) {
1623   %s = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 1
1624   %v = load <2 x double>, <2 x double>* %s
1625   ret <2 x double> %v
1626 }
1627
; Scalar double load through an inbounds GEP of +1 element, broadcast to both
; lanes. With unimplemented-simd128 the 8-byte step is expected to be folded
; into v64x2.load_splat as offset 8; with plain simd128 no v128 may appear.
1628 ; CHECK-LABEL: load_splat_v2f64_with_folded_gep_offset:
1629 ; NO-SIMD128-NOT: v128
1630 ; SIMD128-VM-NOT: v128
1631 ; SIMD128-NEXT: .functype load_splat_v2f64_with_folded_gep_offset (i32) -> (v128){{$}}
1632 ; SIMD128-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, 8($0){{$}}
1633 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1634 define <2 x double> @load_splat_v2f64_with_folded_gep_offset(double* %p) {
1635   %s = getelementptr inbounds double, double* %p, i32 1
1636   %e = load double, double* %s
1637   %v1 = insertelement <2 x double> undef, double %e, i32 0
1638   %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
1639   ret <2 x double> %v2
1640 }
1641
; <2 x double> load through an inbounds GEP of -1 vector. With
; unimplemented-simd128 the negative step is expected NOT to be folded: an
; explicit i32.add of -16 feeds v128.load at offset 0. With plain simd128 no
; v128 may appear.
1642 ; CHECK-LABEL: load_v2f64_with_unfolded_gep_negative_offset:
1643 ; NO-SIMD128-NOT: v128
1644 ; SIMD128-VM-NOT: v128
1645 ; SIMD128-NEXT: .functype load_v2f64_with_unfolded_gep_negative_offset (i32) -> (v128){{$}}
1646 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}}
1647 ; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1648 ; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
1649 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1650 define <2 x double> @load_v2f64_with_unfolded_gep_negative_offset(<2 x double>* %p) {
1651   %s = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 -1
1652   %v = load <2 x double>, <2 x double>* %s
1653   ret <2 x double> %v
1654 }
1655
; Scalar double load through an inbounds GEP of -1 element, broadcast to both
; lanes. With unimplemented-simd128 the negative step is expected NOT to be
; folded: an explicit i32.add of -8 feeds v64x2.load_splat at offset 0. With
; plain simd128 no v128 may appear.
1656 ; CHECK-LABEL: load_splat_v2f64_with_unfolded_gep_negative_offset:
1657 ; NO-SIMD128-NOT: v128
1658 ; SIMD128-VM-NOT: v128
1659 ; SIMD128-NEXT: .functype load_splat_v2f64_with_unfolded_gep_negative_offset (i32) -> (v128){{$}}
1660 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -8{{$}}
1661 ; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1662 ; SIMD128-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
1663 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1664 define <2 x double> @load_splat_v2f64_with_unfolded_gep_negative_offset(double* %p) {
1665   %s = getelementptr inbounds double, double* %p, i32 -1
1666   %e = load double, double* %s
1667   %v1 = insertelement <2 x double> undef, double %e, i32 0
1668   %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
1669   ret <2 x double> %v2
1670 }
1671
; <2 x double> load from %p + 16 built with ptrtoint / `add nsw` / inttoptr.
; With unimplemented-simd128 the add is expected to stay an explicit i32.add
; feeding v128.load at offset 0; with plain simd128 no v128 may appear.
1672 ; CHECK-LABEL: load_v2f64_with_unfolded_offset:
1673 ; NO-SIMD128-NOT: v128
1674 ; SIMD128-VM-NOT: v128
1675 ; SIMD128-NEXT: .functype load_v2f64_with_unfolded_offset (i32) -> (v128){{$}}
1676 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
1677 ; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1678 ; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
1679 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1680 define <2 x double> @load_v2f64_with_unfolded_offset(<2 x double>* %p) {
1681   %q = ptrtoint <2 x double>* %p to i32
1682   %r = add nsw i32 %q, 16
1683   %s = inttoptr i32 %r to <2 x double>*
1684   %v = load <2 x double>, <2 x double>* %s
1685   ret <2 x double> %v
1686 }
1687
; Scalar double load from %p + 16 (ptrtoint / `add nsw` / inttoptr), broadcast
; to both lanes. With unimplemented-simd128 the add is expected to stay an
; explicit i32.add feeding v64x2.load_splat at offset 0; with plain simd128 no
; v128 may appear.
1688 ; CHECK-LABEL: load_splat_v2f64_with_unfolded_offset:
1689 ; NO-SIMD128-NOT: v128
1690 ; SIMD128-VM-NOT: v128
1691 ; SIMD128-NEXT: .functype load_splat_v2f64_with_unfolded_offset (i32) -> (v128){{$}}
1692 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
1693 ; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1694 ; SIMD128-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
1695 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1696 define <2 x double> @load_splat_v2f64_with_unfolded_offset(double* %p) {
1697   %q = ptrtoint double* %p to i32
1698   %r = add nsw i32 %q, 16
1699   %s = inttoptr i32 %r to double*
1700   %e = load double, double* %s
1701   %v1 = insertelement <2 x double> undef, double %e, i32 0
1702   %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
1703   ret <2 x double> %v2
1704 }
1705
; <2 x double> load through a GEP of +1 vector that is NOT marked inbounds.
; With unimplemented-simd128 the 16-byte step is expected to stay an explicit
; i32.add, with v128.load using offset 0; with plain simd128 no v128 may
; appear.
1706 ; CHECK-LABEL: load_v2f64_with_unfolded_gep_offset:
1707 ; NO-SIMD128-NOT: v128
1708 ; SIMD128-VM-NOT: v128
1709 ; SIMD128-NEXT: .functype load_v2f64_with_unfolded_gep_offset (i32) -> (v128){{$}}
1710 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
1711 ; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1712 ; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
1713 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1714 define <2 x double> @load_v2f64_with_unfolded_gep_offset(<2 x double>* %p) {
1715   %s = getelementptr <2 x double>, <2 x double>* %p, i32 1
1716   %v = load <2 x double>, <2 x double>* %s
1717   ret <2 x double> %v
1718 }
1719
; Scalar double load through a non-inbounds GEP of +1 element, broadcast to
; both lanes. With unimplemented-simd128 the 8-byte step is expected to stay an
; explicit i32.add, with v64x2.load_splat using offset 0; with plain simd128 no
; v128 may appear.
1720 ; CHECK-LABEL: load_splat_v2f64_with_unfolded_gep_offset:
1721 ; NO-SIMD128-NOT: v128
1722 ; SIMD128-VM-NOT: v128
1723 ; SIMD128-NEXT: .functype load_splat_v2f64_with_unfolded_gep_offset (i32) -> (v128){{$}}
1724 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 8{{$}}
1725 ; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}}
1726 ; SIMD128-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}}
1727 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1728 define <2 x double> @load_splat_v2f64_with_unfolded_gep_offset(double* %p) {
1729   %s = getelementptr double, double* %p, i32 1
1730   %e = load double, double* %s
1731   %v1 = insertelement <2 x double> undef, double %e, i32 0
1732   %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
1733   ret <2 x double> %v2
1734 }
1735
; <2 x double> load from the constant address 32. With unimplemented-simd128
; the constant is expected to appear as the load's offset (32) on a zero base;
; with plain simd128 no v128 may appear.
1736 ; CHECK-LABEL: load_v2f64_from_numeric_address:
1737 ; NO-SIMD128-NOT: v128
1738 ; SIMD128-VM-NOT: v128
1739 ; SIMD128-NEXT: .functype load_v2f64_from_numeric_address () -> (v128){{$}}
1740 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
1741 ; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}}
1742 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1743 define <2 x double> @load_v2f64_from_numeric_address() {
1744   %s = inttoptr i32 32 to <2 x double>*
1745   %v = load <2 x double>, <2 x double>* %s
1746   ret <2 x double> %v
1747 }
1748
; Scalar double load from the constant address 32, broadcast to both lanes.
; With unimplemented-simd128 this is expected to become v64x2.load_splat with
; offset 32 on a zero base; with plain simd128 no v128 may appear.
1749 ; CHECK-LABEL: load_splat_v2f64_from_numeric_address:
1750 ; NO-SIMD128-NOT: v128
1751 ; SIMD128-VM-NOT: v128
1752 ; SIMD128-NEXT: .functype load_splat_v2f64_from_numeric_address () -> (v128){{$}}
1753 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
1754 ; SIMD128-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}}
1755 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1756 define <2 x double> @load_splat_v2f64_from_numeric_address() {
1757   %s = inttoptr i32 32 to double*
1758   %e = load double, double* %s
1759   %v1 = insertelement <2 x double> undef, double %e, i32 0
1760   %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
1761   ret <2 x double> %v2
1762 }
1763
; <2 x double> load directly from the global @gv_v2f64 (declared below). With
; unimplemented-simd128 the symbol is expected to appear as the load's offset
; on a zero base; with plain simd128 no v128 may appear.
1764 ; CHECK-LABEL: load_v2f64_from_global_address:
1765 ; NO-SIMD128-NOT: v128
1766 ; SIMD128-VM-NOT: v128
1767 ; SIMD128-NEXT: .functype load_v2f64_from_global_address () -> (v128){{$}}
1768 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
1769 ; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, gv_v2f64($pop[[L0]]){{$}}
1770 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1771 @gv_v2f64 = global <2 x double> <double 42., double 42.>
1772 define <2 x double> @load_v2f64_from_global_address() {
1773   %v = load <2 x double>, <2 x double>* @gv_v2f64
1774   ret <2 x double> %v
1775 }
1776
; Scalar double load from the global @gv_f64 (declared below), broadcast to
; both lanes. With unimplemented-simd128 this is expected to become
; v64x2.load_splat with the symbol as offset on a zero base; with plain simd128
; no v128 may appear.
1777 ; CHECK-LABEL: load_splat_v2f64_from_global_address:
1778 ; NO-SIMD128-NOT: v128
1779 ; SIMD128-VM-NOT: v128
1780 ; SIMD128-NEXT: .functype load_splat_v2f64_from_global_address () -> (v128){{$}}
1781 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
1782 ; SIMD128-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, gv_f64($pop[[L0]]){{$}}
1783 ; SIMD128-NEXT: return $pop[[R]]{{$}}
1784 @gv_f64 = global double 42.
1785 define <2 x double> @load_splat_v2f64_from_global_address() {
1786   %e = load double, double* @gv_f64
1787   %v1 = insertelement <2 x double> undef, double %e, i32 0
1788   %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
1789   ret <2 x double> %v2
1790 }
1791
; Baseline <2 x double> store of %v to %p. With unimplemented-simd128 a single
; v128.store at offset 0 is expected; with plain simd128 no v128 may appear.
1792 ; CHECK-LABEL: store_v2f64:
1793 ; NO-SIMD128-NOT: v128
1794 ; SIMD128-VM-NOT: v128
1795 ; SIMD128-NEXT: .functype store_v2f64 (v128, i32) -> (){{$}}
1796 ; SIMD128-NEXT: v128.store 0($1), $0{{$}}
1797 define void @store_v2f64(<2 x double> %v, <2 x double>* %p) {
1798   store <2 x double> %v , <2 x double>* %p
1799   ret void
1800 }
1801
; <2 x double> store to %p + 16 built with ptrtoint / `add nuw` / inttoptr.
; With unimplemented-simd128 the constant is expected to be folded into
; v128.store as offset 16; with plain simd128 no v128 may appear.
1802 ; CHECK-LABEL: store_v2f64_with_folded_offset:
1803 ; NO-SIMD128-NOT: v128
1804 ; SIMD128-VM-NOT: v128
1805 ; SIMD128-NEXT: .functype store_v2f64_with_folded_offset (v128, i32) -> (){{$}}
1806 ; SIMD128-NEXT: v128.store 16($1), $0{{$}}
1807 define void @store_v2f64_with_folded_offset(<2 x double> %v, <2 x double>* %p) {
1808   %q = ptrtoint <2 x double>* %p to i32
1809   %r = add nuw i32 %q, 16
1810   %s = inttoptr i32 %r to <2 x double>*
1811   store <2 x double> %v , <2 x double>* %s
1812   ret void
1813 }
1814
; <2 x double> store through an inbounds GEP of +1 vector. With
; unimplemented-simd128 the 16-byte step is expected to be folded into
; v128.store as offset 16; with plain simd128 no v128 may appear.
1815 ; CHECK-LABEL: store_v2f64_with_folded_gep_offset:
1816 ; NO-SIMD128-NOT: v128
1817 ; SIMD128-VM-NOT: v128
1818 ; SIMD128-NEXT: .functype store_v2f64_with_folded_gep_offset (v128, i32) -> (){{$}}
1819 ; SIMD128-NEXT: v128.store 16($1), $0{{$}}
1820 define void @store_v2f64_with_folded_gep_offset(<2 x double> %v, <2 x double>* %p) {
1821   %s = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 1
1822   store <2 x double> %v , <2 x double>* %s
1823   ret void
1824 }
1825
; <2 x double> store through an inbounds GEP of -1 vector. With
; unimplemented-simd128 the negative step is expected NOT to be folded: an
; explicit i32.add of -16 feeds v128.store at offset 0. With plain simd128 no
; v128 may appear.
1826 ; CHECK-LABEL: store_v2f64_with_unfolded_gep_negative_offset:
1827 ; NO-SIMD128-NOT: v128
1828 ; SIMD128-VM-NOT: v128
1829 ; SIMD128-NEXT: .functype store_v2f64_with_unfolded_gep_negative_offset (v128, i32) -> (){{$}}
1830 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}}
1831 ; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
1832 ; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}}
1833 define void @store_v2f64_with_unfolded_gep_negative_offset(<2 x double> %v, <2 x double>* %p) {
1834   %s = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 -1
1835   store <2 x double> %v , <2 x double>* %s
1836   ret void
1837 }
1838
; <2 x double> store to %p + 16, where the address is built with ptrtoint /
; `add nsw` / inttoptr (the store counterpart of
; load_v2f64_with_unfolded_offset). With unimplemented-simd128 the add is
; expected to stay an explicit i32.add feeding v128.store at offset 0; with
; plain simd128 no v128 may appear. This case previously duplicated the
; negative-GEP test body and never exercised the `add nsw` path.
1839 ; CHECK-LABEL: store_v2f64_with_unfolded_offset:
1840 ; NO-SIMD128-NOT: v128
1841 ; SIMD128-VM-NOT: v128
1842 ; SIMD128-NEXT: .functype store_v2f64_with_unfolded_offset (v128, i32) -> (){{$}}
1843 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
1844 ; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
1845 ; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}}
1846 define void @store_v2f64_with_unfolded_offset(<2 x double> %v, <2 x double>* %p) {
1847   %q = ptrtoint <2 x double>* %p to i32
       %r = add nsw i32 %q, 16
       %s = inttoptr i32 %r to <2 x double>*
1848   store <2 x double> %v , <2 x double>* %s
1849   ret void
1850 }
1851
; <2 x double> store through a GEP of +1 vector that is NOT marked inbounds.
; With unimplemented-simd128 the 16-byte step is expected to stay an explicit
; i32.add, with v128.store using offset 0; with plain simd128 no v128 may
; appear.
1852 ; CHECK-LABEL: store_v2f64_with_unfolded_gep_offset:
1853 ; NO-SIMD128-NOT: v128
1854 ; SIMD128-VM-NOT: v128
1855 ; SIMD128-NEXT: .functype store_v2f64_with_unfolded_gep_offset (v128, i32) -> (){{$}}
1856 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}}
1857 ; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}}
1858 ; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}}
1859 define void @store_v2f64_with_unfolded_gep_offset(<2 x double> %v, <2 x double>* %p) {
1860   %s = getelementptr <2 x double>, <2 x double>* %p, i32 1
1861   store <2 x double> %v , <2 x double>* %s
1862   ret void
1863 }
1864
; <2 x double> store to the constant address 32. With unimplemented-simd128
; the constant is expected to appear as the store's offset (32) on a zero
; base; with plain simd128 no v128 may appear.
1865 ; CHECK-LABEL: store_v2f64_to_numeric_address:
1866 ; NO-SIMD128-NOT: v128
1867 ; SIMD128-VM-NOT: v128
1868 ; SIMD128-NEXT: .functype store_v2f64_to_numeric_address (v128) -> (){{$}}
1869 ; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
1870 ; SIMD128-NEXT: v128.store 32($pop[[L0]]), $0{{$}}
1871 define void @store_v2f64_to_numeric_address(<2 x double> %v) {
1872   %s = inttoptr i32 32 to <2 x double>*
1873   store <2 x double> %v , <2 x double>* %s
1874   ret void
1875 }
1876
; <2 x double> store directly to the global @gv_v2f64. With
; unimplemented-simd128 the symbol is expected to appear as the store's offset
; on a zero base; with plain simd128 no v128 may appear.
1877 ; CHECK-LABEL: store_v2f64_to_global_address:
1878 ; NO-SIMD128-NOT: v128
1879 ; SIMD128-VM-NOT: v128
1880 ; SIMD128-NEXT: .functype store_v2f64_to_global_address (v128) -> (){{$}}
1881 ; SIMD128-NEXT: i32.const $push[[R:[0-9]+]]=, 0{{$}}
1882 ; SIMD128-NEXT: v128.store gv_v2f64($pop[[R]]), $0{{$}}
1883 define void @store_v2f64_to_global_address(<2 x double> %v) {
1884   store <2 x double> %v , <2 x double>* @gv_v2f64
1885   ret void
1886 }