From f343dcd487169c85fc73f0e2c1cf5995a99cd55d Mon Sep 17 00:00:00 2001
From: David Green
Date: Sun, 15 Sep 2019 14:14:47 +0000
Subject: [PATCH] [ARM] Masked loads and stores

Masked loads and stores fit naturally with MVE, the instructions being
easily predicated. This adds lowering for the simple cases of masked
loads and stores. It does not yet deal with widening/narrowing or
pre/post increment, and so is currently behind an option.

The LLVM masked load intrinsic accepts a "passthru" value, dictating the
values used for the lanes whose mask bits are zero. In MVE the
instructions write 0 to the zero-predicated lanes, so we need to match a
passthru that isn't 0 (or undef) with a select instruction to pull in
the correct data after the load.

Differential Revision: https://reviews.llvm.org/D67186

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@371932 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/ARM/ARMISelLowering.cpp                 |   31 +
 lib/Target/ARM/ARMInstrMVE.td                      |   83 +
 lib/Target/ARM/ARMTargetTransformInfo.cpp          |   20 +
 lib/Target/ARM/ARMTargetTransformInfo.h            |    3 +
 test/CodeGen/Thumb2/mve-masked-ldst.ll             | 2236 +++---
 test/CodeGen/Thumb2/mve-masked-load.ll             | 5403 ++++----------
 test/CodeGen/Thumb2/mve-masked-store.ll            | 3566 +++-------
 .../Transforms/LoopVectorize/ARM/mve-maskedldst.ll |   40 +
 8 files changed, 2496 insertions(+), 8886 deletions(-)
 rewrite test/CodeGen/Thumb2/mve-masked-ldst.ll (61%)
 rewrite test/CodeGen/Thumb2/mve-masked-load.ll (82%)
 rewrite test/CodeGen/Thumb2/mve-masked-store.ll (81%)
 create mode 100644 test/Transforms/LoopVectorize/ARM/mve-maskedldst.ll

diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index d341ee820d3..0ffb931bc4f 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -259,6 +259,8 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
     setOperationAction(ISD::UMAX, VT, Legal);
     setOperationAction(ISD::ABS, VT, Legal);
     setOperationAction(ISD::SETCC, VT, Custom);
+    setOperationAction(ISD::MLOAD, VT, Custom);
+    setOperationAction(ISD::MSTORE, VT, Legal);

     // No native support for these.
     setOperationAction(ISD::UDIV, VT, Expand);
@@ -304,6 +306,8 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
     setOperationAction(ISD::BUILD_VECTOR, VT.getVectorElementType(), Custom);
     setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Legal);
     setOperationAction(ISD::SETCC, VT, Custom);
+    setOperationAction(ISD::MLOAD, VT, Custom);
+    setOperationAction(ISD::MSTORE, VT, Legal);

     // Pre and Post inc are supported on loads and stores
     for (unsigned im = (unsigned)ISD::PRE_INC;
@@ -8848,6 +8852,31 @@ static SDValue LowerPredicateStore(SDValue Op, SelectionDAG &DAG) {
                       ST->getMemOperand());
 }

+static SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG) {
+  MaskedLoadSDNode *N = cast<MaskedLoadSDNode>(Op.getNode());
+  MVT VT = Op.getSimpleValueType();
+  SDValue Mask = N->getMask();
+  SDValue PassThru = N->getPassThru();
+  SDLoc dl(Op);
+
+  if (ISD::isBuildVectorAllZeros(PassThru.getNode()) ||
+      (PassThru->getOpcode() == ARMISD::VMOVIMM &&
+       isNullConstant(PassThru->getOperand(0))))
+    return Op;
+
+  // MVE Masked loads use zero as the passthru value. Here we convert undef to
+  // zero too, and other values are lowered to a select.
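+  // For example, a masked load whose passthru is some other vector PT is
+  // rebuilt below as a masked load with an all-zeros passthru, followed by
+  // a VSELECT on the mask, so that lanes whose mask bits are zero take
+  // their values from PT instead of zero.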
+  SDValue ZeroVec = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
+                                DAG.getTargetConstant(0, dl, MVT::i32));
+  SDValue NewLoad = DAG.getMaskedLoad(
+      VT, dl, N->getChain(), N->getBasePtr(), Mask, ZeroVec, N->getMemoryVT(),
+      N->getMemOperand(), N->getExtensionType(), N->isExpandingLoad());
+  SDValue Combo = NewLoad;
+  if (!PassThru.isUndef())
+    Combo = DAG.getNode(ISD::VSELECT, dl, VT, Mask, NewLoad, PassThru);
+  return DAG.getMergeValues({Combo, NewLoad.getValue(1)}, dl);
+}
+
 static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) {
   if (isStrongerThanMonotonic(cast<AtomicSDNode>(Op)->getOrdering()))
     // Acquire/Release load/store is not legal for targets without a dmb or
@@ -9051,6 +9080,8 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
     return LowerPredicateLoad(Op, DAG);
   case ISD::STORE:
     return LowerPredicateStore(Op, DAG);
+  case ISD::MLOAD:
+    return LowerMLOAD(Op, DAG);
   case ISD::ATOMIC_LOAD:
   case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG);
   case ISD::FSINCOS:      return LowerFSINCOS(Op, DAG);
diff --git a/lib/Target/ARM/ARMInstrMVE.td b/lib/Target/ARM/ARMInstrMVE.td
index 93c976a85e1..5b4b65ada4e 100644
--- a/lib/Target/ARM/ARMInstrMVE.td
+++ b/lib/Target/ARM/ARMInstrMVE.td
@@ -4892,6 +4892,10 @@ class MVE_vector_store_typed
   : Pat<(StoreKind (Ty MQPR:$val), t2addrmode_imm7<shift>:$addr),
         (RegImmInst (Ty MQPR:$val), t2addrmode_imm7<shift>:$addr)>;
+class MVE_vector_maskedstore_typed<ValueType Ty, Instruction RegImmInst,
+                                   PatFrag StoreKind, int shift>
+  : Pat<(StoreKind (Ty MQPR:$val), t2addrmode_imm7<shift>:$addr, VCCR:$pred),
+        (RegImmInst (Ty MQPR:$val), t2addrmode_imm7<shift>:$addr, (i32 1), VCCR:$pred)>;

 multiclass MVE_vector_store<Instruction RegImmInst, PatFrag StoreKind,
                             int shift> {
@@ -4908,6 +4912,10 @@ class MVE_vector_load_typed
   : Pat<(Ty (LoadKind t2addrmode_imm7<shift>:$addr)),
         (Ty (RegImmInst t2addrmode_imm7<shift>:$addr))>;
+class MVE_vector_maskedload_typed<ValueType Ty, Instruction RegImmInst,
+                                  PatFrag LoadKind, int shift>
+  : Pat<(Ty (LoadKind t2addrmode_imm7<shift>:$addr, VCCR:$pred, (Ty NEONimmAllZerosV))),
+        (Ty (RegImmInst t2addrmode_imm7<shift>:$addr, (i32 1), VCCR:$pred))>;

 multiclass MVE_vector_load<Instruction RegImmInst, PatFrag LoadKind,
                            int shift> {
@@ -4953,6 +4961,28 @@ def aligned16_post_store : PatFrag<(ops node:$val, node:$ptr, node:$offset),
   return cast<StoreSDNode>(N)->getAlignment() >= 2;
 }]>;

+def alignedmaskedload32 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
+                                  (masked_ld node:$ptr, node:$pred, node:$passthru), [{
+  return cast<MaskedLoadSDNode>(N)->getAlignment() >= 4;
+}]>;
+def alignedmaskedload16 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
+                                  (masked_ld node:$ptr, node:$pred, node:$passthru), [{
+  return cast<MaskedLoadSDNode>(N)->getAlignment() >= 2;
+}]>;
+def maskedload : PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
+                         (masked_ld node:$ptr, node:$pred, node:$passthru)>;
+
+def alignedmaskedstore32 : PatFrag<(ops node:$val, node:$ptr, node:$pred),
+                                   (masked_st node:$val, node:$ptr, node:$pred), [{
+  return cast<MaskedStoreSDNode>(N)->getAlignment() >= 4;
+}]>;
+def alignedmaskedstore16 : PatFrag<(ops node:$val, node:$ptr, node:$pred),
+                                   (masked_st node:$val, node:$ptr, node:$pred), [{
+  return cast<MaskedStoreSDNode>(N)->getAlignment() >= 2;
+}]>;
+def maskedstore : PatFrag<(ops node:$val, node:$ptr, node:$pred),
+                          (masked_st node:$val, node:$ptr, node:$pred)>;
+
 let Predicates = [HasMVEInt, IsLE] in {
   // Stores
   defm : MVE_vector_store;
   defm : MVE_vector_store;
   defm : MVE_vector_store;
@@ -4971,6 +5001,26 @@ let Predicates = [HasMVEInt, IsLE] in {
   defm : MVE_vector_offset_store;
   defm : MVE_vector_offset_store;
   defm : MVE_vector_offset_store;
+
+  // Unaligned masked stores (aligned are below)
+  def : Pat<(maskedstore (v4i32 MQPR:$val), t2addrmode_imm7<0>:$addr, VCCR:$pred),
+            (MVE_VSTRBU8 MQPR:$val, t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred)>;
+  def : Pat<(maskedstore (v4f32 MQPR:$val),
t2addrmode_imm7<0>:$addr, VCCR:$pred), + (MVE_VSTRBU8 MQPR:$val, t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred)>; + def : Pat<(maskedstore (v8i16 MQPR:$val), t2addrmode_imm7<0>:$addr, VCCR:$pred), + (MVE_VSTRBU8 MQPR:$val, t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred)>; + def : Pat<(maskedstore (v8f16 MQPR:$val), t2addrmode_imm7<0>:$addr, VCCR:$pred), + (MVE_VSTRBU8 MQPR:$val, t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred)>; + + // Unaligned masked loads + def : Pat<(v4i32 (maskedload t2addrmode_imm7<0>:$addr, VCCR:$pred, (v4i32 NEONimmAllZerosV))), + (v4i32 (MVE_VLDRBU8 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred))>; + def : Pat<(v4f32 (maskedload t2addrmode_imm7<0>:$addr, VCCR:$pred, (v4f32 NEONimmAllZerosV))), + (v4f32 (MVE_VLDRBU8 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred))>; + def : Pat<(v8i16 (maskedload t2addrmode_imm7<0>:$addr, VCCR:$pred, (v8i16 NEONimmAllZerosV))), + (v8i16 (MVE_VLDRBU8 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred))>; + def : Pat<(v8f16 (maskedload t2addrmode_imm7<0>:$addr, VCCR:$pred, (v8f16 NEONimmAllZerosV))), + (v8f16 (MVE_VLDRBU8 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred))>; } let Predicates = [HasMVEInt, IsBE] in { @@ -5025,8 +5075,41 @@ let Predicates = [HasMVEInt, IsBE] in { def : MVE_vector_offset_store_typed; def : MVE_vector_offset_store_typed; def : MVE_vector_offset_store_typed; + + // Unaligned masked stores (aligned are below) + def : Pat<(maskedstore (v4i32 MQPR:$val), t2addrmode_imm7<0>:$addr, VCCR:$pred), + (MVE_VSTRBU8 (MVE_VREV32_8 MQPR:$val), t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred)>; + def : Pat<(maskedstore (v4f32 MQPR:$val), t2addrmode_imm7<0>:$addr, VCCR:$pred), + (MVE_VSTRBU8 (MVE_VREV32_8 MQPR:$val), t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred)>; + def : Pat<(maskedstore (v8i16 MQPR:$val), t2addrmode_imm7<0>:$addr, VCCR:$pred), + (MVE_VSTRBU8 (MVE_VREV16_8 MQPR:$val), t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred)>; + def : Pat<(maskedstore (v8f16 MQPR:$val), t2addrmode_imm7<0>:$addr, VCCR:$pred), + (MVE_VSTRBU8 (MVE_VREV16_8 MQPR:$val), t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred)>; + // Unaligned masked loads + def : Pat<(v4i32 (maskedload t2addrmode_imm7<0>:$addr, VCCR:$pred, (v4i32 NEONimmAllZerosV))), + (v4i32 (MVE_VREV32_8 (MVE_VLDRBU8 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred)))>; + def : Pat<(v4f32 (maskedload t2addrmode_imm7<0>:$addr, VCCR:$pred, (v4f32 NEONimmAllZerosV))), + (v4f32 (MVE_VREV32_8 (MVE_VLDRBU8 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred)))>; + def : Pat<(v8i16 (maskedload t2addrmode_imm7<0>:$addr, VCCR:$pred, (v8i16 NEONimmAllZerosV))), + (v8i16 (MVE_VREV16_8 (MVE_VLDRBU8 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred)))>; + def : Pat<(v8f16 (maskedload t2addrmode_imm7<0>:$addr, VCCR:$pred, (v8f16 NEONimmAllZerosV))), + (v8f16 (MVE_VREV16_8 (MVE_VLDRBU8 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred)))>; } +let Predicates = [HasMVEInt] in { + // Aligned masked store, shared between LE and BE + def : MVE_vector_maskedstore_typed; + def : MVE_vector_maskedstore_typed; + def : MVE_vector_maskedstore_typed; + def : MVE_vector_maskedstore_typed; + def : MVE_vector_maskedstore_typed; + // Aligned masked loads + def : MVE_vector_maskedload_typed; + def : MVE_vector_maskedload_typed; + def : MVE_vector_maskedload_typed; + def : MVE_vector_maskedload_typed; + def : MVE_vector_maskedload_typed; +} // Widening/Narrowing Loads/Stores diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp index e74b2b1a2ba..e8ac760786f 100644 --- 
a/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -36,6 +36,10 @@ using namespace llvm; #define DEBUG_TYPE "armtti" +static cl::opt EnableMaskedLoadStores( + "enable-arm-maskedldst", cl::Hidden, cl::init(false), + cl::desc("Enable the generation of masked loads and stores")); + static cl::opt DisableLowOverheadLoops( "disable-arm-loloops", cl::Hidden, cl::init(false), cl::desc("Disable the generation of low-overhead loops")); @@ -487,6 +491,22 @@ int ARMTTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE, return BaseT::getAddressComputationCost(Ty, SE, Ptr); } +bool ARMTTIImpl::isLegalMaskedLoad(Type *DataTy) { + if (!EnableMaskedLoadStores || !ST->hasMVEIntegerOps()) + return false; + + if (DataTy->isVectorTy()) { + // We don't yet support narrowing or widening masked loads/stores. Expand + // them for the moment. + unsigned VecWidth = DataTy->getPrimitiveSizeInBits(); + if (VecWidth != 128) + return false; + } + + unsigned EltWidth = DataTy->getScalarSizeInBits(); + return EltWidth == 32 || EltWidth == 16 || EltWidth == 8; +} + int ARMTTIImpl::getMemcpyCost(const Instruction *I) { const MemCpyInst *MI = dyn_cast(I); assert(MI && "MemcpyInst expected"); diff --git a/lib/Target/ARM/ARMTargetTransformInfo.h b/lib/Target/ARM/ARMTargetTransformInfo.h index 507e0188549..47e98dac9f6 100644 --- a/lib/Target/ARM/ARMTargetTransformInfo.h +++ b/lib/Target/ARM/ARMTargetTransformInfo.h @@ -152,6 +152,9 @@ public: return ST->getMaxInterleaveFactor(); } + bool isLegalMaskedLoad(Type *DataTy); + bool isLegalMaskedStore(Type *DataTy) { return isLegalMaskedLoad(DataTy); } + int getMemcpyCost(const Instruction *I); int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp); diff --git a/test/CodeGen/Thumb2/mve-masked-ldst.ll b/test/CodeGen/Thumb2/mve-masked-ldst.ll dissimilarity index 61% index f7d6a3f3799..af619354cec 100644 --- a/test/CodeGen/Thumb2/mve-masked-ldst.ll +++ b/test/CodeGen/Thumb2/mve-masked-ldst.ll @@ -1,1573 +1,663 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE -; RUN: llc -mtriple=thumbebv8.1m.main-arm-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE - -define void @foo_v4i32_v4i32(<4 x i32> *%dest, <4 x i32> *%mask, <4 x i32> *%src) { -; CHECK-LABEL: foo_v4i32_v4i32: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .pad #8 -; CHECK-NEXT: sub sp, #8 -; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: movs r3, #0 -; CHECK-NEXT: vcmp.s32 gt, q0, zr -; CHECK-NEXT: @ implicit-def: $q0 -; CHECK-NEXT: vmrs r12, p0 -; CHECK-NEXT: and r1, r12, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #0, #1 -; CHECK-NEXT: ubfx r1, r12, #4, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #1, #1 -; CHECK-NEXT: ubfx r1, r12, #8, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #2, #1 -; CHECK-NEXT: ubfx r1, r12, #12, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #3, #1 -; CHECK-NEXT: and r1, r3, #15 -; CHECK-NEXT: lsls r3, r1, #31 -; CHECK-NEXT: itt ne -; CHECK-NEXT: ldrne r3, [r2] -; CHECK-NEXT: vmovne.32 q0[0], r3 -; CHECK-NEXT: lsls r3, r1, #30 -; CHECK-NEXT: itt mi -; CHECK-NEXT: ldrmi r3, [r2, #4] -; CHECK-NEXT: vmovmi.32 q0[1], r3 -; CHECK-NEXT: lsls r3, r1, #29 -; CHECK-NEXT: itt mi -; CHECK-NEXT: ldrmi r3, [r2, #8] -; 
CHECK-NEXT: vmovmi.32 q0[2], r3 -; CHECK-NEXT: lsls r1, r1, #28 -; CHECK-NEXT: itt mi -; CHECK-NEXT: ldrmi r1, [r2, #12] -; CHECK-NEXT: vmovmi.32 q0[3], r1 -; CHECK-NEXT: vmrs r2, p0 -; CHECK-NEXT: movs r1, #0 -; CHECK-NEXT: and r3, r2, #1 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: bfi r1, r3, #0, #1 -; CHECK-NEXT: ubfx r3, r2, #4, #1 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: bfi r1, r3, #1, #1 -; CHECK-NEXT: ubfx r3, r2, #8, #1 -; CHECK-NEXT: ubfx r2, r2, #12, #1 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: bfi r1, r3, #2, #1 -; CHECK-NEXT: rsbs r2, r2, #0 -; CHECK-NEXT: bfi r1, r2, #3, #1 -; CHECK-NEXT: and r1, r1, #15 -; CHECK-NEXT: lsls r2, r1, #31 -; CHECK-NEXT: itt ne -; CHECK-NEXT: vmovne r2, s0 -; CHECK-NEXT: strne r2, [r0] -; CHECK-NEXT: lsls r2, r1, #30 -; CHECK-NEXT: itt mi -; CHECK-NEXT: vmovmi r2, s1 -; CHECK-NEXT: strmi r2, [r0, #4] -; CHECK-NEXT: lsls r2, r1, #29 -; CHECK-NEXT: itt mi -; CHECK-NEXT: vmovmi r2, s2 -; CHECK-NEXT: strmi r2, [r0, #8] -; CHECK-NEXT: lsls r1, r1, #28 -; CHECK-NEXT: itt mi -; CHECK-NEXT: vmovmi r1, s3 -; CHECK-NEXT: strmi r1, [r0, #12] -; CHECK-NEXT: add sp, #8 -; CHECK-NEXT: bx lr -entry: - %0 = load <4 x i32>, <4 x i32>* %mask, align 4 - %1 = icmp sgt <4 x i32> %0, zeroinitializer - %2 = call <4 x i32> @llvm.masked.load.v4i32(<4 x i32>* %src, i32 4, <4 x i1> %1, <4 x i32> undef) - call void @llvm.masked.store.v4i32(<4 x i32> %2, <4 x i32>* %dest, i32 4, <4 x i1> %1) - ret void -} - -define void @foo_sext_v4i32_v4i8(<4 x i32> *%dest, <4 x i32> *%mask, <4 x i8> *%src) { -; CHECK-LABEL: foo_sext_v4i32_v4i8: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .pad #8 -; CHECK-NEXT: sub sp, #8 -; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: movs r3, #0 -; CHECK-NEXT: vcmp.s32 gt, q0, zr -; CHECK-NEXT: @ implicit-def: $q0 -; CHECK-NEXT: vmrs r12, p0 -; CHECK-NEXT: and r1, r12, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #0, #1 -; CHECK-NEXT: ubfx r1, r12, #4, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #1, #1 -; CHECK-NEXT: ubfx r1, r12, #8, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #2, #1 -; CHECK-NEXT: ubfx r1, r12, #12, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #3, #1 -; CHECK-NEXT: and r1, r3, #15 -; CHECK-NEXT: lsls r3, r1, #31 -; CHECK-NEXT: itt ne -; CHECK-NEXT: ldrbne r3, [r2] -; CHECK-NEXT: vmovne.32 q0[0], r3 -; CHECK-NEXT: lsls r3, r1, #30 -; CHECK-NEXT: itt mi -; CHECK-NEXT: ldrbmi r3, [r2, #1] -; CHECK-NEXT: vmovmi.32 q0[1], r3 -; CHECK-NEXT: lsls r3, r1, #29 -; CHECK-NEXT: itt mi -; CHECK-NEXT: ldrbmi r3, [r2, #2] -; CHECK-NEXT: vmovmi.32 q0[2], r3 -; CHECK-NEXT: lsls r1, r1, #28 -; CHECK-NEXT: itt mi -; CHECK-NEXT: ldrbmi r1, [r2, #3] -; CHECK-NEXT: vmovmi.32 q0[3], r1 -; CHECK-NEXT: vmrs r2, p0 -; CHECK-NEXT: movs r1, #0 -; CHECK-NEXT: vmovlb.s8 q0, q0 -; CHECK-NEXT: vmovlb.s16 q0, q0 -; CHECK-NEXT: and r3, r2, #1 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: bfi r1, r3, #0, #1 -; CHECK-NEXT: ubfx r3, r2, #4, #1 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: bfi r1, r3, #1, #1 -; CHECK-NEXT: ubfx r3, r2, #8, #1 -; CHECK-NEXT: ubfx r2, r2, #12, #1 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: bfi r1, r3, #2, #1 -; CHECK-NEXT: rsbs r2, r2, #0 -; CHECK-NEXT: bfi r1, r2, #3, #1 -; CHECK-NEXT: and r1, r1, #15 -; CHECK-NEXT: lsls r2, r1, #31 -; CHECK-NEXT: itt ne -; CHECK-NEXT: vmovne r2, s0 -; CHECK-NEXT: strne r2, [r0] -; CHECK-NEXT: lsls r2, r1, #30 -; CHECK-NEXT: itt mi -; CHECK-NEXT: vmovmi r2, s1 -; CHECK-NEXT: strmi r2, [r0, #4] -; CHECK-NEXT: lsls r2, r1, #29 -; 
CHECK-NEXT: itt mi -; CHECK-NEXT: vmovmi r2, s2 -; CHECK-NEXT: strmi r2, [r0, #8] -; CHECK-NEXT: lsls r1, r1, #28 -; CHECK-NEXT: itt mi -; CHECK-NEXT: vmovmi r1, s3 -; CHECK-NEXT: strmi r1, [r0, #12] -; CHECK-NEXT: add sp, #8 -; CHECK-NEXT: bx lr -entry: - %0 = load <4 x i32>, <4 x i32>* %mask, align 4 - %1 = icmp sgt <4 x i32> %0, zeroinitializer - %2 = call <4 x i8> @llvm.masked.load.v4i8(<4 x i8>* %src, i32 1, <4 x i1> %1, <4 x i8> undef) - %3 = sext <4 x i8> %2 to <4 x i32> - call void @llvm.masked.store.v4i32(<4 x i32> %3, <4 x i32>* %dest, i32 4, <4 x i1> %1) - ret void -} - -define void @foo_sext_v4i32_v4i16(<4 x i32> *%dest, <4 x i32> *%mask, <4 x i16> *%src) { -; CHECK-LABEL: foo_sext_v4i32_v4i16: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .pad #8 -; CHECK-NEXT: sub sp, #8 -; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: movs r3, #0 -; CHECK-NEXT: vcmp.s32 gt, q0, zr -; CHECK-NEXT: @ implicit-def: $q0 -; CHECK-NEXT: vmrs r12, p0 -; CHECK-NEXT: and r1, r12, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #0, #1 -; CHECK-NEXT: ubfx r1, r12, #4, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #1, #1 -; CHECK-NEXT: ubfx r1, r12, #8, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #2, #1 -; CHECK-NEXT: ubfx r1, r12, #12, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #3, #1 -; CHECK-NEXT: and r1, r3, #15 -; CHECK-NEXT: lsls r3, r1, #31 -; CHECK-NEXT: itt ne -; CHECK-NEXT: ldrhne r3, [r2] -; CHECK-NEXT: vmovne.32 q0[0], r3 -; CHECK-NEXT: lsls r3, r1, #30 -; CHECK-NEXT: itt mi -; CHECK-NEXT: ldrhmi r3, [r2, #2] -; CHECK-NEXT: vmovmi.32 q0[1], r3 -; CHECK-NEXT: lsls r3, r1, #29 -; CHECK-NEXT: itt mi -; CHECK-NEXT: ldrhmi r3, [r2, #4] -; CHECK-NEXT: vmovmi.32 q0[2], r3 -; CHECK-NEXT: lsls r1, r1, #28 -; CHECK-NEXT: itt mi -; CHECK-NEXT: ldrhmi r1, [r2, #6] -; CHECK-NEXT: vmovmi.32 q0[3], r1 -; CHECK-NEXT: vmrs r2, p0 -; CHECK-NEXT: movs r1, #0 -; CHECK-NEXT: vmovlb.s16 q0, q0 -; CHECK-NEXT: and r3, r2, #1 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: bfi r1, r3, #0, #1 -; CHECK-NEXT: ubfx r3, r2, #4, #1 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: bfi r1, r3, #1, #1 -; CHECK-NEXT: ubfx r3, r2, #8, #1 -; CHECK-NEXT: ubfx r2, r2, #12, #1 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: bfi r1, r3, #2, #1 -; CHECK-NEXT: rsbs r2, r2, #0 -; CHECK-NEXT: bfi r1, r2, #3, #1 -; CHECK-NEXT: and r1, r1, #15 -; CHECK-NEXT: lsls r2, r1, #31 -; CHECK-NEXT: itt ne -; CHECK-NEXT: vmovne r2, s0 -; CHECK-NEXT: strne r2, [r0] -; CHECK-NEXT: lsls r2, r1, #30 -; CHECK-NEXT: itt mi -; CHECK-NEXT: vmovmi r2, s1 -; CHECK-NEXT: strmi r2, [r0, #4] -; CHECK-NEXT: lsls r2, r1, #29 -; CHECK-NEXT: itt mi -; CHECK-NEXT: vmovmi r2, s2 -; CHECK-NEXT: strmi r2, [r0, #8] -; CHECK-NEXT: lsls r1, r1, #28 -; CHECK-NEXT: itt mi -; CHECK-NEXT: vmovmi r1, s3 -; CHECK-NEXT: strmi r1, [r0, #12] -; CHECK-NEXT: add sp, #8 -; CHECK-NEXT: bx lr -entry: - %0 = load <4 x i32>, <4 x i32>* %mask, align 4 - %1 = icmp sgt <4 x i32> %0, zeroinitializer - %2 = call <4 x i16> @llvm.masked.load.v4i16(<4 x i16>* %src, i32 2, <4 x i1> %1, <4 x i16> undef) - %3 = sext <4 x i16> %2 to <4 x i32> - call void @llvm.masked.store.v4i32(<4 x i32> %3, <4 x i32>* %dest, i32 4, <4 x i1> %1) - ret void -} - -define void @foo_zext_v4i32_v4i8(<4 x i32> *%dest, <4 x i32> *%mask, <4 x i8> *%src) { -; CHECK-LABEL: foo_zext_v4i32_v4i8: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .pad #8 -; CHECK-NEXT: sub sp, #8 -; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: movs r3, #0 -; CHECK-NEXT: vmov.i32 q1, #0xff -; 
CHECK-NEXT: vcmp.s32 gt, q0, zr -; CHECK-NEXT: @ implicit-def: $q0 -; CHECK-NEXT: vmrs r12, p0 -; CHECK-NEXT: and r1, r12, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #0, #1 -; CHECK-NEXT: ubfx r1, r12, #4, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #1, #1 -; CHECK-NEXT: ubfx r1, r12, #8, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #2, #1 -; CHECK-NEXT: ubfx r1, r12, #12, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #3, #1 -; CHECK-NEXT: and r1, r3, #15 -; CHECK-NEXT: lsls r3, r1, #31 -; CHECK-NEXT: itt ne -; CHECK-NEXT: ldrbne r3, [r2] -; CHECK-NEXT: vmovne.32 q0[0], r3 -; CHECK-NEXT: lsls r3, r1, #30 -; CHECK-NEXT: itt mi -; CHECK-NEXT: ldrbmi r3, [r2, #1] -; CHECK-NEXT: vmovmi.32 q0[1], r3 -; CHECK-NEXT: lsls r3, r1, #29 -; CHECK-NEXT: itt mi -; CHECK-NEXT: ldrbmi r3, [r2, #2] -; CHECK-NEXT: vmovmi.32 q0[2], r3 -; CHECK-NEXT: lsls r1, r1, #28 -; CHECK-NEXT: itt mi -; CHECK-NEXT: ldrbmi r1, [r2, #3] -; CHECK-NEXT: vmovmi.32 q0[3], r1 -; CHECK-NEXT: vmrs r2, p0 -; CHECK-NEXT: movs r1, #0 -; CHECK-NEXT: vand q0, q0, q1 -; CHECK-NEXT: and r3, r2, #1 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: bfi r1, r3, #0, #1 -; CHECK-NEXT: ubfx r3, r2, #4, #1 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: bfi r1, r3, #1, #1 -; CHECK-NEXT: ubfx r3, r2, #8, #1 -; CHECK-NEXT: ubfx r2, r2, #12, #1 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: bfi r1, r3, #2, #1 -; CHECK-NEXT: rsbs r2, r2, #0 -; CHECK-NEXT: bfi r1, r2, #3, #1 -; CHECK-NEXT: and r1, r1, #15 -; CHECK-NEXT: lsls r2, r1, #31 -; CHECK-NEXT: itt ne -; CHECK-NEXT: vmovne r2, s0 -; CHECK-NEXT: strne r2, [r0] -; CHECK-NEXT: lsls r2, r1, #30 -; CHECK-NEXT: itt mi -; CHECK-NEXT: vmovmi r2, s1 -; CHECK-NEXT: strmi r2, [r0, #4] -; CHECK-NEXT: lsls r2, r1, #29 -; CHECK-NEXT: itt mi -; CHECK-NEXT: vmovmi r2, s2 -; CHECK-NEXT: strmi r2, [r0, #8] -; CHECK-NEXT: lsls r1, r1, #28 -; CHECK-NEXT: itt mi -; CHECK-NEXT: vmovmi r1, s3 -; CHECK-NEXT: strmi r1, [r0, #12] -; CHECK-NEXT: add sp, #8 -; CHECK-NEXT: bx lr -entry: - %0 = load <4 x i32>, <4 x i32>* %mask, align 4 - %1 = icmp sgt <4 x i32> %0, zeroinitializer - %2 = call <4 x i8> @llvm.masked.load.v4i8(<4 x i8>* %src, i32 1, <4 x i1> %1, <4 x i8> undef) - %3 = zext <4 x i8> %2 to <4 x i32> - call void @llvm.masked.store.v4i32(<4 x i32> %3, <4 x i32>* %dest, i32 4, <4 x i1> %1) - ret void -} - -define void @foo_zext_v4i32_v4i16(<4 x i32> *%dest, <4 x i32> *%mask, <4 x i16> *%src) { -; CHECK-LABEL: foo_zext_v4i32_v4i16: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .pad #8 -; CHECK-NEXT: sub sp, #8 -; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: movs r3, #0 -; CHECK-NEXT: vcmp.s32 gt, q0, zr -; CHECK-NEXT: @ implicit-def: $q0 -; CHECK-NEXT: vmrs r12, p0 -; CHECK-NEXT: and r1, r12, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #0, #1 -; CHECK-NEXT: ubfx r1, r12, #4, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #1, #1 -; CHECK-NEXT: ubfx r1, r12, #8, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #2, #1 -; CHECK-NEXT: ubfx r1, r12, #12, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #3, #1 -; CHECK-NEXT: and r1, r3, #15 -; CHECK-NEXT: lsls r3, r1, #31 -; CHECK-NEXT: itt ne -; CHECK-NEXT: ldrhne r3, [r2] -; CHECK-NEXT: vmovne.32 q0[0], r3 -; CHECK-NEXT: lsls r3, r1, #30 -; CHECK-NEXT: itt mi -; CHECK-NEXT: ldrhmi r3, [r2, #2] -; CHECK-NEXT: vmovmi.32 q0[1], r3 -; CHECK-NEXT: lsls r3, r1, #29 -; CHECK-NEXT: itt mi -; CHECK-NEXT: ldrhmi r3, [r2, #4] -; CHECK-NEXT: vmovmi.32 q0[2], r3 -; 
CHECK-NEXT: lsls r1, r1, #28 -; CHECK-NEXT: itt mi -; CHECK-NEXT: ldrhmi r1, [r2, #6] -; CHECK-NEXT: vmovmi.32 q0[3], r1 -; CHECK-NEXT: vmrs r2, p0 -; CHECK-NEXT: movs r1, #0 -; CHECK-NEXT: vmovlb.u16 q0, q0 -; CHECK-NEXT: and r3, r2, #1 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: bfi r1, r3, #0, #1 -; CHECK-NEXT: ubfx r3, r2, #4, #1 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: bfi r1, r3, #1, #1 -; CHECK-NEXT: ubfx r3, r2, #8, #1 -; CHECK-NEXT: ubfx r2, r2, #12, #1 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: bfi r1, r3, #2, #1 -; CHECK-NEXT: rsbs r2, r2, #0 -; CHECK-NEXT: bfi r1, r2, #3, #1 -; CHECK-NEXT: and r1, r1, #15 -; CHECK-NEXT: lsls r2, r1, #31 -; CHECK-NEXT: itt ne -; CHECK-NEXT: vmovne r2, s0 -; CHECK-NEXT: strne r2, [r0] -; CHECK-NEXT: lsls r2, r1, #30 -; CHECK-NEXT: itt mi -; CHECK-NEXT: vmovmi r2, s1 -; CHECK-NEXT: strmi r2, [r0, #4] -; CHECK-NEXT: lsls r2, r1, #29 -; CHECK-NEXT: itt mi -; CHECK-NEXT: vmovmi r2, s2 -; CHECK-NEXT: strmi r2, [r0, #8] -; CHECK-NEXT: lsls r1, r1, #28 -; CHECK-NEXT: itt mi -; CHECK-NEXT: vmovmi r1, s3 -; CHECK-NEXT: strmi r1, [r0, #12] -; CHECK-NEXT: add sp, #8 -; CHECK-NEXT: bx lr -entry: - %0 = load <4 x i32>, <4 x i32>* %mask, align 4 - %1 = icmp sgt <4 x i32> %0, zeroinitializer - %2 = call <4 x i16> @llvm.masked.load.v4i16(<4 x i16>* %src, i32 2, <4 x i1> %1, <4 x i16> undef) - %3 = zext <4 x i16> %2 to <4 x i32> - call void @llvm.masked.store.v4i32(<4 x i32> %3, <4 x i32>* %dest, i32 4, <4 x i1> %1) - ret void -} - -define void @foo_v8i16_v8i16(<8 x i16> *%dest, <8 x i16> *%mask, <8 x i16> *%src) { -; CHECK-LABEL: foo_v8i16_v8i16: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .pad #16 -; CHECK-NEXT: sub sp, #16 -; CHECK-NEXT: vldrh.u16 q0, [r1] -; CHECK-NEXT: movs r3, #0 -; CHECK-NEXT: vcmp.s16 gt, q0, zr -; CHECK-NEXT: @ implicit-def: $q0 -; CHECK-NEXT: vmrs r12, p0 -; CHECK-NEXT: and r1, r12, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #0, #1 -; CHECK-NEXT: ubfx r1, r12, #2, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #1, #1 -; CHECK-NEXT: ubfx r1, r12, #4, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #2, #1 -; CHECK-NEXT: ubfx r1, r12, #6, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #3, #1 -; CHECK-NEXT: ubfx r1, r12, #8, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #4, #1 -; CHECK-NEXT: ubfx r1, r12, #10, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #5, #1 -; CHECK-NEXT: ubfx r1, r12, #12, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #6, #1 -; CHECK-NEXT: ubfx r1, r12, #14, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #7, #1 -; CHECK-NEXT: uxtb r1, r3 -; CHECK-NEXT: lsls r3, r3, #31 -; CHECK-NEXT: itt ne -; CHECK-NEXT: ldrhne r3, [r2] -; CHECK-NEXT: vmovne.16 q0[0], r3 -; CHECK-NEXT: lsls r3, r1, #30 -; CHECK-NEXT: itt mi -; CHECK-NEXT: ldrhmi r3, [r2, #2] -; CHECK-NEXT: vmovmi.16 q0[1], r3 -; CHECK-NEXT: lsls r3, r1, #29 -; CHECK-NEXT: itt mi -; CHECK-NEXT: ldrhmi r3, [r2, #4] -; CHECK-NEXT: vmovmi.16 q0[2], r3 -; CHECK-NEXT: lsls r3, r1, #28 -; CHECK-NEXT: itt mi -; CHECK-NEXT: ldrhmi r3, [r2, #6] -; CHECK-NEXT: vmovmi.16 q0[3], r3 -; CHECK-NEXT: lsls r3, r1, #27 -; CHECK-NEXT: itt mi -; CHECK-NEXT: ldrhmi r3, [r2, #8] -; CHECK-NEXT: vmovmi.16 q0[4], r3 -; CHECK-NEXT: lsls r3, r1, #26 -; CHECK-NEXT: itt mi -; CHECK-NEXT: ldrhmi r3, [r2, #10] -; CHECK-NEXT: vmovmi.16 q0[5], r3 -; CHECK-NEXT: lsls r3, r1, #25 -; CHECK-NEXT: itt mi -; CHECK-NEXT: ldrhmi r3, [r2, #12] -; CHECK-NEXT: vmovmi.16 q0[6], r3 -; 
CHECK-NEXT: lsls r1, r1, #24 -; CHECK-NEXT: itt mi -; CHECK-NEXT: ldrhmi r1, [r2, #14] -; CHECK-NEXT: vmovmi.16 q0[7], r1 -; CHECK-NEXT: movs r2, #0 -; CHECK-NEXT: vmrs r1, p0 -; CHECK-NEXT: and r3, r1, #1 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: bfi r2, r3, #0, #1 -; CHECK-NEXT: ubfx r3, r1, #2, #1 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: bfi r2, r3, #1, #1 -; CHECK-NEXT: ubfx r3, r1, #4, #1 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: bfi r2, r3, #2, #1 -; CHECK-NEXT: ubfx r3, r1, #6, #1 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: bfi r2, r3, #3, #1 -; CHECK-NEXT: ubfx r3, r1, #8, #1 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: bfi r2, r3, #4, #1 -; CHECK-NEXT: ubfx r3, r1, #10, #1 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: bfi r2, r3, #5, #1 -; CHECK-NEXT: ubfx r3, r1, #12, #1 -; CHECK-NEXT: ubfx r1, r1, #14, #1 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: bfi r2, r3, #6, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r2, r1, #7, #1 -; CHECK-NEXT: uxtb r1, r2 -; CHECK-NEXT: lsls r2, r2, #31 -; CHECK-NEXT: itt ne -; CHECK-NEXT: vmovne.u16 r2, q0[0] -; CHECK-NEXT: strhne r2, [r0] -; CHECK-NEXT: lsls r2, r1, #30 -; CHECK-NEXT: itt mi -; CHECK-NEXT: vmovmi.u16 r2, q0[1] -; CHECK-NEXT: strhmi r2, [r0, #2] -; CHECK-NEXT: lsls r2, r1, #29 -; CHECK-NEXT: itt mi -; CHECK-NEXT: vmovmi.u16 r2, q0[2] -; CHECK-NEXT: strhmi r2, [r0, #4] -; CHECK-NEXT: lsls r2, r1, #28 -; CHECK-NEXT: itt mi -; CHECK-NEXT: vmovmi.u16 r2, q0[3] -; CHECK-NEXT: strhmi r2, [r0, #6] -; CHECK-NEXT: lsls r2, r1, #27 -; CHECK-NEXT: itt mi -; CHECK-NEXT: vmovmi.u16 r2, q0[4] -; CHECK-NEXT: strhmi r2, [r0, #8] -; CHECK-NEXT: lsls r2, r1, #26 -; CHECK-NEXT: itt mi -; CHECK-NEXT: vmovmi.u16 r2, q0[5] -; CHECK-NEXT: strhmi r2, [r0, #10] -; CHECK-NEXT: lsls r2, r1, #25 -; CHECK-NEXT: itt mi -; CHECK-NEXT: vmovmi.u16 r2, q0[6] -; CHECK-NEXT: strhmi r2, [r0, #12] -; CHECK-NEXT: lsls r1, r1, #24 -; CHECK-NEXT: itt mi -; CHECK-NEXT: vmovmi.u16 r1, q0[7] -; CHECK-NEXT: strhmi r1, [r0, #14] -; CHECK-NEXT: add sp, #16 -; CHECK-NEXT: bx lr -entry: - %0 = load <8 x i16>, <8 x i16>* %mask, align 2 - %1 = icmp sgt <8 x i16> %0, zeroinitializer - %2 = call <8 x i16> @llvm.masked.load.v8i16(<8 x i16>* %src, i32 2, <8 x i1> %1, <8 x i16> undef) - call void @llvm.masked.store.v8i16(<8 x i16> %2, <8 x i16>* %dest, i32 2, <8 x i1> %1) - ret void -} - -define void @foo_sext_v8i16_v8i8(<8 x i16> *%dest, <8 x i16> *%mask, <8 x i8> *%src) { -; CHECK-LABEL: foo_sext_v8i16_v8i8: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .pad #16 -; CHECK-NEXT: sub sp, #16 -; CHECK-NEXT: vldrh.u16 q0, [r1] -; CHECK-NEXT: movs r3, #0 -; CHECK-NEXT: vcmp.s16 gt, q0, zr -; CHECK-NEXT: @ implicit-def: $q0 -; CHECK-NEXT: vmrs r12, p0 -; CHECK-NEXT: and r1, r12, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #0, #1 -; CHECK-NEXT: ubfx r1, r12, #2, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #1, #1 -; CHECK-NEXT: ubfx r1, r12, #4, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #2, #1 -; CHECK-NEXT: ubfx r1, r12, #6, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #3, #1 -; CHECK-NEXT: ubfx r1, r12, #8, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #4, #1 -; CHECK-NEXT: ubfx r1, r12, #10, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #5, #1 -; CHECK-NEXT: ubfx r1, r12, #12, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #6, #1 -; CHECK-NEXT: ubfx r1, r12, #14, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #7, #1 -; CHECK-NEXT: uxtb r1, r3 -; 
CHECK-NEXT: lsls r3, r3, #31 -; CHECK-NEXT: itt ne -; CHECK-NEXT: ldrbne r3, [r2] -; CHECK-NEXT: vmovne.16 q0[0], r3 -; CHECK-NEXT: lsls r3, r1, #30 -; CHECK-NEXT: itt mi -; CHECK-NEXT: ldrbmi r3, [r2, #1] -; CHECK-NEXT: vmovmi.16 q0[1], r3 -; CHECK-NEXT: lsls r3, r1, #29 -; CHECK-NEXT: itt mi -; CHECK-NEXT: ldrbmi r3, [r2, #2] -; CHECK-NEXT: vmovmi.16 q0[2], r3 -; CHECK-NEXT: lsls r3, r1, #28 -; CHECK-NEXT: itt mi -; CHECK-NEXT: ldrbmi r3, [r2, #3] -; CHECK-NEXT: vmovmi.16 q0[3], r3 -; CHECK-NEXT: lsls r3, r1, #27 -; CHECK-NEXT: itt mi -; CHECK-NEXT: ldrbmi r3, [r2, #4] -; CHECK-NEXT: vmovmi.16 q0[4], r3 -; CHECK-NEXT: lsls r3, r1, #26 -; CHECK-NEXT: itt mi -; CHECK-NEXT: ldrbmi r3, [r2, #5] -; CHECK-NEXT: vmovmi.16 q0[5], r3 -; CHECK-NEXT: lsls r3, r1, #25 -; CHECK-NEXT: itt mi -; CHECK-NEXT: ldrbmi r3, [r2, #6] -; CHECK-NEXT: vmovmi.16 q0[6], r3 -; CHECK-NEXT: lsls r1, r1, #24 -; CHECK-NEXT: itt mi -; CHECK-NEXT: ldrbmi r1, [r2, #7] -; CHECK-NEXT: vmovmi.16 q0[7], r1 -; CHECK-NEXT: movs r2, #0 -; CHECK-NEXT: vmrs r1, p0 -; CHECK-NEXT: vmovlb.s8 q0, q0 -; CHECK-NEXT: and r3, r1, #1 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: bfi r2, r3, #0, #1 -; CHECK-NEXT: ubfx r3, r1, #2, #1 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: bfi r2, r3, #1, #1 -; CHECK-NEXT: ubfx r3, r1, #4, #1 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: bfi r2, r3, #2, #1 -; CHECK-NEXT: ubfx r3, r1, #6, #1 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: bfi r2, r3, #3, #1 -; CHECK-NEXT: ubfx r3, r1, #8, #1 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: bfi r2, r3, #4, #1 -; CHECK-NEXT: ubfx r3, r1, #10, #1 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: bfi r2, r3, #5, #1 -; CHECK-NEXT: ubfx r3, r1, #12, #1 -; CHECK-NEXT: ubfx r1, r1, #14, #1 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: bfi r2, r3, #6, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r2, r1, #7, #1 -; CHECK-NEXT: uxtb r1, r2 -; CHECK-NEXT: lsls r2, r2, #31 -; CHECK-NEXT: itt ne -; CHECK-NEXT: vmovne.u16 r2, q0[0] -; CHECK-NEXT: strhne r2, [r0] -; CHECK-NEXT: lsls r2, r1, #30 -; CHECK-NEXT: itt mi -; CHECK-NEXT: vmovmi.u16 r2, q0[1] -; CHECK-NEXT: strhmi r2, [r0, #2] -; CHECK-NEXT: lsls r2, r1, #29 -; CHECK-NEXT: itt mi -; CHECK-NEXT: vmovmi.u16 r2, q0[2] -; CHECK-NEXT: strhmi r2, [r0, #4] -; CHECK-NEXT: lsls r2, r1, #28 -; CHECK-NEXT: itt mi -; CHECK-NEXT: vmovmi.u16 r2, q0[3] -; CHECK-NEXT: strhmi r2, [r0, #6] -; CHECK-NEXT: lsls r2, r1, #27 -; CHECK-NEXT: itt mi -; CHECK-NEXT: vmovmi.u16 r2, q0[4] -; CHECK-NEXT: strhmi r2, [r0, #8] -; CHECK-NEXT: lsls r2, r1, #26 -; CHECK-NEXT: itt mi -; CHECK-NEXT: vmovmi.u16 r2, q0[5] -; CHECK-NEXT: strhmi r2, [r0, #10] -; CHECK-NEXT: lsls r2, r1, #25 -; CHECK-NEXT: itt mi -; CHECK-NEXT: vmovmi.u16 r2, q0[6] -; CHECK-NEXT: strhmi r2, [r0, #12] -; CHECK-NEXT: lsls r1, r1, #24 -; CHECK-NEXT: itt mi -; CHECK-NEXT: vmovmi.u16 r1, q0[7] -; CHECK-NEXT: strhmi r1, [r0, #14] -; CHECK-NEXT: add sp, #16 -; CHECK-NEXT: bx lr -entry: - %0 = load <8 x i16>, <8 x i16>* %mask, align 2 - %1 = icmp sgt <8 x i16> %0, zeroinitializer - %2 = call <8 x i8> @llvm.masked.load.v8i8(<8 x i8>* %src, i32 1, <8 x i1> %1, <8 x i8> undef) - %3 = sext <8 x i8> %2 to <8 x i16> - call void @llvm.masked.store.v8i16(<8 x i16> %3, <8 x i16>* %dest, i32 2, <8 x i1> %1) - ret void -} - -define void @foo_zext_v8i16_v8i8(<8 x i16> *%dest, <8 x i16> *%mask, <8 x i8> *%src) { -; CHECK-LABEL: foo_zext_v8i16_v8i8: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .pad #16 -; CHECK-NEXT: sub sp, #16 -; CHECK-NEXT: vldrh.u16 q0, [r1] -; CHECK-NEXT: movs r3, #0 -; 
CHECK-NEXT: vcmp.s16 gt, q0, zr -; CHECK-NEXT: @ implicit-def: $q0 -; CHECK-NEXT: vmrs r12, p0 -; CHECK-NEXT: and r1, r12, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #0, #1 -; CHECK-NEXT: ubfx r1, r12, #2, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #1, #1 -; CHECK-NEXT: ubfx r1, r12, #4, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #2, #1 -; CHECK-NEXT: ubfx r1, r12, #6, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #3, #1 -; CHECK-NEXT: ubfx r1, r12, #8, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #4, #1 -; CHECK-NEXT: ubfx r1, r12, #10, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #5, #1 -; CHECK-NEXT: ubfx r1, r12, #12, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #6, #1 -; CHECK-NEXT: ubfx r1, r12, #14, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #7, #1 -; CHECK-NEXT: uxtb r1, r3 -; CHECK-NEXT: lsls r3, r3, #31 -; CHECK-NEXT: itt ne -; CHECK-NEXT: ldrbne r3, [r2] -; CHECK-NEXT: vmovne.16 q0[0], r3 -; CHECK-NEXT: lsls r3, r1, #30 -; CHECK-NEXT: itt mi -; CHECK-NEXT: ldrbmi r3, [r2, #1] -; CHECK-NEXT: vmovmi.16 q0[1], r3 -; CHECK-NEXT: lsls r3, r1, #29 -; CHECK-NEXT: itt mi -; CHECK-NEXT: ldrbmi r3, [r2, #2] -; CHECK-NEXT: vmovmi.16 q0[2], r3 -; CHECK-NEXT: lsls r3, r1, #28 -; CHECK-NEXT: itt mi -; CHECK-NEXT: ldrbmi r3, [r2, #3] -; CHECK-NEXT: vmovmi.16 q0[3], r3 -; CHECK-NEXT: lsls r3, r1, #27 -; CHECK-NEXT: itt mi -; CHECK-NEXT: ldrbmi r3, [r2, #4] -; CHECK-NEXT: vmovmi.16 q0[4], r3 -; CHECK-NEXT: lsls r3, r1, #26 -; CHECK-NEXT: itt mi -; CHECK-NEXT: ldrbmi r3, [r2, #5] -; CHECK-NEXT: vmovmi.16 q0[5], r3 -; CHECK-NEXT: lsls r3, r1, #25 -; CHECK-NEXT: itt mi -; CHECK-NEXT: ldrbmi r3, [r2, #6] -; CHECK-NEXT: vmovmi.16 q0[6], r3 -; CHECK-NEXT: lsls r1, r1, #24 -; CHECK-NEXT: itt mi -; CHECK-NEXT: ldrbmi r1, [r2, #7] -; CHECK-NEXT: vmovmi.16 q0[7], r1 -; CHECK-NEXT: movs r2, #0 -; CHECK-NEXT: vmrs r1, p0 -; CHECK-NEXT: vmovlb.u8 q0, q0 -; CHECK-NEXT: and r3, r1, #1 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: bfi r2, r3, #0, #1 -; CHECK-NEXT: ubfx r3, r1, #2, #1 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: bfi r2, r3, #1, #1 -; CHECK-NEXT: ubfx r3, r1, #4, #1 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: bfi r2, r3, #2, #1 -; CHECK-NEXT: ubfx r3, r1, #6, #1 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: bfi r2, r3, #3, #1 -; CHECK-NEXT: ubfx r3, r1, #8, #1 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: bfi r2, r3, #4, #1 -; CHECK-NEXT: ubfx r3, r1, #10, #1 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: bfi r2, r3, #5, #1 -; CHECK-NEXT: ubfx r3, r1, #12, #1 -; CHECK-NEXT: ubfx r1, r1, #14, #1 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: bfi r2, r3, #6, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r2, r1, #7, #1 -; CHECK-NEXT: uxtb r1, r2 -; CHECK-NEXT: lsls r2, r2, #31 -; CHECK-NEXT: itt ne -; CHECK-NEXT: vmovne.u16 r2, q0[0] -; CHECK-NEXT: strhne r2, [r0] -; CHECK-NEXT: lsls r2, r1, #30 -; CHECK-NEXT: itt mi -; CHECK-NEXT: vmovmi.u16 r2, q0[1] -; CHECK-NEXT: strhmi r2, [r0, #2] -; CHECK-NEXT: lsls r2, r1, #29 -; CHECK-NEXT: itt mi -; CHECK-NEXT: vmovmi.u16 r2, q0[2] -; CHECK-NEXT: strhmi r2, [r0, #4] -; CHECK-NEXT: lsls r2, r1, #28 -; CHECK-NEXT: itt mi -; CHECK-NEXT: vmovmi.u16 r2, q0[3] -; CHECK-NEXT: strhmi r2, [r0, #6] -; CHECK-NEXT: lsls r2, r1, #27 -; CHECK-NEXT: itt mi -; CHECK-NEXT: vmovmi.u16 r2, q0[4] -; CHECK-NEXT: strhmi r2, [r0, #8] -; CHECK-NEXT: lsls r2, r1, #26 -; CHECK-NEXT: itt mi -; CHECK-NEXT: vmovmi.u16 r2, q0[5] -; CHECK-NEXT: 
strhmi r2, [r0, #10] -; CHECK-NEXT: lsls r2, r1, #25 -; CHECK-NEXT: itt mi -; CHECK-NEXT: vmovmi.u16 r2, q0[6] -; CHECK-NEXT: strhmi r2, [r0, #12] -; CHECK-NEXT: lsls r1, r1, #24 -; CHECK-NEXT: itt mi -; CHECK-NEXT: vmovmi.u16 r1, q0[7] -; CHECK-NEXT: strhmi r1, [r0, #14] -; CHECK-NEXT: add sp, #16 -; CHECK-NEXT: bx lr -entry: - %0 = load <8 x i16>, <8 x i16>* %mask, align 2 - %1 = icmp sgt <8 x i16> %0, zeroinitializer - %2 = call <8 x i8> @llvm.masked.load.v8i8(<8 x i8>* %src, i32 1, <8 x i1> %1, <8 x i8> undef) - %3 = zext <8 x i8> %2 to <8 x i16> - call void @llvm.masked.store.v8i16(<8 x i16> %3, <8 x i16>* %dest, i32 2, <8 x i1> %1) - ret void -} - -define void @foo_v16i8_v16i8(<16 x i8> *%dest, <16 x i8> *%mask, <16 x i8> *%src) { -; CHECK-LABEL: foo_v16i8_v16i8: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r6, r7, lr} -; CHECK-NEXT: push {r4, r6, r7, lr} -; CHECK-NEXT: .setfp r7, sp, #8 -; CHECK-NEXT: add r7, sp, #8 -; CHECK-NEXT: .pad #32 -; CHECK-NEXT: sub sp, #32 -; CHECK-NEXT: mov r4, sp -; CHECK-NEXT: bfc r4, #0, #4 -; CHECK-NEXT: mov sp, r4 -; CHECK-NEXT: vldrb.u8 q0, [r1] -; CHECK-NEXT: sub.w r4, r7, #8 -; CHECK-NEXT: vcmp.s8 gt, q0, zr -; CHECK-NEXT: @ implicit-def: $q0 -; CHECK-NEXT: vmrs r3, p0 -; CHECK-NEXT: uxth r1, r3 -; CHECK-NEXT: lsls r3, r3, #31 -; CHECK-NEXT: itt ne -; CHECK-NEXT: ldrbne r3, [r2] -; CHECK-NEXT: vmovne.8 q0[0], r3 -; CHECK-NEXT: lsls r3, r1, #30 -; CHECK-NEXT: itt mi -; CHECK-NEXT: ldrbmi r3, [r2, #1] -; CHECK-NEXT: vmovmi.8 q0[1], r3 -; CHECK-NEXT: lsls r3, r1, #29 -; CHECK-NEXT: itt mi -; CHECK-NEXT: ldrbmi r3, [r2, #2] -; CHECK-NEXT: vmovmi.8 q0[2], r3 -; CHECK-NEXT: lsls r3, r1, #28 -; CHECK-NEXT: itt mi -; CHECK-NEXT: ldrbmi r3, [r2, #3] -; CHECK-NEXT: vmovmi.8 q0[3], r3 -; CHECK-NEXT: lsls r3, r1, #27 -; CHECK-NEXT: itt mi -; CHECK-NEXT: ldrbmi r3, [r2, #4] -; CHECK-NEXT: vmovmi.8 q0[4], r3 -; CHECK-NEXT: lsls r3, r1, #26 -; CHECK-NEXT: itt mi -; CHECK-NEXT: ldrbmi r3, [r2, #5] -; CHECK-NEXT: vmovmi.8 q0[5], r3 -; CHECK-NEXT: lsls r3, r1, #25 -; CHECK-NEXT: itt mi -; CHECK-NEXT: ldrbmi r3, [r2, #6] -; CHECK-NEXT: vmovmi.8 q0[6], r3 -; CHECK-NEXT: lsls r3, r1, #24 -; CHECK-NEXT: itt mi -; CHECK-NEXT: ldrbmi r3, [r2, #7] -; CHECK-NEXT: vmovmi.8 q0[7], r3 -; CHECK-NEXT: lsls r3, r1, #23 -; CHECK-NEXT: itt mi -; CHECK-NEXT: ldrbmi r3, [r2, #8] -; CHECK-NEXT: vmovmi.8 q0[8], r3 -; CHECK-NEXT: lsls r3, r1, #22 -; CHECK-NEXT: itt mi -; CHECK-NEXT: ldrbmi r3, [r2, #9] -; CHECK-NEXT: vmovmi.8 q0[9], r3 -; CHECK-NEXT: lsls r3, r1, #21 -; CHECK-NEXT: itt mi -; CHECK-NEXT: ldrbmi r3, [r2, #10] -; CHECK-NEXT: vmovmi.8 q0[10], r3 -; CHECK-NEXT: lsls r3, r1, #20 -; CHECK-NEXT: itt mi -; CHECK-NEXT: ldrbmi r3, [r2, #11] -; CHECK-NEXT: vmovmi.8 q0[11], r3 -; CHECK-NEXT: lsls r3, r1, #19 -; CHECK-NEXT: itt mi -; CHECK-NEXT: ldrbmi r3, [r2, #12] -; CHECK-NEXT: vmovmi.8 q0[12], r3 -; CHECK-NEXT: lsls r3, r1, #18 -; CHECK-NEXT: itt mi -; CHECK-NEXT: ldrbmi r3, [r2, #13] -; CHECK-NEXT: vmovmi.8 q0[13], r3 -; CHECK-NEXT: lsls r3, r1, #17 -; CHECK-NEXT: itt mi -; CHECK-NEXT: ldrbmi r3, [r2, #14] -; CHECK-NEXT: vmovmi.8 q0[14], r3 -; CHECK-NEXT: lsls r1, r1, #16 -; CHECK-NEXT: itt mi -; CHECK-NEXT: ldrbmi r1, [r2, #15] -; CHECK-NEXT: vmovmi.8 q0[15], r1 -; CHECK-NEXT: vmrs r2, p0 -; CHECK-NEXT: uxth r1, r2 -; CHECK-NEXT: lsls r2, r2, #31 -; CHECK-NEXT: itt ne -; CHECK-NEXT: vmovne.u8 r2, q0[0] -; CHECK-NEXT: strbne r2, [r0] -; CHECK-NEXT: lsls r2, r1, #30 -; CHECK-NEXT: itt mi -; CHECK-NEXT: vmovmi.u8 r2, q0[1] -; CHECK-NEXT: strbmi r2, [r0, #1] -; 
CHECK-NEXT: lsls r2, r1, #29 -; CHECK-NEXT: itt mi -; CHECK-NEXT: vmovmi.u8 r2, q0[2] -; CHECK-NEXT: strbmi r2, [r0, #2] -; CHECK-NEXT: lsls r2, r1, #28 -; CHECK-NEXT: itt mi -; CHECK-NEXT: vmovmi.u8 r2, q0[3] -; CHECK-NEXT: strbmi r2, [r0, #3] -; CHECK-NEXT: lsls r2, r1, #27 -; CHECK-NEXT: itt mi -; CHECK-NEXT: vmovmi.u8 r2, q0[4] -; CHECK-NEXT: strbmi r2, [r0, #4] -; CHECK-NEXT: lsls r2, r1, #26 -; CHECK-NEXT: itt mi -; CHECK-NEXT: vmovmi.u8 r2, q0[5] -; CHECK-NEXT: strbmi r2, [r0, #5] -; CHECK-NEXT: lsls r2, r1, #25 -; CHECK-NEXT: itt mi -; CHECK-NEXT: vmovmi.u8 r2, q0[6] -; CHECK-NEXT: strbmi r2, [r0, #6] -; CHECK-NEXT: lsls r2, r1, #24 -; CHECK-NEXT: itt mi -; CHECK-NEXT: vmovmi.u8 r2, q0[7] -; CHECK-NEXT: strbmi r2, [r0, #7] -; CHECK-NEXT: lsls r2, r1, #23 -; CHECK-NEXT: itt mi -; CHECK-NEXT: vmovmi.u8 r2, q0[8] -; CHECK-NEXT: strbmi r2, [r0, #8] -; CHECK-NEXT: lsls r2, r1, #22 -; CHECK-NEXT: itt mi -; CHECK-NEXT: vmovmi.u8 r2, q0[9] -; CHECK-NEXT: strbmi r2, [r0, #9] -; CHECK-NEXT: lsls r2, r1, #21 -; CHECK-NEXT: itt mi -; CHECK-NEXT: vmovmi.u8 r2, q0[10] -; CHECK-NEXT: strbmi r2, [r0, #10] -; CHECK-NEXT: lsls r2, r1, #20 -; CHECK-NEXT: itt mi -; CHECK-NEXT: vmovmi.u8 r2, q0[11] -; CHECK-NEXT: strbmi r2, [r0, #11] -; CHECK-NEXT: lsls r2, r1, #19 -; CHECK-NEXT: itt mi -; CHECK-NEXT: vmovmi.u8 r2, q0[12] -; CHECK-NEXT: strbmi r2, [r0, #12] -; CHECK-NEXT: lsls r2, r1, #18 -; CHECK-NEXT: itt mi -; CHECK-NEXT: vmovmi.u8 r2, q0[13] -; CHECK-NEXT: strbmi r2, [r0, #13] -; CHECK-NEXT: lsls r2, r1, #17 -; CHECK-NEXT: itt mi -; CHECK-NEXT: vmovmi.u8 r2, q0[14] -; CHECK-NEXT: strbmi r2, [r0, #14] -; CHECK-NEXT: lsls r1, r1, #16 -; CHECK-NEXT: itt mi -; CHECK-NEXT: vmovmi.u8 r1, q0[15] -; CHECK-NEXT: strbmi r1, [r0, #15] -; CHECK-NEXT: mov sp, r4 -; CHECK-NEXT: pop {r4, r6, r7, pc} -entry: - %0 = load <16 x i8>, <16 x i8>* %mask, align 1 - %1 = icmp sgt <16 x i8> %0, zeroinitializer - %2 = call <16 x i8> @llvm.masked.load.v16i8(<16 x i8>* %src, i32 1, <16 x i1> %1, <16 x i8> undef) - call void @llvm.masked.store.v16i8(<16 x i8> %2, <16 x i8>* %dest, i32 1, <16 x i1> %1) - ret void -} - -define void @foo_trunc_v8i8_v8i16(<8 x i8> *%dest, <8 x i16> *%mask, <8 x i16> *%src) { -; CHECK-LABEL: foo_trunc_v8i8_v8i16: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .pad #16 -; CHECK-NEXT: sub sp, #16 -; CHECK-NEXT: vldrh.u16 q0, [r1] -; CHECK-NEXT: movs r3, #0 -; CHECK-NEXT: vcmp.s16 gt, q0, zr -; CHECK-NEXT: @ implicit-def: $q0 -; CHECK-NEXT: vmrs r12, p0 -; CHECK-NEXT: and r1, r12, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #0, #1 -; CHECK-NEXT: ubfx r1, r12, #2, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #1, #1 -; CHECK-NEXT: ubfx r1, r12, #4, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #2, #1 -; CHECK-NEXT: ubfx r1, r12, #6, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #3, #1 -; CHECK-NEXT: ubfx r1, r12, #8, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #4, #1 -; CHECK-NEXT: ubfx r1, r12, #10, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #5, #1 -; CHECK-NEXT: ubfx r1, r12, #12, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #6, #1 -; CHECK-NEXT: ubfx r1, r12, #14, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #7, #1 -; CHECK-NEXT: uxtb r1, r3 -; CHECK-NEXT: lsls r3, r3, #31 -; CHECK-NEXT: itt ne -; CHECK-NEXT: ldrhne r3, [r2] -; CHECK-NEXT: vmovne.16 q0[0], r3 -; CHECK-NEXT: lsls r3, r1, #30 -; CHECK-NEXT: itt mi -; CHECK-NEXT: ldrhmi r3, [r2, #2] -; CHECK-NEXT: vmovmi.16 
q0[1], r3 -; CHECK-NEXT: lsls r3, r1, #29 -; CHECK-NEXT: itt mi -; CHECK-NEXT: ldrhmi r3, [r2, #4] -; CHECK-NEXT: vmovmi.16 q0[2], r3 -; CHECK-NEXT: lsls r3, r1, #28 -; CHECK-NEXT: itt mi -; CHECK-NEXT: ldrhmi r3, [r2, #6] -; CHECK-NEXT: vmovmi.16 q0[3], r3 -; CHECK-NEXT: lsls r3, r1, #27 -; CHECK-NEXT: itt mi -; CHECK-NEXT: ldrhmi r3, [r2, #8] -; CHECK-NEXT: vmovmi.16 q0[4], r3 -; CHECK-NEXT: lsls r3, r1, #26 -; CHECK-NEXT: itt mi -; CHECK-NEXT: ldrhmi r3, [r2, #10] -; CHECK-NEXT: vmovmi.16 q0[5], r3 -; CHECK-NEXT: lsls r3, r1, #25 -; CHECK-NEXT: itt mi -; CHECK-NEXT: ldrhmi r3, [r2, #12] -; CHECK-NEXT: vmovmi.16 q0[6], r3 -; CHECK-NEXT: lsls r1, r1, #24 -; CHECK-NEXT: itt mi -; CHECK-NEXT: ldrhmi r1, [r2, #14] -; CHECK-NEXT: vmovmi.16 q0[7], r1 -; CHECK-NEXT: movs r2, #0 -; CHECK-NEXT: vmrs r1, p0 -; CHECK-NEXT: and r3, r1, #1 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: bfi r2, r3, #0, #1 -; CHECK-NEXT: ubfx r3, r1, #2, #1 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: bfi r2, r3, #1, #1 -; CHECK-NEXT: ubfx r3, r1, #4, #1 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: bfi r2, r3, #2, #1 -; CHECK-NEXT: ubfx r3, r1, #6, #1 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: bfi r2, r3, #3, #1 -; CHECK-NEXT: ubfx r3, r1, #8, #1 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: bfi r2, r3, #4, #1 -; CHECK-NEXT: ubfx r3, r1, #10, #1 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: bfi r2, r3, #5, #1 -; CHECK-NEXT: ubfx r3, r1, #12, #1 -; CHECK-NEXT: ubfx r1, r1, #14, #1 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: bfi r2, r3, #6, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r2, r1, #7, #1 -; CHECK-NEXT: uxtb r1, r2 -; CHECK-NEXT: lsls r2, r2, #31 -; CHECK-NEXT: itt ne -; CHECK-NEXT: vmovne.u16 r2, q0[0] -; CHECK-NEXT: strbne r2, [r0] -; CHECK-NEXT: lsls r2, r1, #30 -; CHECK-NEXT: itt mi -; CHECK-NEXT: vmovmi.u16 r2, q0[1] -; CHECK-NEXT: strbmi r2, [r0, #1] -; CHECK-NEXT: lsls r2, r1, #29 -; CHECK-NEXT: itt mi -; CHECK-NEXT: vmovmi.u16 r2, q0[2] -; CHECK-NEXT: strbmi r2, [r0, #2] -; CHECK-NEXT: lsls r2, r1, #28 -; CHECK-NEXT: itt mi -; CHECK-NEXT: vmovmi.u16 r2, q0[3] -; CHECK-NEXT: strbmi r2, [r0, #3] -; CHECK-NEXT: lsls r2, r1, #27 -; CHECK-NEXT: itt mi -; CHECK-NEXT: vmovmi.u16 r2, q0[4] -; CHECK-NEXT: strbmi r2, [r0, #4] -; CHECK-NEXT: lsls r2, r1, #26 -; CHECK-NEXT: itt mi -; CHECK-NEXT: vmovmi.u16 r2, q0[5] -; CHECK-NEXT: strbmi r2, [r0, #5] -; CHECK-NEXT: lsls r2, r1, #25 -; CHECK-NEXT: itt mi -; CHECK-NEXT: vmovmi.u16 r2, q0[6] -; CHECK-NEXT: strbmi r2, [r0, #6] -; CHECK-NEXT: lsls r1, r1, #24 -; CHECK-NEXT: itt mi -; CHECK-NEXT: vmovmi.u16 r1, q0[7] -; CHECK-NEXT: strbmi r1, [r0, #7] -; CHECK-NEXT: add sp, #16 -; CHECK-NEXT: bx lr -entry: - %0 = load <8 x i16>, <8 x i16>* %mask, align 2 - %1 = icmp sgt <8 x i16> %0, zeroinitializer - %2 = call <8 x i16> @llvm.masked.load.v8i16(<8 x i16>* %src, i32 2, <8 x i1> %1, <8 x i16> undef) - %3 = trunc <8 x i16> %2 to <8 x i8> - call void @llvm.masked.store.v8i8(<8 x i8> %3, <8 x i8>* %dest, i32 1, <8 x i1> %1) - ret void -} - -define void @foo_trunc_v4i8_v4i32(<4 x i8> *%dest, <4 x i32> *%mask, <4 x i32> *%src) { -; CHECK-LABEL: foo_trunc_v4i8_v4i32: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .pad #8 -; CHECK-NEXT: sub sp, #8 -; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: movs r3, #0 -; CHECK-NEXT: vcmp.s32 gt, q0, zr -; CHECK-NEXT: @ implicit-def: $q0 -; CHECK-NEXT: vmrs r12, p0 -; CHECK-NEXT: and r1, r12, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #0, #1 -; CHECK-NEXT: ubfx r1, r12, #4, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; 
CHECK-NEXT: bfi r3, r1, #1, #1 -; CHECK-NEXT: ubfx r1, r12, #8, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #2, #1 -; CHECK-NEXT: ubfx r1, r12, #12, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #3, #1 -; CHECK-NEXT: and r1, r3, #15 -; CHECK-NEXT: lsls r3, r1, #31 -; CHECK-NEXT: itt ne -; CHECK-NEXT: ldrne r3, [r2] -; CHECK-NEXT: vmovne.32 q0[0], r3 -; CHECK-NEXT: lsls r3, r1, #30 -; CHECK-NEXT: itt mi -; CHECK-NEXT: ldrmi r3, [r2, #4] -; CHECK-NEXT: vmovmi.32 q0[1], r3 -; CHECK-NEXT: lsls r3, r1, #29 -; CHECK-NEXT: itt mi -; CHECK-NEXT: ldrmi r3, [r2, #8] -; CHECK-NEXT: vmovmi.32 q0[2], r3 -; CHECK-NEXT: lsls r1, r1, #28 -; CHECK-NEXT: itt mi -; CHECK-NEXT: ldrmi r1, [r2, #12] -; CHECK-NEXT: vmovmi.32 q0[3], r1 -; CHECK-NEXT: vmrs r2, p0 -; CHECK-NEXT: movs r1, #0 -; CHECK-NEXT: and r3, r2, #1 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: bfi r1, r3, #0, #1 -; CHECK-NEXT: ubfx r3, r2, #4, #1 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: bfi r1, r3, #1, #1 -; CHECK-NEXT: ubfx r3, r2, #8, #1 -; CHECK-NEXT: ubfx r2, r2, #12, #1 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: bfi r1, r3, #2, #1 -; CHECK-NEXT: rsbs r2, r2, #0 -; CHECK-NEXT: bfi r1, r2, #3, #1 -; CHECK-NEXT: and r1, r1, #15 -; CHECK-NEXT: lsls r2, r1, #31 -; CHECK-NEXT: itt ne -; CHECK-NEXT: vmovne r2, s0 -; CHECK-NEXT: strbne r2, [r0] -; CHECK-NEXT: lsls r2, r1, #30 -; CHECK-NEXT: itt mi -; CHECK-NEXT: vmovmi r2, s1 -; CHECK-NEXT: strbmi r2, [r0, #1] -; CHECK-NEXT: lsls r2, r1, #29 -; CHECK-NEXT: itt mi -; CHECK-NEXT: vmovmi r2, s2 -; CHECK-NEXT: strbmi r2, [r0, #2] -; CHECK-NEXT: lsls r1, r1, #28 -; CHECK-NEXT: itt mi -; CHECK-NEXT: vmovmi r1, s3 -; CHECK-NEXT: strbmi r1, [r0, #3] -; CHECK-NEXT: add sp, #8 -; CHECK-NEXT: bx lr -entry: - %0 = load <4 x i32>, <4 x i32>* %mask, align 4 - %1 = icmp sgt <4 x i32> %0, zeroinitializer - %2 = call <4 x i32> @llvm.masked.load.v4i32(<4 x i32>* %src, i32 4, <4 x i1> %1, <4 x i32> undef) - %3 = trunc <4 x i32> %2 to <4 x i8> - call void @llvm.masked.store.v4i8(<4 x i8> %3, <4 x i8>* %dest, i32 1, <4 x i1> %1) - ret void -} - -define void @foo_trunc_v4i16_v4i32(<4 x i16> *%dest, <4 x i32> *%mask, <4 x i32> *%src) { -; CHECK-LABEL: foo_trunc_v4i16_v4i32: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .pad #8 -; CHECK-NEXT: sub sp, #8 -; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: movs r3, #0 -; CHECK-NEXT: vcmp.s32 gt, q0, zr -; CHECK-NEXT: @ implicit-def: $q0 -; CHECK-NEXT: vmrs r12, p0 -; CHECK-NEXT: and r1, r12, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #0, #1 -; CHECK-NEXT: ubfx r1, r12, #4, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #1, #1 -; CHECK-NEXT: ubfx r1, r12, #8, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #2, #1 -; CHECK-NEXT: ubfx r1, r12, #12, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #3, #1 -; CHECK-NEXT: and r1, r3, #15 -; CHECK-NEXT: lsls r3, r1, #31 -; CHECK-NEXT: itt ne -; CHECK-NEXT: ldrne r3, [r2] -; CHECK-NEXT: vmovne.32 q0[0], r3 -; CHECK-NEXT: lsls r3, r1, #30 -; CHECK-NEXT: itt mi -; CHECK-NEXT: ldrmi r3, [r2, #4] -; CHECK-NEXT: vmovmi.32 q0[1], r3 -; CHECK-NEXT: lsls r3, r1, #29 -; CHECK-NEXT: itt mi -; CHECK-NEXT: ldrmi r3, [r2, #8] -; CHECK-NEXT: vmovmi.32 q0[2], r3 -; CHECK-NEXT: lsls r1, r1, #28 -; CHECK-NEXT: itt mi -; CHECK-NEXT: ldrmi r1, [r2, #12] -; CHECK-NEXT: vmovmi.32 q0[3], r1 -; CHECK-NEXT: vmrs r2, p0 -; CHECK-NEXT: movs r1, #0 -; CHECK-NEXT: and r3, r2, #1 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: bfi r1, r3, #0, #1 -; CHECK-NEXT: ubfx r3, 
r2, #4, #1 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: bfi r1, r3, #1, #1 -; CHECK-NEXT: ubfx r3, r2, #8, #1 -; CHECK-NEXT: ubfx r2, r2, #12, #1 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: bfi r1, r3, #2, #1 -; CHECK-NEXT: rsbs r2, r2, #0 -; CHECK-NEXT: bfi r1, r2, #3, #1 -; CHECK-NEXT: and r1, r1, #15 -; CHECK-NEXT: lsls r2, r1, #31 -; CHECK-NEXT: itt ne -; CHECK-NEXT: vmovne r2, s0 -; CHECK-NEXT: strhne r2, [r0] -; CHECK-NEXT: lsls r2, r1, #30 -; CHECK-NEXT: itt mi -; CHECK-NEXT: vmovmi r2, s1 -; CHECK-NEXT: strhmi r2, [r0, #2] -; CHECK-NEXT: lsls r2, r1, #29 -; CHECK-NEXT: itt mi -; CHECK-NEXT: vmovmi r2, s2 -; CHECK-NEXT: strhmi r2, [r0, #4] -; CHECK-NEXT: lsls r1, r1, #28 -; CHECK-NEXT: itt mi -; CHECK-NEXT: vmovmi r1, s3 -; CHECK-NEXT: strhmi r1, [r0, #6] -; CHECK-NEXT: add sp, #8 -; CHECK-NEXT: bx lr -entry: - %0 = load <4 x i32>, <4 x i32>* %mask, align 4 - %1 = icmp sgt <4 x i32> %0, zeroinitializer - %2 = call <4 x i32> @llvm.masked.load.v4i32(<4 x i32>* %src, i32 4, <4 x i1> %1, <4 x i32> undef) - %3 = trunc <4 x i32> %2 to <4 x i16> - call void @llvm.masked.store.v4i16(<4 x i16> %3, <4 x i16>* %dest, i32 2, <4 x i1> %1) - ret void -} - -define void @foo_v4f32_v4f32(<4 x float> *%dest, <4 x i32> *%mask, <4 x float> *%src) { -; CHECK-LABEL: foo_v4f32_v4f32: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .pad #8 -; CHECK-NEXT: sub sp, #8 -; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: movs r3, #0 -; CHECK-NEXT: vcmp.s32 gt, q0, zr -; CHECK-NEXT: @ implicit-def: $q0 -; CHECK-NEXT: vmrs r12, p0 -; CHECK-NEXT: and r1, r12, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #0, #1 -; CHECK-NEXT: ubfx r1, r12, #4, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #1, #1 -; CHECK-NEXT: ubfx r1, r12, #8, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #2, #1 -; CHECK-NEXT: ubfx r1, r12, #12, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #3, #1 -; CHECK-NEXT: and r1, r3, #15 -; CHECK-NEXT: lsls r3, r1, #31 -; CHECK-NEXT: it ne -; CHECK-NEXT: vldrne s0, [r2] -; CHECK-NEXT: lsls r3, r1, #30 -; CHECK-NEXT: it mi -; CHECK-NEXT: vldrmi s1, [r2, #4] -; CHECK-NEXT: lsls r3, r1, #29 -; CHECK-NEXT: it mi -; CHECK-NEXT: vldrmi s2, [r2, #8] -; CHECK-NEXT: lsls r1, r1, #28 -; CHECK-NEXT: it mi -; CHECK-NEXT: vldrmi s3, [r2, #12] -; CHECK-NEXT: vmrs r2, p0 -; CHECK-NEXT: movs r1, #0 -; CHECK-NEXT: and r3, r2, #1 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: bfi r1, r3, #0, #1 -; CHECK-NEXT: ubfx r3, r2, #4, #1 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: bfi r1, r3, #1, #1 -; CHECK-NEXT: ubfx r3, r2, #8, #1 -; CHECK-NEXT: ubfx r2, r2, #12, #1 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: bfi r1, r3, #2, #1 -; CHECK-NEXT: rsbs r2, r2, #0 -; CHECK-NEXT: bfi r1, r2, #3, #1 -; CHECK-NEXT: and r1, r1, #15 -; CHECK-NEXT: lsls r2, r1, #31 -; CHECK-NEXT: it ne -; CHECK-NEXT: vstrne s0, [r0] -; CHECK-NEXT: lsls r2, r1, #30 -; CHECK-NEXT: it mi -; CHECK-NEXT: vstrmi s1, [r0, #4] -; CHECK-NEXT: lsls r2, r1, #29 -; CHECK-NEXT: it mi -; CHECK-NEXT: vstrmi s2, [r0, #8] -; CHECK-NEXT: lsls r1, r1, #28 -; CHECK-NEXT: it mi -; CHECK-NEXT: vstrmi s3, [r0, #12] -; CHECK-NEXT: add sp, #8 -; CHECK-NEXT: bx lr -entry: - %0 = load <4 x i32>, <4 x i32>* %mask, align 4 - %1 = icmp sgt <4 x i32> %0, zeroinitializer - %2 = call <4 x float> @llvm.masked.load.v4f32(<4 x float>* %src, i32 4, <4 x i1> %1, <4 x float> undef) - call void @llvm.masked.store.v4f32(<4 x float> %2, <4 x float>* %dest, i32 4, <4 x i1> %1) - ret void -} - -define void @foo_v8f16_v8f16(<8 x half> *%dest, <8 
x i16> *%mask, <8 x half> *%src) { -; CHECK-LABEL: foo_v8f16_v8f16: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .pad #16 -; CHECK-NEXT: sub sp, #16 -; CHECK-NEXT: vldrh.u16 q0, [r1] -; CHECK-NEXT: movs r3, #0 -; CHECK-NEXT: vcmp.s16 gt, q0, zr -; CHECK-NEXT: @ implicit-def: $q0 -; CHECK-NEXT: vmrs r12, p0 -; CHECK-NEXT: and r1, r12, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #0, #1 -; CHECK-NEXT: ubfx r1, r12, #2, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #1, #1 -; CHECK-NEXT: ubfx r1, r12, #4, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #2, #1 -; CHECK-NEXT: ubfx r1, r12, #6, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #3, #1 -; CHECK-NEXT: ubfx r1, r12, #8, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #4, #1 -; CHECK-NEXT: ubfx r1, r12, #10, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #5, #1 -; CHECK-NEXT: ubfx r1, r12, #12, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #6, #1 -; CHECK-NEXT: ubfx r1, r12, #14, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r3, r1, #7, #1 -; CHECK-NEXT: uxtb r1, r3 -; CHECK-NEXT: lsls r3, r3, #31 -; CHECK-NEXT: bne .LBB13_18 -; CHECK-NEXT: @ %bb.1: @ %else -; CHECK-NEXT: lsls r3, r1, #30 -; CHECK-NEXT: bmi .LBB13_19 -; CHECK-NEXT: .LBB13_2: @ %else2 -; CHECK-NEXT: lsls r3, r1, #29 -; CHECK-NEXT: bmi .LBB13_20 -; CHECK-NEXT: .LBB13_3: @ %else5 -; CHECK-NEXT: lsls r3, r1, #28 -; CHECK-NEXT: bmi .LBB13_21 -; CHECK-NEXT: .LBB13_4: @ %else8 -; CHECK-NEXT: lsls r3, r1, #27 -; CHECK-NEXT: bmi .LBB13_22 -; CHECK-NEXT: .LBB13_5: @ %else11 -; CHECK-NEXT: lsls r3, r1, #26 -; CHECK-NEXT: bmi .LBB13_23 -; CHECK-NEXT: .LBB13_6: @ %else14 -; CHECK-NEXT: lsls r3, r1, #25 -; CHECK-NEXT: bmi .LBB13_24 -; CHECK-NEXT: .LBB13_7: @ %else17 -; CHECK-NEXT: lsls r1, r1, #24 -; CHECK-NEXT: bpl .LBB13_9 -; CHECK-NEXT: .LBB13_8: @ %cond.load19 -; CHECK-NEXT: vldr.16 s4, [r2, #14] -; CHECK-NEXT: vmov r1, s4 -; CHECK-NEXT: vmov.16 q0[7], r1 -; CHECK-NEXT: .LBB13_9: @ %else20 -; CHECK-NEXT: vmrs r1, p0 -; CHECK-NEXT: movs r2, #0 -; CHECK-NEXT: and r3, r1, #1 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: bfi r2, r3, #0, #1 -; CHECK-NEXT: ubfx r3, r1, #2, #1 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: bfi r2, r3, #1, #1 -; CHECK-NEXT: ubfx r3, r1, #4, #1 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: bfi r2, r3, #2, #1 -; CHECK-NEXT: ubfx r3, r1, #6, #1 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: bfi r2, r3, #3, #1 -; CHECK-NEXT: ubfx r3, r1, #8, #1 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: bfi r2, r3, #4, #1 -; CHECK-NEXT: ubfx r3, r1, #10, #1 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: bfi r2, r3, #5, #1 -; CHECK-NEXT: ubfx r3, r1, #12, #1 -; CHECK-NEXT: ubfx r1, r1, #14, #1 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: bfi r2, r3, #6, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r2, r1, #7, #1 -; CHECK-NEXT: uxtb r1, r2 -; CHECK-NEXT: lsls r2, r2, #31 -; CHECK-NEXT: bne .LBB13_25 -; CHECK-NEXT: @ %bb.10: @ %else23 -; CHECK-NEXT: lsls r2, r1, #30 -; CHECK-NEXT: bmi .LBB13_26 -; CHECK-NEXT: .LBB13_11: @ %else25 -; CHECK-NEXT: lsls r2, r1, #29 -; CHECK-NEXT: bmi .LBB13_27 -; CHECK-NEXT: .LBB13_12: @ %else27 -; CHECK-NEXT: lsls r2, r1, #28 -; CHECK-NEXT: bmi .LBB13_28 -; CHECK-NEXT: .LBB13_13: @ %else29 -; CHECK-NEXT: lsls r2, r1, #27 -; CHECK-NEXT: bmi .LBB13_29 -; CHECK-NEXT: .LBB13_14: @ %else31 -; CHECK-NEXT: lsls r2, r1, #26 -; CHECK-NEXT: bmi .LBB13_30 -; CHECK-NEXT: .LBB13_15: @ %else33 -; CHECK-NEXT: lsls r2, r1, #25 -; CHECK-NEXT: bmi 
.LBB13_31 -; CHECK-NEXT: .LBB13_16: @ %else35 -; CHECK-NEXT: lsls r1, r1, #24 -; CHECK-NEXT: bmi .LBB13_32 -; CHECK-NEXT: .LBB13_17: @ %else37 -; CHECK-NEXT: add sp, #16 -; CHECK-NEXT: bx lr -; CHECK-NEXT: .LBB13_18: @ %cond.load -; CHECK-NEXT: vldr.16 s0, [r2] -; CHECK-NEXT: lsls r3, r1, #30 -; CHECK-NEXT: bpl .LBB13_2 -; CHECK-NEXT: .LBB13_19: @ %cond.load1 -; CHECK-NEXT: vldr.16 s4, [r2, #2] -; CHECK-NEXT: vmov r3, s4 -; CHECK-NEXT: vmov.16 q0[1], r3 -; CHECK-NEXT: lsls r3, r1, #29 -; CHECK-NEXT: bpl .LBB13_3 -; CHECK-NEXT: .LBB13_20: @ %cond.load4 -; CHECK-NEXT: vldr.16 s4, [r2, #4] -; CHECK-NEXT: vmov r3, s4 -; CHECK-NEXT: vmov.16 q0[2], r3 -; CHECK-NEXT: lsls r3, r1, #28 -; CHECK-NEXT: bpl .LBB13_4 -; CHECK-NEXT: .LBB13_21: @ %cond.load7 -; CHECK-NEXT: vldr.16 s4, [r2, #6] -; CHECK-NEXT: vmov r3, s4 -; CHECK-NEXT: vmov.16 q0[3], r3 -; CHECK-NEXT: lsls r3, r1, #27 -; CHECK-NEXT: bpl .LBB13_5 -; CHECK-NEXT: .LBB13_22: @ %cond.load10 -; CHECK-NEXT: vldr.16 s4, [r2, #8] -; CHECK-NEXT: vmov r3, s4 -; CHECK-NEXT: vmov.16 q0[4], r3 -; CHECK-NEXT: lsls r3, r1, #26 -; CHECK-NEXT: bpl .LBB13_6 -; CHECK-NEXT: .LBB13_23: @ %cond.load13 -; CHECK-NEXT: vldr.16 s4, [r2, #10] -; CHECK-NEXT: vmov r3, s4 -; CHECK-NEXT: vmov.16 q0[5], r3 -; CHECK-NEXT: lsls r3, r1, #25 -; CHECK-NEXT: bpl.w .LBB13_7 -; CHECK-NEXT: .LBB13_24: @ %cond.load16 -; CHECK-NEXT: vldr.16 s4, [r2, #12] -; CHECK-NEXT: vmov r3, s4 -; CHECK-NEXT: vmov.16 q0[6], r3 -; CHECK-NEXT: lsls r1, r1, #24 -; CHECK-NEXT: bmi.w .LBB13_8 -; CHECK-NEXT: b .LBB13_9 -; CHECK-NEXT: .LBB13_25: @ %cond.store -; CHECK-NEXT: vstr.16 s0, [r0] -; CHECK-NEXT: lsls r2, r1, #30 -; CHECK-NEXT: bpl .LBB13_11 -; CHECK-NEXT: .LBB13_26: @ %cond.store24 -; CHECK-NEXT: vmovx.f16 s4, s0 -; CHECK-NEXT: vstr.16 s4, [r0, #2] -; CHECK-NEXT: lsls r2, r1, #29 -; CHECK-NEXT: bpl .LBB13_12 -; CHECK-NEXT: .LBB13_27: @ %cond.store26 -; CHECK-NEXT: vstr.16 s1, [r0, #4] -; CHECK-NEXT: lsls r2, r1, #28 -; CHECK-NEXT: bpl .LBB13_13 -; CHECK-NEXT: .LBB13_28: @ %cond.store28 -; CHECK-NEXT: vmovx.f16 s4, s1 -; CHECK-NEXT: vstr.16 s4, [r0, #6] -; CHECK-NEXT: lsls r2, r1, #27 -; CHECK-NEXT: bpl .LBB13_14 -; CHECK-NEXT: .LBB13_29: @ %cond.store30 -; CHECK-NEXT: vstr.16 s2, [r0, #8] -; CHECK-NEXT: lsls r2, r1, #26 -; CHECK-NEXT: bpl .LBB13_15 -; CHECK-NEXT: .LBB13_30: @ %cond.store32 -; CHECK-NEXT: vmovx.f16 s4, s2 -; CHECK-NEXT: vstr.16 s4, [r0, #10] -; CHECK-NEXT: lsls r2, r1, #25 -; CHECK-NEXT: bpl .LBB13_16 -; CHECK-NEXT: .LBB13_31: @ %cond.store34 -; CHECK-NEXT: vstr.16 s3, [r0, #12] -; CHECK-NEXT: lsls r1, r1, #24 -; CHECK-NEXT: bpl .LBB13_17 -; CHECK-NEXT: .LBB13_32: @ %cond.store36 -; CHECK-NEXT: vmovx.f16 s0, s3 -; CHECK-NEXT: vstr.16 s0, [r0, #14] -; CHECK-NEXT: add sp, #16 -; CHECK-NEXT: bx lr -entry: - %0 = load <8 x i16>, <8 x i16>* %mask, align 2 - %1 = icmp sgt <8 x i16> %0, zeroinitializer - %2 = call <8 x half> @llvm.masked.load.v8f16(<8 x half>* %src, i32 2, <8 x i1> %1, <8 x half> undef) - call void @llvm.masked.store.v8f16(<8 x half> %2, <8 x half>* %dest, i32 2, <8 x i1> %1) - ret void -} - -declare void @llvm.masked.store.v4i32(<4 x i32>, <4 x i32>*, i32, <4 x i1>) -declare void @llvm.masked.store.v8i16(<8 x i16>, <8 x i16>*, i32, <8 x i1>) -declare void @llvm.masked.store.v16i8(<16 x i8>, <16 x i8>*, i32, <16 x i1>) -declare void @llvm.masked.store.v8f16(<8 x half>, <8 x half>*, i32, <8 x i1>) -declare void @llvm.masked.store.v4f32(<4 x float>, <4 x float>*, i32, <4 x i1>) -declare <16 x i8> @llvm.masked.load.v16i8(<16 x i8>*, i32, <16 x i1>, <16 x i8>) -declare 
<8 x i16> @llvm.masked.load.v8i16(<8 x i16>*, i32, <8 x i1>, <8 x i16>) -declare <4 x i32> @llvm.masked.load.v4i32(<4 x i32>*, i32, <4 x i1>, <4 x i32>) -declare <4 x float> @llvm.masked.load.v4f32(<4 x float>*, i32, <4 x i1>, <4 x float>) -declare <8 x half> @llvm.masked.load.v8f16(<8 x half>*, i32, <8 x i1>, <8 x half>) - -declare void @llvm.masked.store.v8i8(<8 x i8>, <8 x i8>*, i32, <8 x i1>) -declare void @llvm.masked.store.v4i8(<4 x i8>, <4 x i8>*, i32, <4 x i1>) -declare void @llvm.masked.store.v4i16(<4 x i16>, <4 x i16>*, i32, <4 x i1>) -declare <4 x i16> @llvm.masked.load.v4i16(<4 x i16>*, i32, <4 x i1>, <4 x i16>) -declare <4 x i8> @llvm.masked.load.v4i8(<4 x i8>*, i32, <4 x i1>, <4 x i8>) -declare <8 x i8> @llvm.masked.load.v8i8(<8 x i8>*, i32, <8 x i1>, <8 x i8>) +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve,+fullfp16 -enable-arm-maskedldst -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE +; RUN: llc -mtriple=thumbebv8.1m.main-arm-none-eabi -mattr=+mve,+fullfp16 -enable-arm-maskedldst -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE + +define void @foo_v4i32_v4i32(<4 x i32> *%dest, <4 x i32> *%mask, <4 x i32> *%src) { +; CHECK-LABEL: foo_v4i32_v4i32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: vcmp.s32 gt, q0, zr +; CHECK-NEXT: vpstt +; CHECK-NEXT: vldrwt.u32 q0, [r2] +; CHECK-NEXT: vstrwt.32 q0, [r0] +; CHECK-NEXT: bx lr +entry: + %0 = load <4 x i32>, <4 x i32>* %mask, align 4 + %1 = icmp sgt <4 x i32> %0, zeroinitializer + %2 = call <4 x i32> @llvm.masked.load.v4i32(<4 x i32>* %src, i32 4, <4 x i1> %1, <4 x i32> undef) + call void @llvm.masked.store.v4i32(<4 x i32> %2, <4 x i32>* %dest, i32 4, <4 x i1> %1) + ret void +} + +define void @foo_sext_v4i32_v4i8(<4 x i32> *%dest, <4 x i32> *%mask, <4 x i8> *%src) { +; CHECK-LABEL: foo_sext_v4i32_v4i8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .pad #4 +; CHECK-NEXT: sub sp, #4 +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: movs r3, #0 +; CHECK-NEXT: vcmp.s32 gt, q0, zr +; CHECK-NEXT: @ implicit-def: $q0 +; CHECK-NEXT: vmrs r12, p0 +; CHECK-NEXT: and r1, r12, #1 +; CHECK-NEXT: rsbs r1, r1, #0 +; CHECK-NEXT: bfi r3, r1, #0, #1 +; CHECK-NEXT: ubfx r1, r12, #4, #1 +; CHECK-NEXT: rsbs r1, r1, #0 +; CHECK-NEXT: bfi r3, r1, #1, #1 +; CHECK-NEXT: ubfx r1, r12, #8, #1 +; CHECK-NEXT: rsbs r1, r1, #0 +; CHECK-NEXT: bfi r3, r1, #2, #1 +; CHECK-NEXT: ubfx r1, r12, #12, #1 +; CHECK-NEXT: rsbs r1, r1, #0 +; CHECK-NEXT: bfi r3, r1, #3, #1 +; CHECK-NEXT: and r1, r3, #15 +; CHECK-NEXT: lsls r3, r1, #31 +; CHECK-NEXT: itt ne +; CHECK-NEXT: ldrbne r3, [r2] +; CHECK-NEXT: vmovne.32 q0[0], r3 +; CHECK-NEXT: lsls r3, r1, #30 +; CHECK-NEXT: itt mi +; CHECK-NEXT: ldrbmi r3, [r2, #1] +; CHECK-NEXT: vmovmi.32 q0[1], r3 +; CHECK-NEXT: lsls r3, r1, #29 +; CHECK-NEXT: itt mi +; CHECK-NEXT: ldrbmi r3, [r2, #2] +; CHECK-NEXT: vmovmi.32 q0[2], r3 +; CHECK-NEXT: lsls r1, r1, #28 +; CHECK-NEXT: itt mi +; CHECK-NEXT: ldrbmi r1, [r2, #3] +; CHECK-NEXT: vmovmi.32 q0[3], r1 +; CHECK-NEXT: vmovlb.s8 q0, q0 +; CHECK-NEXT: vmovlb.s16 q0, q0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vstrwt.32 q0, [r0] +; CHECK-NEXT: add sp, #4 +; CHECK-NEXT: bx lr +entry: + %0 = load <4 x i32>, <4 x i32>* %mask, align 4 + %1 = icmp sgt <4 x i32> %0, zeroinitializer + %2 = call <4 x i8> @llvm.masked.load.v4i8(<4 x i8>* %src, i32 1, <4 x i1> %1, <4 x i8> undef) + %3 = sext <4 x i8> %2 to <4 
x i32> + call void @llvm.masked.store.v4i32(<4 x i32> %3, <4 x i32>* %dest, i32 4, <4 x i1> %1) + ret void +} + +define void @foo_sext_v4i32_v4i16(<4 x i32> *%dest, <4 x i32> *%mask, <4 x i16> *%src) { +; CHECK-LABEL: foo_sext_v4i32_v4i16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .pad #4 +; CHECK-NEXT: sub sp, #4 +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: movs r3, #0 +; CHECK-NEXT: vcmp.s32 gt, q0, zr +; CHECK-NEXT: @ implicit-def: $q0 +; CHECK-NEXT: vmrs r12, p0 +; CHECK-NEXT: and r1, r12, #1 +; CHECK-NEXT: rsbs r1, r1, #0 +; CHECK-NEXT: bfi r3, r1, #0, #1 +; CHECK-NEXT: ubfx r1, r12, #4, #1 +; CHECK-NEXT: rsbs r1, r1, #0 +; CHECK-NEXT: bfi r3, r1, #1, #1 +; CHECK-NEXT: ubfx r1, r12, #8, #1 +; CHECK-NEXT: rsbs r1, r1, #0 +; CHECK-NEXT: bfi r3, r1, #2, #1 +; CHECK-NEXT: ubfx r1, r12, #12, #1 +; CHECK-NEXT: rsbs r1, r1, #0 +; CHECK-NEXT: bfi r3, r1, #3, #1 +; CHECK-NEXT: and r1, r3, #15 +; CHECK-NEXT: lsls r3, r1, #31 +; CHECK-NEXT: itt ne +; CHECK-NEXT: ldrhne r3, [r2] +; CHECK-NEXT: vmovne.32 q0[0], r3 +; CHECK-NEXT: lsls r3, r1, #30 +; CHECK-NEXT: itt mi +; CHECK-NEXT: ldrhmi r3, [r2, #2] +; CHECK-NEXT: vmovmi.32 q0[1], r3 +; CHECK-NEXT: lsls r3, r1, #29 +; CHECK-NEXT: itt mi +; CHECK-NEXT: ldrhmi r3, [r2, #4] +; CHECK-NEXT: vmovmi.32 q0[2], r3 +; CHECK-NEXT: lsls r1, r1, #28 +; CHECK-NEXT: itt mi +; CHECK-NEXT: ldrhmi r1, [r2, #6] +; CHECK-NEXT: vmovmi.32 q0[3], r1 +; CHECK-NEXT: vmovlb.s16 q0, q0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vstrwt.32 q0, [r0] +; CHECK-NEXT: add sp, #4 +; CHECK-NEXT: bx lr +entry: + %0 = load <4 x i32>, <4 x i32>* %mask, align 4 + %1 = icmp sgt <4 x i32> %0, zeroinitializer + %2 = call <4 x i16> @llvm.masked.load.v4i16(<4 x i16>* %src, i32 2, <4 x i1> %1, <4 x i16> undef) + %3 = sext <4 x i16> %2 to <4 x i32> + call void @llvm.masked.store.v4i32(<4 x i32> %3, <4 x i32>* %dest, i32 4, <4 x i1> %1) + ret void +} + +define void @foo_zext_v4i32_v4i8(<4 x i32> *%dest, <4 x i32> *%mask, <4 x i8> *%src) { +; CHECK-LABEL: foo_zext_v4i32_v4i8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .pad #4 +; CHECK-NEXT: sub sp, #4 +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: movs r3, #0 +; CHECK-NEXT: vmov.i32 q1, #0xff +; CHECK-NEXT: vcmp.s32 gt, q0, zr +; CHECK-NEXT: @ implicit-def: $q0 +; CHECK-NEXT: vmrs r12, p0 +; CHECK-NEXT: and r1, r12, #1 +; CHECK-NEXT: rsbs r1, r1, #0 +; CHECK-NEXT: bfi r3, r1, #0, #1 +; CHECK-NEXT: ubfx r1, r12, #4, #1 +; CHECK-NEXT: rsbs r1, r1, #0 +; CHECK-NEXT: bfi r3, r1, #1, #1 +; CHECK-NEXT: ubfx r1, r12, #8, #1 +; CHECK-NEXT: rsbs r1, r1, #0 +; CHECK-NEXT: bfi r3, r1, #2, #1 +; CHECK-NEXT: ubfx r1, r12, #12, #1 +; CHECK-NEXT: rsbs r1, r1, #0 +; CHECK-NEXT: bfi r3, r1, #3, #1 +; CHECK-NEXT: and r1, r3, #15 +; CHECK-NEXT: lsls r3, r1, #31 +; CHECK-NEXT: itt ne +; CHECK-NEXT: ldrbne r3, [r2] +; CHECK-NEXT: vmovne.32 q0[0], r3 +; CHECK-NEXT: lsls r3, r1, #30 +; CHECK-NEXT: itt mi +; CHECK-NEXT: ldrbmi r3, [r2, #1] +; CHECK-NEXT: vmovmi.32 q0[1], r3 +; CHECK-NEXT: lsls r3, r1, #29 +; CHECK-NEXT: itt mi +; CHECK-NEXT: ldrbmi r3, [r2, #2] +; CHECK-NEXT: vmovmi.32 q0[2], r3 +; CHECK-NEXT: lsls r1, r1, #28 +; CHECK-NEXT: itt mi +; CHECK-NEXT: ldrbmi r1, [r2, #3] +; CHECK-NEXT: vmovmi.32 q0[3], r1 +; CHECK-NEXT: vand q0, q0, q1 +; CHECK-NEXT: vpst +; CHECK-NEXT: vstrwt.32 q0, [r0] +; CHECK-NEXT: add sp, #4 +; CHECK-NEXT: bx lr +entry: + %0 = load <4 x i32>, <4 x i32>* %mask, align 4 + %1 = icmp sgt <4 x i32> %0, zeroinitializer + %2 = call <4 x i8> @llvm.masked.load.v4i8(<4 x i8>* %src, i32 1, <4 x i1> %1, <4 x i8> undef) + %3 = zext <4 x i8> %2 to 
<4 x i32> + call void @llvm.masked.store.v4i32(<4 x i32> %3, <4 x i32>* %dest, i32 4, <4 x i1> %1) + ret void +} + +define void @foo_zext_v4i32_v4i16(<4 x i32> *%dest, <4 x i32> *%mask, <4 x i16> *%src) { +; CHECK-LABEL: foo_zext_v4i32_v4i16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .pad #4 +; CHECK-NEXT: sub sp, #4 +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: movs r3, #0 +; CHECK-NEXT: vcmp.s32 gt, q0, zr +; CHECK-NEXT: @ implicit-def: $q0 +; CHECK-NEXT: vmrs r12, p0 +; CHECK-NEXT: and r1, r12, #1 +; CHECK-NEXT: rsbs r1, r1, #0 +; CHECK-NEXT: bfi r3, r1, #0, #1 +; CHECK-NEXT: ubfx r1, r12, #4, #1 +; CHECK-NEXT: rsbs r1, r1, #0 +; CHECK-NEXT: bfi r3, r1, #1, #1 +; CHECK-NEXT: ubfx r1, r12, #8, #1 +; CHECK-NEXT: rsbs r1, r1, #0 +; CHECK-NEXT: bfi r3, r1, #2, #1 +; CHECK-NEXT: ubfx r1, r12, #12, #1 +; CHECK-NEXT: rsbs r1, r1, #0 +; CHECK-NEXT: bfi r3, r1, #3, #1 +; CHECK-NEXT: and r1, r3, #15 +; CHECK-NEXT: lsls r3, r1, #31 +; CHECK-NEXT: itt ne +; CHECK-NEXT: ldrhne r3, [r2] +; CHECK-NEXT: vmovne.32 q0[0], r3 +; CHECK-NEXT: lsls r3, r1, #30 +; CHECK-NEXT: itt mi +; CHECK-NEXT: ldrhmi r3, [r2, #2] +; CHECK-NEXT: vmovmi.32 q0[1], r3 +; CHECK-NEXT: lsls r3, r1, #29 +; CHECK-NEXT: itt mi +; CHECK-NEXT: ldrhmi r3, [r2, #4] +; CHECK-NEXT: vmovmi.32 q0[2], r3 +; CHECK-NEXT: lsls r1, r1, #28 +; CHECK-NEXT: itt mi +; CHECK-NEXT: ldrhmi r1, [r2, #6] +; CHECK-NEXT: vmovmi.32 q0[3], r1 +; CHECK-NEXT: vmovlb.u16 q0, q0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vstrwt.32 q0, [r0] +; CHECK-NEXT: add sp, #4 +; CHECK-NEXT: bx lr +entry: + %0 = load <4 x i32>, <4 x i32>* %mask, align 4 + %1 = icmp sgt <4 x i32> %0, zeroinitializer + %2 = call <4 x i16> @llvm.masked.load.v4i16(<4 x i16>* %src, i32 2, <4 x i1> %1, <4 x i16> undef) + %3 = zext <4 x i16> %2 to <4 x i32> + call void @llvm.masked.store.v4i32(<4 x i32> %3, <4 x i32>* %dest, i32 4, <4 x i1> %1) + ret void +} + +define void @foo_v8i16_v8i16(<8 x i16> *%dest, <8 x i16> *%mask, <8 x i16> *%src) { +; CHECK-LABEL: foo_v8i16_v8i16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vcmp.s16 gt, q0, zr +; CHECK-NEXT: vpstt +; CHECK-NEXT: vldrht.u16 q0, [r2] +; CHECK-NEXT: vstrht.16 q0, [r0] +; CHECK-NEXT: bx lr +entry: + %0 = load <8 x i16>, <8 x i16>* %mask, align 2 + %1 = icmp sgt <8 x i16> %0, zeroinitializer + %2 = call <8 x i16> @llvm.masked.load.v8i16(<8 x i16>* %src, i32 2, <8 x i1> %1, <8 x i16> undef) + call void @llvm.masked.store.v8i16(<8 x i16> %2, <8 x i16>* %dest, i32 2, <8 x i1> %1) + ret void +} + +define void @foo_sext_v8i16_v8i8(<8 x i16> *%dest, <8 x i16> *%mask, <8 x i8> *%src) { +; CHECK-LABEL: foo_sext_v8i16_v8i8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .pad #8 +; CHECK-NEXT: sub sp, #8 +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: movs r3, #0 +; CHECK-NEXT: vcmp.s16 gt, q0, zr +; CHECK-NEXT: @ implicit-def: $q0 +; CHECK-NEXT: vmrs r12, p0 +; CHECK-NEXT: and r1, r12, #1 +; CHECK-NEXT: rsbs r1, r1, #0 +; CHECK-NEXT: bfi r3, r1, #0, #1 +; CHECK-NEXT: ubfx r1, r12, #2, #1 +; CHECK-NEXT: rsbs r1, r1, #0 +; CHECK-NEXT: bfi r3, r1, #1, #1 +; CHECK-NEXT: ubfx r1, r12, #4, #1 +; CHECK-NEXT: rsbs r1, r1, #0 +; CHECK-NEXT: bfi r3, r1, #2, #1 +; CHECK-NEXT: ubfx r1, r12, #6, #1 +; CHECK-NEXT: rsbs r1, r1, #0 +; CHECK-NEXT: bfi r3, r1, #3, #1 +; CHECK-NEXT: ubfx r1, r12, #8, #1 +; CHECK-NEXT: rsbs r1, r1, #0 +; CHECK-NEXT: bfi r3, r1, #4, #1 +; CHECK-NEXT: ubfx r1, r12, #10, #1 +; CHECK-NEXT: rsbs r1, r1, #0 +; CHECK-NEXT: bfi r3, r1, #5, #1 +; CHECK-NEXT: ubfx r1, r12, #12, #1 +; CHECK-NEXT: rsbs r1, r1, #0 +; 
CHECK-NEXT: bfi r3, r1, #6, #1 +; CHECK-NEXT: ubfx r1, r12, #14, #1 +; CHECK-NEXT: rsbs r1, r1, #0 +; CHECK-NEXT: bfi r3, r1, #7, #1 +; CHECK-NEXT: uxtb r1, r3 +; CHECK-NEXT: lsls r3, r3, #31 +; CHECK-NEXT: itt ne +; CHECK-NEXT: ldrbne r3, [r2] +; CHECK-NEXT: vmovne.16 q0[0], r3 +; CHECK-NEXT: lsls r3, r1, #30 +; CHECK-NEXT: itt mi +; CHECK-NEXT: ldrbmi r3, [r2, #1] +; CHECK-NEXT: vmovmi.16 q0[1], r3 +; CHECK-NEXT: lsls r3, r1, #29 +; CHECK-NEXT: itt mi +; CHECK-NEXT: ldrbmi r3, [r2, #2] +; CHECK-NEXT: vmovmi.16 q0[2], r3 +; CHECK-NEXT: lsls r3, r1, #28 +; CHECK-NEXT: itt mi +; CHECK-NEXT: ldrbmi r3, [r2, #3] +; CHECK-NEXT: vmovmi.16 q0[3], r3 +; CHECK-NEXT: lsls r3, r1, #27 +; CHECK-NEXT: itt mi +; CHECK-NEXT: ldrbmi r3, [r2, #4] +; CHECK-NEXT: vmovmi.16 q0[4], r3 +; CHECK-NEXT: lsls r3, r1, #26 +; CHECK-NEXT: itt mi +; CHECK-NEXT: ldrbmi r3, [r2, #5] +; CHECK-NEXT: vmovmi.16 q0[5], r3 +; CHECK-NEXT: lsls r3, r1, #25 +; CHECK-NEXT: itt mi +; CHECK-NEXT: ldrbmi r3, [r2, #6] +; CHECK-NEXT: vmovmi.16 q0[6], r3 +; CHECK-NEXT: lsls r1, r1, #24 +; CHECK-NEXT: itt mi +; CHECK-NEXT: ldrbmi r1, [r2, #7] +; CHECK-NEXT: vmovmi.16 q0[7], r1 +; CHECK-NEXT: vmovlb.s8 q0, q0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vstrht.16 q0, [r0] +; CHECK-NEXT: add sp, #8 +; CHECK-NEXT: bx lr +entry: + %0 = load <8 x i16>, <8 x i16>* %mask, align 2 + %1 = icmp sgt <8 x i16> %0, zeroinitializer + %2 = call <8 x i8> @llvm.masked.load.v8i8(<8 x i8>* %src, i32 1, <8 x i1> %1, <8 x i8> undef) + %3 = sext <8 x i8> %2 to <8 x i16> + call void @llvm.masked.store.v8i16(<8 x i16> %3, <8 x i16>* %dest, i32 2, <8 x i1> %1) + ret void +} + +define void @foo_zext_v8i16_v8i8(<8 x i16> *%dest, <8 x i16> *%mask, <8 x i8> *%src) { +; CHECK-LABEL: foo_zext_v8i16_v8i8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .pad #8 +; CHECK-NEXT: sub sp, #8 +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: movs r3, #0 +; CHECK-NEXT: vcmp.s16 gt, q0, zr +; CHECK-NEXT: @ implicit-def: $q0 +; CHECK-NEXT: vmrs r12, p0 +; CHECK-NEXT: and r1, r12, #1 +; CHECK-NEXT: rsbs r1, r1, #0 +; CHECK-NEXT: bfi r3, r1, #0, #1 +; CHECK-NEXT: ubfx r1, r12, #2, #1 +; CHECK-NEXT: rsbs r1, r1, #0 +; CHECK-NEXT: bfi r3, r1, #1, #1 +; CHECK-NEXT: ubfx r1, r12, #4, #1 +; CHECK-NEXT: rsbs r1, r1, #0 +; CHECK-NEXT: bfi r3, r1, #2, #1 +; CHECK-NEXT: ubfx r1, r12, #6, #1 +; CHECK-NEXT: rsbs r1, r1, #0 +; CHECK-NEXT: bfi r3, r1, #3, #1 +; CHECK-NEXT: ubfx r1, r12, #8, #1 +; CHECK-NEXT: rsbs r1, r1, #0 +; CHECK-NEXT: bfi r3, r1, #4, #1 +; CHECK-NEXT: ubfx r1, r12, #10, #1 +; CHECK-NEXT: rsbs r1, r1, #0 +; CHECK-NEXT: bfi r3, r1, #5, #1 +; CHECK-NEXT: ubfx r1, r12, #12, #1 +; CHECK-NEXT: rsbs r1, r1, #0 +; CHECK-NEXT: bfi r3, r1, #6, #1 +; CHECK-NEXT: ubfx r1, r12, #14, #1 +; CHECK-NEXT: rsbs r1, r1, #0 +; CHECK-NEXT: bfi r3, r1, #7, #1 +; CHECK-NEXT: uxtb r1, r3 +; CHECK-NEXT: lsls r3, r3, #31 +; CHECK-NEXT: itt ne +; CHECK-NEXT: ldrbne r3, [r2] +; CHECK-NEXT: vmovne.16 q0[0], r3 +; CHECK-NEXT: lsls r3, r1, #30 +; CHECK-NEXT: itt mi +; CHECK-NEXT: ldrbmi r3, [r2, #1] +; CHECK-NEXT: vmovmi.16 q0[1], r3 +; CHECK-NEXT: lsls r3, r1, #29 +; CHECK-NEXT: itt mi +; CHECK-NEXT: ldrbmi r3, [r2, #2] +; CHECK-NEXT: vmovmi.16 q0[2], r3 +; CHECK-NEXT: lsls r3, r1, #28 +; CHECK-NEXT: itt mi +; CHECK-NEXT: ldrbmi r3, [r2, #3] +; CHECK-NEXT: vmovmi.16 q0[3], r3 +; CHECK-NEXT: lsls r3, r1, #27 +; CHECK-NEXT: itt mi +; CHECK-NEXT: ldrbmi r3, [r2, #4] +; CHECK-NEXT: vmovmi.16 q0[4], r3 +; CHECK-NEXT: lsls r3, r1, #26 +; CHECK-NEXT: itt mi +; CHECK-NEXT: ldrbmi r3, [r2, #5] +; CHECK-NEXT: vmovmi.16 q0[5], 
r3 +; CHECK-NEXT: lsls r3, r1, #25 +; CHECK-NEXT: itt mi +; CHECK-NEXT: ldrbmi r3, [r2, #6] +; CHECK-NEXT: vmovmi.16 q0[6], r3 +; CHECK-NEXT: lsls r1, r1, #24 +; CHECK-NEXT: itt mi +; CHECK-NEXT: ldrbmi r1, [r2, #7] +; CHECK-NEXT: vmovmi.16 q0[7], r1 +; CHECK-NEXT: vmovlb.u8 q0, q0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vstrht.16 q0, [r0] +; CHECK-NEXT: add sp, #8 +; CHECK-NEXT: bx lr +entry: + %0 = load <8 x i16>, <8 x i16>* %mask, align 2 + %1 = icmp sgt <8 x i16> %0, zeroinitializer + %2 = call <8 x i8> @llvm.masked.load.v8i8(<8 x i8>* %src, i32 1, <8 x i1> %1, <8 x i8> undef) + %3 = zext <8 x i8> %2 to <8 x i16> + call void @llvm.masked.store.v8i16(<8 x i16> %3, <8 x i16>* %dest, i32 2, <8 x i1> %1) + ret void +} + +define void @foo_v16i8_v16i8(<16 x i8> *%dest, <16 x i8> *%mask, <16 x i8> *%src) { +; CHECK-LABEL: foo_v16i8_v16i8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q0, [r1] +; CHECK-NEXT: vcmp.s8 gt, q0, zr +; CHECK-NEXT: vpstt +; CHECK-NEXT: vldrbt.u8 q0, [r2] +; CHECK-NEXT: vstrbt.8 q0, [r0] +; CHECK-NEXT: bx lr +entry: + %0 = load <16 x i8>, <16 x i8>* %mask, align 1 + %1 = icmp sgt <16 x i8> %0, zeroinitializer + %2 = call <16 x i8> @llvm.masked.load.v16i8(<16 x i8>* %src, i32 1, <16 x i1> %1, <16 x i8> undef) + call void @llvm.masked.store.v16i8(<16 x i8> %2, <16 x i8>* %dest, i32 1, <16 x i1> %1) + ret void +} + +define void @foo_trunc_v8i8_v8i16(<8 x i8> *%dest, <8 x i16> *%mask, <8 x i16> *%src) { +; CHECK-LABEL: foo_trunc_v8i8_v8i16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .pad #8 +; CHECK-NEXT: sub sp, #8 +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: movs r3, #0 +; CHECK-NEXT: vcmp.s16 gt, q0, zr +; CHECK-NEXT: vpst +; CHECK-NEXT: vldrht.u16 q0, [r2] +; CHECK-NEXT: vmrs r1, p0 +; CHECK-NEXT: and r2, r1, #1 +; CHECK-NEXT: rsbs r2, r2, #0 +; CHECK-NEXT: bfi r3, r2, #0, #1 +; CHECK-NEXT: ubfx r2, r1, #2, #1 +; CHECK-NEXT: rsbs r2, r2, #0 +; CHECK-NEXT: bfi r3, r2, #1, #1 +; CHECK-NEXT: ubfx r2, r1, #4, #1 +; CHECK-NEXT: rsbs r2, r2, #0 +; CHECK-NEXT: bfi r3, r2, #2, #1 +; CHECK-NEXT: ubfx r2, r1, #6, #1 +; CHECK-NEXT: rsbs r2, r2, #0 +; CHECK-NEXT: bfi r3, r2, #3, #1 +; CHECK-NEXT: ubfx r2, r1, #8, #1 +; CHECK-NEXT: rsbs r2, r2, #0 +; CHECK-NEXT: bfi r3, r2, #4, #1 +; CHECK-NEXT: ubfx r2, r1, #10, #1 +; CHECK-NEXT: rsbs r2, r2, #0 +; CHECK-NEXT: bfi r3, r2, #5, #1 +; CHECK-NEXT: ubfx r2, r1, #12, #1 +; CHECK-NEXT: ubfx r1, r1, #14, #1 +; CHECK-NEXT: rsbs r2, r2, #0 +; CHECK-NEXT: bfi r3, r2, #6, #1 +; CHECK-NEXT: rsbs r1, r1, #0 +; CHECK-NEXT: bfi r3, r1, #7, #1 +; CHECK-NEXT: lsls r2, r3, #31 +; CHECK-NEXT: uxtb r1, r3 +; CHECK-NEXT: itt ne +; CHECK-NEXT: vmovne.u16 r2, q0[0] +; CHECK-NEXT: strbne r2, [r0] +; CHECK-NEXT: lsls r2, r1, #30 +; CHECK-NEXT: itt mi +; CHECK-NEXT: vmovmi.u16 r2, q0[1] +; CHECK-NEXT: strbmi r2, [r0, #1] +; CHECK-NEXT: lsls r2, r1, #29 +; CHECK-NEXT: itt mi +; CHECK-NEXT: vmovmi.u16 r2, q0[2] +; CHECK-NEXT: strbmi r2, [r0, #2] +; CHECK-NEXT: lsls r2, r1, #28 +; CHECK-NEXT: itt mi +; CHECK-NEXT: vmovmi.u16 r2, q0[3] +; CHECK-NEXT: strbmi r2, [r0, #3] +; CHECK-NEXT: lsls r2, r1, #27 +; CHECK-NEXT: itt mi +; CHECK-NEXT: vmovmi.u16 r2, q0[4] +; CHECK-NEXT: strbmi r2, [r0, #4] +; CHECK-NEXT: lsls r2, r1, #26 +; CHECK-NEXT: itt mi +; CHECK-NEXT: vmovmi.u16 r2, q0[5] +; CHECK-NEXT: strbmi r2, [r0, #5] +; CHECK-NEXT: lsls r2, r1, #25 +; CHECK-NEXT: itt mi +; CHECK-NEXT: vmovmi.u16 r2, q0[6] +; CHECK-NEXT: strbmi r2, [r0, #6] +; CHECK-NEXT: lsls r1, r1, #24 +; CHECK-NEXT: itt mi +; CHECK-NEXT: vmovmi.u16 r1, q0[7] +; CHECK-NEXT: strbmi r1, 
[r0, #7] +; CHECK-NEXT: add sp, #8 +; CHECK-NEXT: bx lr +entry: + %0 = load <8 x i16>, <8 x i16>* %mask, align 2 + %1 = icmp sgt <8 x i16> %0, zeroinitializer + %2 = call <8 x i16> @llvm.masked.load.v8i16(<8 x i16>* %src, i32 2, <8 x i1> %1, <8 x i16> undef) + %3 = trunc <8 x i16> %2 to <8 x i8> + call void @llvm.masked.store.v8i8(<8 x i8> %3, <8 x i8>* %dest, i32 1, <8 x i1> %1) + ret void +} + +define void @foo_trunc_v4i8_v4i32(<4 x i8> *%dest, <4 x i32> *%mask, <4 x i32> *%src) { +; CHECK-LABEL: foo_trunc_v4i8_v4i32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .pad #4 +; CHECK-NEXT: sub sp, #4 +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: movs r3, #0 +; CHECK-NEXT: vcmp.s32 gt, q0, zr +; CHECK-NEXT: vpst +; CHECK-NEXT: vldrwt.u32 q0, [r2] +; CHECK-NEXT: vmrs r1, p0 +; CHECK-NEXT: and r2, r1, #1 +; CHECK-NEXT: rsbs r2, r2, #0 +; CHECK-NEXT: bfi r3, r2, #0, #1 +; CHECK-NEXT: ubfx r2, r1, #4, #1 +; CHECK-NEXT: rsbs r2, r2, #0 +; CHECK-NEXT: bfi r3, r2, #1, #1 +; CHECK-NEXT: ubfx r2, r1, #8, #1 +; CHECK-NEXT: ubfx r1, r1, #12, #1 +; CHECK-NEXT: rsbs r2, r2, #0 +; CHECK-NEXT: bfi r3, r2, #2, #1 +; CHECK-NEXT: rsbs r1, r1, #0 +; CHECK-NEXT: bfi r3, r1, #3, #1 +; CHECK-NEXT: and r1, r3, #15 +; CHECK-NEXT: lsls r2, r1, #31 +; CHECK-NEXT: itt ne +; CHECK-NEXT: vmovne r2, s0 +; CHECK-NEXT: strbne r2, [r0] +; CHECK-NEXT: lsls r2, r1, #30 +; CHECK-NEXT: itt mi +; CHECK-NEXT: vmovmi r2, s1 +; CHECK-NEXT: strbmi r2, [r0, #1] +; CHECK-NEXT: lsls r2, r1, #29 +; CHECK-NEXT: itt mi +; CHECK-NEXT: vmovmi r2, s2 +; CHECK-NEXT: strbmi r2, [r0, #2] +; CHECK-NEXT: lsls r1, r1, #28 +; CHECK-NEXT: itt mi +; CHECK-NEXT: vmovmi r1, s3 +; CHECK-NEXT: strbmi r1, [r0, #3] +; CHECK-NEXT: add sp, #4 +; CHECK-NEXT: bx lr +entry: + %0 = load <4 x i32>, <4 x i32>* %mask, align 4 + %1 = icmp sgt <4 x i32> %0, zeroinitializer + %2 = call <4 x i32> @llvm.masked.load.v4i32(<4 x i32>* %src, i32 4, <4 x i1> %1, <4 x i32> undef) + %3 = trunc <4 x i32> %2 to <4 x i8> + call void @llvm.masked.store.v4i8(<4 x i8> %3, <4 x i8>* %dest, i32 1, <4 x i1> %1) + ret void +} + +define void @foo_trunc_v4i16_v4i32(<4 x i16> *%dest, <4 x i32> *%mask, <4 x i32> *%src) { +; CHECK-LABEL: foo_trunc_v4i16_v4i32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .pad #4 +; CHECK-NEXT: sub sp, #4 +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: movs r3, #0 +; CHECK-NEXT: vcmp.s32 gt, q0, zr +; CHECK-NEXT: vpst +; CHECK-NEXT: vldrwt.u32 q0, [r2] +; CHECK-NEXT: vmrs r1, p0 +; CHECK-NEXT: and r2, r1, #1 +; CHECK-NEXT: rsbs r2, r2, #0 +; CHECK-NEXT: bfi r3, r2, #0, #1 +; CHECK-NEXT: ubfx r2, r1, #4, #1 +; CHECK-NEXT: rsbs r2, r2, #0 +; CHECK-NEXT: bfi r3, r2, #1, #1 +; CHECK-NEXT: ubfx r2, r1, #8, #1 +; CHECK-NEXT: ubfx r1, r1, #12, #1 +; CHECK-NEXT: rsbs r2, r2, #0 +; CHECK-NEXT: bfi r3, r2, #2, #1 +; CHECK-NEXT: rsbs r1, r1, #0 +; CHECK-NEXT: bfi r3, r1, #3, #1 +; CHECK-NEXT: and r1, r3, #15 +; CHECK-NEXT: lsls r2, r1, #31 +; CHECK-NEXT: itt ne +; CHECK-NEXT: vmovne r2, s0 +; CHECK-NEXT: strhne r2, [r0] +; CHECK-NEXT: lsls r2, r1, #30 +; CHECK-NEXT: itt mi +; CHECK-NEXT: vmovmi r2, s1 +; CHECK-NEXT: strhmi r2, [r0, #2] +; CHECK-NEXT: lsls r2, r1, #29 +; CHECK-NEXT: itt mi +; CHECK-NEXT: vmovmi r2, s2 +; CHECK-NEXT: strhmi r2, [r0, #4] +; CHECK-NEXT: lsls r1, r1, #28 +; CHECK-NEXT: itt mi +; CHECK-NEXT: vmovmi r1, s3 +; CHECK-NEXT: strhmi r1, [r0, #6] +; CHECK-NEXT: add sp, #4 +; CHECK-NEXT: bx lr +entry: + %0 = load <4 x i32>, <4 x i32>* %mask, align 4 + %1 = icmp sgt <4 x i32> %0, zeroinitializer + %2 = call <4 x i32> @llvm.masked.load.v4i32(<4 x i32>* 
%src, i32 4, <4 x i1> %1, <4 x i32> undef) + %3 = trunc <4 x i32> %2 to <4 x i16> + call void @llvm.masked.store.v4i16(<4 x i16> %3, <4 x i16>* %dest, i32 2, <4 x i1> %1) + ret void +} + +define void @foo_v4f32_v4f32(<4 x float> *%dest, <4 x i32> *%mask, <4 x float> *%src) { +; CHECK-LABEL: foo_v4f32_v4f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: vcmp.s32 gt, q0, zr +; CHECK-NEXT: vpstt +; CHECK-NEXT: vldrwt.u32 q0, [r2] +; CHECK-NEXT: vstrwt.32 q0, [r0] +; CHECK-NEXT: bx lr +entry: + %0 = load <4 x i32>, <4 x i32>* %mask, align 4 + %1 = icmp sgt <4 x i32> %0, zeroinitializer + %2 = call <4 x float> @llvm.masked.load.v4f32(<4 x float>* %src, i32 4, <4 x i1> %1, <4 x float> undef) + call void @llvm.masked.store.v4f32(<4 x float> %2, <4 x float>* %dest, i32 4, <4 x i1> %1) + ret void +} + +define void @foo_v8f16_v8f16(<8 x half> *%dest, <8 x i16> *%mask, <8 x half> *%src) { +; CHECK-LABEL: foo_v8f16_v8f16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vcmp.s16 gt, q0, zr +; CHECK-NEXT: vpstt +; CHECK-NEXT: vldrht.u16 q0, [r2] +; CHECK-NEXT: vstrht.16 q0, [r0] +; CHECK-NEXT: bx lr +entry: + %0 = load <8 x i16>, <8 x i16>* %mask, align 2 + %1 = icmp sgt <8 x i16> %0, zeroinitializer + %2 = call <8 x half> @llvm.masked.load.v8f16(<8 x half>* %src, i32 2, <8 x i1> %1, <8 x half> undef) + call void @llvm.masked.store.v8f16(<8 x half> %2, <8 x half>* %dest, i32 2, <8 x i1> %1) + ret void +} + +declare void @llvm.masked.store.v4i32(<4 x i32>, <4 x i32>*, i32, <4 x i1>) +declare void @llvm.masked.store.v8i16(<8 x i16>, <8 x i16>*, i32, <8 x i1>) +declare void @llvm.masked.store.v16i8(<16 x i8>, <16 x i8>*, i32, <16 x i1>) +declare void @llvm.masked.store.v8f16(<8 x half>, <8 x half>*, i32, <8 x i1>) +declare void @llvm.masked.store.v4f32(<4 x float>, <4 x float>*, i32, <4 x i1>) +declare <16 x i8> @llvm.masked.load.v16i8(<16 x i8>*, i32, <16 x i1>, <16 x i8>) +declare <8 x i16> @llvm.masked.load.v8i16(<8 x i16>*, i32, <8 x i1>, <8 x i16>) +declare <4 x i32> @llvm.masked.load.v4i32(<4 x i32>*, i32, <4 x i1>, <4 x i32>) +declare <4 x float> @llvm.masked.load.v4f32(<4 x float>*, i32, <4 x i1>, <4 x float>) +declare <8 x half> @llvm.masked.load.v8f16(<8 x half>*, i32, <8 x i1>, <8 x half>) + +declare void @llvm.masked.store.v8i8(<8 x i8>, <8 x i8>*, i32, <8 x i1>) +declare void @llvm.masked.store.v4i8(<4 x i8>, <4 x i8>*, i32, <4 x i1>) +declare void @llvm.masked.store.v4i16(<4 x i16>, <4 x i16>*, i32, <4 x i1>) +declare <4 x i16> @llvm.masked.load.v4i16(<4 x i16>*, i32, <4 x i1>, <4 x i16>) +declare <4 x i8> @llvm.masked.load.v4i8(<4 x i8>*, i32, <4 x i1>, <4 x i8>) +declare <8 x i8> @llvm.masked.load.v8i8(<8 x i8>*, i32, <8 x i1>, <8 x i8>) diff --git a/test/CodeGen/Thumb2/mve-masked-load.ll b/test/CodeGen/Thumb2/mve-masked-load.ll dissimilarity index 82% index b1d048ecdbd..9d73c8241a2 100644 --- a/test/CodeGen/Thumb2/mve-masked-load.ll +++ b/test/CodeGen/Thumb2/mve-masked-load.ll @@ -1,4451 +1,952 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE -; RUN: llc -mtriple=thumbebv8.1m.main-arm-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE - -define arm_aapcs_vfpcc <4 x i32> @masked_v4i32_align4_zero(<4 x i32> *%dest, <4 x i32> %a) { -; CHECK-LE-LABEL: 
masked_v4i32_align4_zero: -; CHECK-LE: @ %bb.0: @ %entry -; CHECK-LE-NEXT: .pad #4 -; CHECK-LE-NEXT: sub sp, #4 -; CHECK-LE-NEXT: vcmp.s32 gt, q0, zr -; CHECK-LE-NEXT: movs r2, #0 -; CHECK-LE-NEXT: vmrs r1, p0 -; CHECK-LE-NEXT: mov.w r12, #0 -; CHECK-LE-NEXT: and r3, r1, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #0, #1 -; CHECK-LE-NEXT: ubfx r3, r1, #4, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #1, #1 -; CHECK-LE-NEXT: ubfx r3, r1, #8, #1 -; CHECK-LE-NEXT: ubfx r1, r1, #12, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #2, #1 -; CHECK-LE-NEXT: rsbs r1, r1, #0 -; CHECK-LE-NEXT: bfi r2, r1, #3, #1 -; CHECK-LE-NEXT: and r1, r2, #15 -; CHECK-LE-NEXT: lsls r2, r1, #31 -; CHECK-LE-NEXT: beq .LBB0_2 -; CHECK-LE-NEXT: @ %bb.1: @ %cond.load -; CHECK-LE-NEXT: ldr r2, [r0] -; CHECK-LE-NEXT: vdup.32 q0, r12 -; CHECK-LE-NEXT: vmov.32 q0[0], r2 -; CHECK-LE-NEXT: b .LBB0_3 -; CHECK-LE-NEXT: .LBB0_2: -; CHECK-LE-NEXT: vmov.i32 q0, #0x0 -; CHECK-LE-NEXT: .LBB0_3: @ %else -; CHECK-LE-NEXT: lsls r2, r1, #30 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrmi r2, [r0, #4] -; CHECK-LE-NEXT: vmovmi.32 q0[1], r2 -; CHECK-LE-NEXT: lsls r2, r1, #29 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrmi r2, [r0, #8] -; CHECK-LE-NEXT: vmovmi.32 q0[2], r2 -; CHECK-LE-NEXT: lsls r1, r1, #28 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrmi r0, [r0, #12] -; CHECK-LE-NEXT: vmovmi.32 q0[3], r0 -; CHECK-LE-NEXT: add sp, #4 -; CHECK-LE-NEXT: bx lr -; -; CHECK-BE-LABEL: masked_v4i32_align4_zero: -; CHECK-BE: @ %bb.0: @ %entry -; CHECK-BE-NEXT: .pad #4 -; CHECK-BE-NEXT: sub sp, #4 -; CHECK-BE-NEXT: vrev64.32 q1, q0 -; CHECK-BE-NEXT: movs r2, #0 -; CHECK-BE-NEXT: vcmp.s32 gt, q1, zr -; CHECK-BE-NEXT: mov.w r12, #0 -; CHECK-BE-NEXT: vmrs r1, p0 -; CHECK-BE-NEXT: and r3, r1, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #0, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #4, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #1, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #8, #1 -; CHECK-BE-NEXT: ubfx r1, r1, #12, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #2, #1 -; CHECK-BE-NEXT: rsbs r1, r1, #0 -; CHECK-BE-NEXT: bfi r2, r1, #3, #1 -; CHECK-BE-NEXT: and r1, r2, #15 -; CHECK-BE-NEXT: lsls r2, r1, #31 -; CHECK-BE-NEXT: beq .LBB0_2 -; CHECK-BE-NEXT: @ %bb.1: @ %cond.load -; CHECK-BE-NEXT: ldr r2, [r0] -; CHECK-BE-NEXT: vdup.32 q1, r12 -; CHECK-BE-NEXT: vmov.32 q1[0], r2 -; CHECK-BE-NEXT: b .LBB0_3 -; CHECK-BE-NEXT: .LBB0_2: -; CHECK-BE-NEXT: vmov.i32 q1, #0x0 -; CHECK-BE-NEXT: .LBB0_3: @ %else -; CHECK-BE-NEXT: lsls r2, r1, #30 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrmi r2, [r0, #4] -; CHECK-BE-NEXT: vmovmi.32 q1[1], r2 -; CHECK-BE-NEXT: lsls r2, r1, #29 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrmi r2, [r0, #8] -; CHECK-BE-NEXT: vmovmi.32 q1[2], r2 -; CHECK-BE-NEXT: lsls r1, r1, #28 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrmi r0, [r0, #12] -; CHECK-BE-NEXT: vmovmi.32 q1[3], r0 -; CHECK-BE-NEXT: vrev64.32 q0, q1 -; CHECK-BE-NEXT: add sp, #4 -; CHECK-BE-NEXT: bx lr -entry: - %c = icmp sgt <4 x i32> %a, zeroinitializer - %l = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %dest, i32 4, <4 x i1> %c, <4 x i32> zeroinitializer) - ret <4 x i32> %l -} - -define arm_aapcs_vfpcc <4 x i32> @masked_v4i32_align4_undef(<4 x i32> *%dest, <4 x i32> %a) { -; CHECK-LE-LABEL: masked_v4i32_align4_undef: -; CHECK-LE: @ %bb.0: @ %entry -; CHECK-LE-NEXT: .pad #4 -; CHECK-LE-NEXT: sub sp, #4 -; CHECK-LE-NEXT: vcmp.s32 gt, q0, zr -; 
CHECK-LE-NEXT: movs r1, #0 -; CHECK-LE-NEXT: vmrs r2, p0 -; CHECK-LE-NEXT: @ implicit-def: $q0 -; CHECK-LE-NEXT: and r3, r2, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r1, r3, #0, #1 -; CHECK-LE-NEXT: ubfx r3, r2, #4, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r1, r3, #1, #1 -; CHECK-LE-NEXT: ubfx r3, r2, #8, #1 -; CHECK-LE-NEXT: ubfx r2, r2, #12, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r1, r3, #2, #1 -; CHECK-LE-NEXT: rsbs r2, r2, #0 -; CHECK-LE-NEXT: bfi r1, r2, #3, #1 -; CHECK-LE-NEXT: and r1, r1, #15 -; CHECK-LE-NEXT: lsls r2, r1, #31 -; CHECK-LE-NEXT: itt ne -; CHECK-LE-NEXT: ldrne r2, [r0] -; CHECK-LE-NEXT: vmovne.32 q0[0], r2 -; CHECK-LE-NEXT: lsls r2, r1, #30 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrmi r2, [r0, #4] -; CHECK-LE-NEXT: vmovmi.32 q0[1], r2 -; CHECK-LE-NEXT: lsls r2, r1, #29 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrmi r2, [r0, #8] -; CHECK-LE-NEXT: vmovmi.32 q0[2], r2 -; CHECK-LE-NEXT: lsls r1, r1, #28 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrmi r0, [r0, #12] -; CHECK-LE-NEXT: vmovmi.32 q0[3], r0 -; CHECK-LE-NEXT: add sp, #4 -; CHECK-LE-NEXT: bx lr -; -; CHECK-BE-LABEL: masked_v4i32_align4_undef: -; CHECK-BE: @ %bb.0: @ %entry -; CHECK-BE-NEXT: .pad #4 -; CHECK-BE-NEXT: sub sp, #4 -; CHECK-BE-NEXT: vrev64.32 q1, q0 -; CHECK-BE-NEXT: movs r1, #0 -; CHECK-BE-NEXT: vcmp.s32 gt, q1, zr -; CHECK-BE-NEXT: @ implicit-def: $q1 -; CHECK-BE-NEXT: vmrs r2, p0 -; CHECK-BE-NEXT: and r3, r2, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r1, r3, #0, #1 -; CHECK-BE-NEXT: ubfx r3, r2, #4, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r1, r3, #1, #1 -; CHECK-BE-NEXT: ubfx r3, r2, #8, #1 -; CHECK-BE-NEXT: ubfx r2, r2, #12, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r1, r3, #2, #1 -; CHECK-BE-NEXT: rsbs r2, r2, #0 -; CHECK-BE-NEXT: bfi r1, r2, #3, #1 -; CHECK-BE-NEXT: and r1, r1, #15 -; CHECK-BE-NEXT: lsls r2, r1, #31 -; CHECK-BE-NEXT: itt ne -; CHECK-BE-NEXT: ldrne r2, [r0] -; CHECK-BE-NEXT: vmovne.32 q1[0], r2 -; CHECK-BE-NEXT: lsls r2, r1, #30 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrmi r2, [r0, #4] -; CHECK-BE-NEXT: vmovmi.32 q1[1], r2 -; CHECK-BE-NEXT: lsls r2, r1, #29 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrmi r2, [r0, #8] -; CHECK-BE-NEXT: vmovmi.32 q1[2], r2 -; CHECK-BE-NEXT: lsls r1, r1, #28 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrmi r0, [r0, #12] -; CHECK-BE-NEXT: vmovmi.32 q1[3], r0 -; CHECK-BE-NEXT: vrev64.32 q0, q1 -; CHECK-BE-NEXT: add sp, #4 -; CHECK-BE-NEXT: bx lr -entry: - %c = icmp sgt <4 x i32> %a, zeroinitializer - %l = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %dest, i32 4, <4 x i1> %c, <4 x i32> undef) - ret <4 x i32> %l -} - -define arm_aapcs_vfpcc <4 x i32> @masked_v4i32_align1_undef(<4 x i32> *%dest, <4 x i32> %a) { -; CHECK-LE-LABEL: masked_v4i32_align1_undef: -; CHECK-LE: @ %bb.0: @ %entry -; CHECK-LE-NEXT: .pad #4 -; CHECK-LE-NEXT: sub sp, #4 -; CHECK-LE-NEXT: vcmp.s32 gt, q0, zr -; CHECK-LE-NEXT: movs r1, #0 -; CHECK-LE-NEXT: vmrs r2, p0 -; CHECK-LE-NEXT: @ implicit-def: $q0 -; CHECK-LE-NEXT: and r3, r2, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r1, r3, #0, #1 -; CHECK-LE-NEXT: ubfx r3, r2, #4, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r1, r3, #1, #1 -; CHECK-LE-NEXT: ubfx r3, r2, #8, #1 -; CHECK-LE-NEXT: ubfx r2, r2, #12, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r1, r3, #2, #1 -; CHECK-LE-NEXT: rsbs r2, r2, #0 -; CHECK-LE-NEXT: bfi r1, r2, #3, #1 -; CHECK-LE-NEXT: and 
r1, r1, #15 -; CHECK-LE-NEXT: lsls r2, r1, #31 -; CHECK-LE-NEXT: itt ne -; CHECK-LE-NEXT: ldrne r2, [r0] -; CHECK-LE-NEXT: vmovne.32 q0[0], r2 -; CHECK-LE-NEXT: lsls r2, r1, #30 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrmi r2, [r0, #4] -; CHECK-LE-NEXT: vmovmi.32 q0[1], r2 -; CHECK-LE-NEXT: lsls r2, r1, #29 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrmi r2, [r0, #8] -; CHECK-LE-NEXT: vmovmi.32 q0[2], r2 -; CHECK-LE-NEXT: lsls r1, r1, #28 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrmi r0, [r0, #12] -; CHECK-LE-NEXT: vmovmi.32 q0[3], r0 -; CHECK-LE-NEXT: add sp, #4 -; CHECK-LE-NEXT: bx lr -; -; CHECK-BE-LABEL: masked_v4i32_align1_undef: -; CHECK-BE: @ %bb.0: @ %entry -; CHECK-BE-NEXT: .pad #4 -; CHECK-BE-NEXT: sub sp, #4 -; CHECK-BE-NEXT: vrev64.32 q1, q0 -; CHECK-BE-NEXT: movs r1, #0 -; CHECK-BE-NEXT: vcmp.s32 gt, q1, zr -; CHECK-BE-NEXT: @ implicit-def: $q1 -; CHECK-BE-NEXT: vmrs r2, p0 -; CHECK-BE-NEXT: and r3, r2, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r1, r3, #0, #1 -; CHECK-BE-NEXT: ubfx r3, r2, #4, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r1, r3, #1, #1 -; CHECK-BE-NEXT: ubfx r3, r2, #8, #1 -; CHECK-BE-NEXT: ubfx r2, r2, #12, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r1, r3, #2, #1 -; CHECK-BE-NEXT: rsbs r2, r2, #0 -; CHECK-BE-NEXT: bfi r1, r2, #3, #1 -; CHECK-BE-NEXT: and r1, r1, #15 -; CHECK-BE-NEXT: lsls r2, r1, #31 -; CHECK-BE-NEXT: itt ne -; CHECK-BE-NEXT: ldrne r2, [r0] -; CHECK-BE-NEXT: vmovne.32 q1[0], r2 -; CHECK-BE-NEXT: lsls r2, r1, #30 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrmi r2, [r0, #4] -; CHECK-BE-NEXT: vmovmi.32 q1[1], r2 -; CHECK-BE-NEXT: lsls r2, r1, #29 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrmi r2, [r0, #8] -; CHECK-BE-NEXT: vmovmi.32 q1[2], r2 -; CHECK-BE-NEXT: lsls r1, r1, #28 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrmi r0, [r0, #12] -; CHECK-BE-NEXT: vmovmi.32 q1[3], r0 -; CHECK-BE-NEXT: vrev64.32 q0, q1 -; CHECK-BE-NEXT: add sp, #4 -; CHECK-BE-NEXT: bx lr -entry: - %c = icmp sgt <4 x i32> %a, zeroinitializer - %l = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %dest, i32 1, <4 x i1> %c, <4 x i32> undef) - ret <4 x i32> %l -} - -define arm_aapcs_vfpcc <4 x i32> @masked_v4i32_align4_other(<4 x i32> *%dest, <4 x i32> %a) { -; CHECK-LE-LABEL: masked_v4i32_align4_other: -; CHECK-LE: @ %bb.0: @ %entry -; CHECK-LE-NEXT: .pad #4 -; CHECK-LE-NEXT: sub sp, #4 -; CHECK-LE-NEXT: vcmp.s32 gt, q0, zr -; CHECK-LE-NEXT: movs r1, #0 -; CHECK-LE-NEXT: vmrs r2, p0 -; CHECK-LE-NEXT: and r3, r2, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r1, r3, #0, #1 -; CHECK-LE-NEXT: ubfx r3, r2, #4, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r1, r3, #1, #1 -; CHECK-LE-NEXT: ubfx r3, r2, #8, #1 -; CHECK-LE-NEXT: ubfx r2, r2, #12, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r1, r3, #2, #1 -; CHECK-LE-NEXT: rsbs r2, r2, #0 -; CHECK-LE-NEXT: bfi r1, r2, #3, #1 -; CHECK-LE-NEXT: and r1, r1, #15 -; CHECK-LE-NEXT: lsls r2, r1, #31 -; CHECK-LE-NEXT: itt ne -; CHECK-LE-NEXT: ldrne r2, [r0] -; CHECK-LE-NEXT: vmovne.32 q0[0], r2 -; CHECK-LE-NEXT: lsls r2, r1, #30 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrmi r2, [r0, #4] -; CHECK-LE-NEXT: vmovmi.32 q0[1], r2 -; CHECK-LE-NEXT: lsls r2, r1, #29 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrmi r2, [r0, #8] -; CHECK-LE-NEXT: vmovmi.32 q0[2], r2 -; CHECK-LE-NEXT: lsls r1, r1, #28 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrmi r0, [r0, #12] -; CHECK-LE-NEXT: vmovmi.32 q0[3], r0 -; CHECK-LE-NEXT: add sp, #4 -; 
CHECK-LE-NEXT: bx lr -; -; CHECK-BE-LABEL: masked_v4i32_align4_other: -; CHECK-BE: @ %bb.0: @ %entry -; CHECK-BE-NEXT: .pad #4 -; CHECK-BE-NEXT: sub sp, #4 -; CHECK-BE-NEXT: vrev64.32 q1, q0 -; CHECK-BE-NEXT: movs r1, #0 -; CHECK-BE-NEXT: vcmp.s32 gt, q1, zr -; CHECK-BE-NEXT: vmrs r2, p0 -; CHECK-BE-NEXT: and r3, r2, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r1, r3, #0, #1 -; CHECK-BE-NEXT: ubfx r3, r2, #4, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r1, r3, #1, #1 -; CHECK-BE-NEXT: ubfx r3, r2, #8, #1 -; CHECK-BE-NEXT: ubfx r2, r2, #12, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r1, r3, #2, #1 -; CHECK-BE-NEXT: rsbs r2, r2, #0 -; CHECK-BE-NEXT: bfi r1, r2, #3, #1 -; CHECK-BE-NEXT: and r1, r1, #15 -; CHECK-BE-NEXT: lsls r2, r1, #31 -; CHECK-BE-NEXT: itt ne -; CHECK-BE-NEXT: ldrne r2, [r0] -; CHECK-BE-NEXT: vmovne.32 q1[0], r2 -; CHECK-BE-NEXT: lsls r2, r1, #30 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrmi r2, [r0, #4] -; CHECK-BE-NEXT: vmovmi.32 q1[1], r2 -; CHECK-BE-NEXT: lsls r2, r1, #29 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrmi r2, [r0, #8] -; CHECK-BE-NEXT: vmovmi.32 q1[2], r2 -; CHECK-BE-NEXT: lsls r1, r1, #28 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrmi r0, [r0, #12] -; CHECK-BE-NEXT: vmovmi.32 q1[3], r0 -; CHECK-BE-NEXT: vrev64.32 q0, q1 -; CHECK-BE-NEXT: add sp, #4 -; CHECK-BE-NEXT: bx lr -entry: - %c = icmp sgt <4 x i32> %a, zeroinitializer - %l = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %dest, i32 4, <4 x i1> %c, <4 x i32> %a) - ret <4 x i32> %l -} - -define arm_aapcs_vfpcc i8* @masked_v4i32_preinc(i8* %x, i8* %y, <4 x i32> %a) { -; CHECK-LE-LABEL: masked_v4i32_preinc: -; CHECK-LE: @ %bb.0: @ %entry -; CHECK-LE-NEXT: .pad #4 -; CHECK-LE-NEXT: sub sp, #4 -; CHECK-LE-NEXT: vcmp.s32 gt, q0, zr -; CHECK-LE-NEXT: movs r2, #0 -; CHECK-LE-NEXT: vmrs r12, p0 -; CHECK-LE-NEXT: @ implicit-def: $q0 -; CHECK-LE-NEXT: adds r0, #4 -; CHECK-LE-NEXT: and r3, r12, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #0, #1 -; CHECK-LE-NEXT: ubfx r3, r12, #4, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #1, #1 -; CHECK-LE-NEXT: ubfx r3, r12, #8, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #2, #1 -; CHECK-LE-NEXT: ubfx r3, r12, #12, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #3, #1 -; CHECK-LE-NEXT: and r2, r2, #15 -; CHECK-LE-NEXT: lsls r3, r2, #31 -; CHECK-LE-NEXT: itt ne -; CHECK-LE-NEXT: ldrne r3, [r0] -; CHECK-LE-NEXT: vmovne.32 q0[0], r3 -; CHECK-LE-NEXT: lsls r3, r2, #30 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrmi r3, [r0, #4] -; CHECK-LE-NEXT: vmovmi.32 q0[1], r3 -; CHECK-LE-NEXT: lsls r3, r2, #29 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrmi r3, [r0, #8] -; CHECK-LE-NEXT: vmovmi.32 q0[2], r3 -; CHECK-LE-NEXT: lsls r2, r2, #28 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrmi r2, [r0, #12] -; CHECK-LE-NEXT: vmovmi.32 q0[3], r2 -; CHECK-LE-NEXT: vstrw.32 q0, [r1] -; CHECK-LE-NEXT: add sp, #4 -; CHECK-LE-NEXT: bx lr -; -; CHECK-BE-LABEL: masked_v4i32_preinc: -; CHECK-BE: @ %bb.0: @ %entry -; CHECK-BE-NEXT: .pad #4 -; CHECK-BE-NEXT: sub sp, #4 -; CHECK-BE-NEXT: vrev64.32 q1, q0 -; CHECK-BE-NEXT: movs r2, #0 -; CHECK-BE-NEXT: vcmp.s32 gt, q1, zr -; CHECK-BE-NEXT: @ implicit-def: $q0 -; CHECK-BE-NEXT: adds r0, #4 -; CHECK-BE-NEXT: vmrs r12, p0 -; CHECK-BE-NEXT: and r3, r12, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #0, #1 -; CHECK-BE-NEXT: ubfx r3, r12, #4, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; 
CHECK-BE-NEXT: bfi r2, r3, #1, #1 -; CHECK-BE-NEXT: ubfx r3, r12, #8, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #2, #1 -; CHECK-BE-NEXT: ubfx r3, r12, #12, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #3, #1 -; CHECK-BE-NEXT: and r2, r2, #15 -; CHECK-BE-NEXT: lsls r3, r2, #31 -; CHECK-BE-NEXT: itt ne -; CHECK-BE-NEXT: ldrne r3, [r0] -; CHECK-BE-NEXT: vmovne.32 q0[0], r3 -; CHECK-BE-NEXT: lsls r3, r2, #30 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrmi r3, [r0, #4] -; CHECK-BE-NEXT: vmovmi.32 q0[1], r3 -; CHECK-BE-NEXT: lsls r3, r2, #29 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrmi r3, [r0, #8] -; CHECK-BE-NEXT: vmovmi.32 q0[2], r3 -; CHECK-BE-NEXT: lsls r2, r2, #28 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrmi r2, [r0, #12] -; CHECK-BE-NEXT: vmovmi.32 q0[3], r2 -; CHECK-BE-NEXT: vstrw.32 q0, [r1] -; CHECK-BE-NEXT: add sp, #4 -; CHECK-BE-NEXT: bx lr -entry: - %z = getelementptr inbounds i8, i8* %x, i32 4 - %0 = bitcast i8* %z to <4 x i32>* - %c = icmp sgt <4 x i32> %a, zeroinitializer - %1 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %0, i32 4, <4 x i1> %c, <4 x i32> undef) - %2 = bitcast i8* %y to <4 x i32>* - store <4 x i32> %1, <4 x i32>* %2, align 4 - ret i8* %z -} - -define arm_aapcs_vfpcc i8* @masked_v4i32_postinc(i8* %x, i8* %y, <4 x i32> %a) { -; CHECK-LE-LABEL: masked_v4i32_postinc: -; CHECK-LE: @ %bb.0: @ %entry -; CHECK-LE-NEXT: .pad #4 -; CHECK-LE-NEXT: sub sp, #4 -; CHECK-LE-NEXT: vcmp.s32 gt, q0, zr -; CHECK-LE-NEXT: movs r2, #0 -; CHECK-LE-NEXT: vmrs r12, p0 -; CHECK-LE-NEXT: @ implicit-def: $q0 -; CHECK-LE-NEXT: and r3, r12, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #0, #1 -; CHECK-LE-NEXT: ubfx r3, r12, #4, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #1, #1 -; CHECK-LE-NEXT: ubfx r3, r12, #8, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #2, #1 -; CHECK-LE-NEXT: ubfx r3, r12, #12, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: add.w r12, r0, #4 -; CHECK-LE-NEXT: bfi r2, r3, #3, #1 -; CHECK-LE-NEXT: and r3, r2, #15 -; CHECK-LE-NEXT: lsls r2, r3, #31 -; CHECK-LE-NEXT: itt ne -; CHECK-LE-NEXT: ldrne r2, [r0] -; CHECK-LE-NEXT: vmovne.32 q0[0], r2 -; CHECK-LE-NEXT: lsls r2, r3, #30 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrmi r2, [r0, #4] -; CHECK-LE-NEXT: vmovmi.32 q0[1], r2 -; CHECK-LE-NEXT: lsls r2, r3, #29 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrmi r2, [r0, #8] -; CHECK-LE-NEXT: vmovmi.32 q0[2], r2 -; CHECK-LE-NEXT: lsls r2, r3, #28 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrmi r0, [r0, #12] -; CHECK-LE-NEXT: vmovmi.32 q0[3], r0 -; CHECK-LE-NEXT: vstrw.32 q0, [r1] -; CHECK-LE-NEXT: mov r0, r12 -; CHECK-LE-NEXT: add sp, #4 -; CHECK-LE-NEXT: bx lr -; -; CHECK-BE-LABEL: masked_v4i32_postinc: -; CHECK-BE: @ %bb.0: @ %entry -; CHECK-BE-NEXT: .pad #4 -; CHECK-BE-NEXT: sub sp, #4 -; CHECK-BE-NEXT: vrev64.32 q1, q0 -; CHECK-BE-NEXT: movs r2, #0 -; CHECK-BE-NEXT: vcmp.s32 gt, q1, zr -; CHECK-BE-NEXT: @ implicit-def: $q0 -; CHECK-BE-NEXT: vmrs r12, p0 -; CHECK-BE-NEXT: and r3, r12, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #0, #1 -; CHECK-BE-NEXT: ubfx r3, r12, #4, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #1, #1 -; CHECK-BE-NEXT: ubfx r3, r12, #8, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #2, #1 -; CHECK-BE-NEXT: ubfx r3, r12, #12, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: add.w r12, r0, #4 -; CHECK-BE-NEXT: bfi r2, r3, #3, #1 -; 
CHECK-BE-NEXT: and r3, r2, #15 -; CHECK-BE-NEXT: lsls r2, r3, #31 -; CHECK-BE-NEXT: itt ne -; CHECK-BE-NEXT: ldrne r2, [r0] -; CHECK-BE-NEXT: vmovne.32 q0[0], r2 -; CHECK-BE-NEXT: lsls r2, r3, #30 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrmi r2, [r0, #4] -; CHECK-BE-NEXT: vmovmi.32 q0[1], r2 -; CHECK-BE-NEXT: lsls r2, r3, #29 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrmi r2, [r0, #8] -; CHECK-BE-NEXT: vmovmi.32 q0[2], r2 -; CHECK-BE-NEXT: lsls r2, r3, #28 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrmi r0, [r0, #12] -; CHECK-BE-NEXT: vmovmi.32 q0[3], r0 -; CHECK-BE-NEXT: vstrw.32 q0, [r1] -; CHECK-BE-NEXT: mov r0, r12 -; CHECK-BE-NEXT: add sp, #4 -; CHECK-BE-NEXT: bx lr -entry: - %z = getelementptr inbounds i8, i8* %x, i32 4 - %0 = bitcast i8* %x to <4 x i32>* - %c = icmp sgt <4 x i32> %a, zeroinitializer - %1 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %0, i32 4, <4 x i1> %c, <4 x i32> undef) - %2 = bitcast i8* %y to <4 x i32>* - store <4 x i32> %1, <4 x i32>* %2, align 4 - ret i8* %z -} - - - -define arm_aapcs_vfpcc <8 x i16> @masked_v8i16_align4_zero(<8 x i16> *%dest, <8 x i16> %a) { -; CHECK-LE-LABEL: masked_v8i16_align4_zero: -; CHECK-LE: @ %bb.0: @ %entry -; CHECK-LE-NEXT: .pad #8 -; CHECK-LE-NEXT: sub sp, #8 -; CHECK-LE-NEXT: vcmp.s16 gt, q0, zr -; CHECK-LE-NEXT: mov.w r12, #0 -; CHECK-LE-NEXT: vmrs r1, p0 -; CHECK-LE-NEXT: and r3, r1, #1 -; CHECK-LE-NEXT: rsbs r2, r3, #0 -; CHECK-LE-NEXT: movs r3, #0 -; CHECK-LE-NEXT: bfi r3, r2, #0, #1 -; CHECK-LE-NEXT: ubfx r2, r1, #2, #1 -; CHECK-LE-NEXT: rsbs r2, r2, #0 -; CHECK-LE-NEXT: bfi r3, r2, #1, #1 -; CHECK-LE-NEXT: ubfx r2, r1, #4, #1 -; CHECK-LE-NEXT: rsbs r2, r2, #0 -; CHECK-LE-NEXT: bfi r3, r2, #2, #1 -; CHECK-LE-NEXT: ubfx r2, r1, #6, #1 -; CHECK-LE-NEXT: rsbs r2, r2, #0 -; CHECK-LE-NEXT: bfi r3, r2, #3, #1 -; CHECK-LE-NEXT: ubfx r2, r1, #8, #1 -; CHECK-LE-NEXT: rsbs r2, r2, #0 -; CHECK-LE-NEXT: bfi r3, r2, #4, #1 -; CHECK-LE-NEXT: ubfx r2, r1, #10, #1 -; CHECK-LE-NEXT: rsbs r2, r2, #0 -; CHECK-LE-NEXT: bfi r3, r2, #5, #1 -; CHECK-LE-NEXT: ubfx r2, r1, #12, #1 -; CHECK-LE-NEXT: ubfx r1, r1, #14, #1 -; CHECK-LE-NEXT: rsbs r2, r2, #0 -; CHECK-LE-NEXT: bfi r3, r2, #6, #1 -; CHECK-LE-NEXT: rsbs r1, r1, #0 -; CHECK-LE-NEXT: bfi r3, r1, #7, #1 -; CHECK-LE-NEXT: uxtb r1, r3 -; CHECK-LE-NEXT: lsls r2, r3, #31 -; CHECK-LE-NEXT: beq .LBB6_2 -; CHECK-LE-NEXT: @ %bb.1: @ %cond.load -; CHECK-LE-NEXT: ldrh r2, [r0] -; CHECK-LE-NEXT: vdup.16 q0, r12 -; CHECK-LE-NEXT: vmov.16 q0[0], r2 -; CHECK-LE-NEXT: b .LBB6_3 -; CHECK-LE-NEXT: .LBB6_2: -; CHECK-LE-NEXT: vmov.i32 q0, #0x0 -; CHECK-LE-NEXT: .LBB6_3: @ %else -; CHECK-LE-NEXT: lsls r2, r1, #30 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrhmi r2, [r0, #2] -; CHECK-LE-NEXT: vmovmi.16 q0[1], r2 -; CHECK-LE-NEXT: lsls r2, r1, #29 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrhmi r2, [r0, #4] -; CHECK-LE-NEXT: vmovmi.16 q0[2], r2 -; CHECK-LE-NEXT: lsls r2, r1, #28 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrhmi r2, [r0, #6] -; CHECK-LE-NEXT: vmovmi.16 q0[3], r2 -; CHECK-LE-NEXT: lsls r2, r1, #27 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrhmi r2, [r0, #8] -; CHECK-LE-NEXT: vmovmi.16 q0[4], r2 -; CHECK-LE-NEXT: lsls r2, r1, #26 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrhmi r2, [r0, #10] -; CHECK-LE-NEXT: vmovmi.16 q0[5], r2 -; CHECK-LE-NEXT: lsls r2, r1, #25 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrhmi r2, [r0, #12] -; CHECK-LE-NEXT: vmovmi.16 q0[6], r2 -; CHECK-LE-NEXT: lsls r1, r1, #24 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrhmi r0, [r0, #14] -; 
CHECK-LE-NEXT: vmovmi.16 q0[7], r0 -; CHECK-LE-NEXT: add sp, #8 -; CHECK-LE-NEXT: bx lr -; -; CHECK-BE-LABEL: masked_v8i16_align4_zero: -; CHECK-BE: @ %bb.0: @ %entry -; CHECK-BE-NEXT: .pad #8 -; CHECK-BE-NEXT: sub sp, #8 -; CHECK-BE-NEXT: vrev64.16 q1, q0 -; CHECK-BE-NEXT: mov.w r12, #0 -; CHECK-BE-NEXT: vcmp.s16 gt, q1, zr -; CHECK-BE-NEXT: vmrs r1, p0 -; CHECK-BE-NEXT: and r3, r1, #1 -; CHECK-BE-NEXT: rsbs r2, r3, #0 -; CHECK-BE-NEXT: movs r3, #0 -; CHECK-BE-NEXT: bfi r3, r2, #0, #1 -; CHECK-BE-NEXT: ubfx r2, r1, #2, #1 -; CHECK-BE-NEXT: rsbs r2, r2, #0 -; CHECK-BE-NEXT: bfi r3, r2, #1, #1 -; CHECK-BE-NEXT: ubfx r2, r1, #4, #1 -; CHECK-BE-NEXT: rsbs r2, r2, #0 -; CHECK-BE-NEXT: bfi r3, r2, #2, #1 -; CHECK-BE-NEXT: ubfx r2, r1, #6, #1 -; CHECK-BE-NEXT: rsbs r2, r2, #0 -; CHECK-BE-NEXT: bfi r3, r2, #3, #1 -; CHECK-BE-NEXT: ubfx r2, r1, #8, #1 -; CHECK-BE-NEXT: rsbs r2, r2, #0 -; CHECK-BE-NEXT: bfi r3, r2, #4, #1 -; CHECK-BE-NEXT: ubfx r2, r1, #10, #1 -; CHECK-BE-NEXT: rsbs r2, r2, #0 -; CHECK-BE-NEXT: bfi r3, r2, #5, #1 -; CHECK-BE-NEXT: ubfx r2, r1, #12, #1 -; CHECK-BE-NEXT: ubfx r1, r1, #14, #1 -; CHECK-BE-NEXT: rsbs r2, r2, #0 -; CHECK-BE-NEXT: bfi r3, r2, #6, #1 -; CHECK-BE-NEXT: rsbs r1, r1, #0 -; CHECK-BE-NEXT: bfi r3, r1, #7, #1 -; CHECK-BE-NEXT: uxtb r1, r3 -; CHECK-BE-NEXT: lsls r2, r3, #31 -; CHECK-BE-NEXT: beq .LBB6_2 -; CHECK-BE-NEXT: @ %bb.1: @ %cond.load -; CHECK-BE-NEXT: ldrh r2, [r0] -; CHECK-BE-NEXT: vdup.16 q1, r12 -; CHECK-BE-NEXT: vmov.16 q1[0], r2 -; CHECK-BE-NEXT: b .LBB6_3 -; CHECK-BE-NEXT: .LBB6_2: -; CHECK-BE-NEXT: vmov.i32 q0, #0x0 -; CHECK-BE-NEXT: vrev32.16 q1, q0 -; CHECK-BE-NEXT: .LBB6_3: @ %else -; CHECK-BE-NEXT: lsls r2, r1, #30 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrhmi r2, [r0, #2] -; CHECK-BE-NEXT: vmovmi.16 q1[1], r2 -; CHECK-BE-NEXT: lsls r2, r1, #29 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrhmi r2, [r0, #4] -; CHECK-BE-NEXT: vmovmi.16 q1[2], r2 -; CHECK-BE-NEXT: lsls r2, r1, #28 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrhmi r2, [r0, #6] -; CHECK-BE-NEXT: vmovmi.16 q1[3], r2 -; CHECK-BE-NEXT: lsls r2, r1, #27 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrhmi r2, [r0, #8] -; CHECK-BE-NEXT: vmovmi.16 q1[4], r2 -; CHECK-BE-NEXT: lsls r2, r1, #26 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrhmi r2, [r0, #10] -; CHECK-BE-NEXT: vmovmi.16 q1[5], r2 -; CHECK-BE-NEXT: lsls r2, r1, #25 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrhmi r2, [r0, #12] -; CHECK-BE-NEXT: vmovmi.16 q1[6], r2 -; CHECK-BE-NEXT: lsls r1, r1, #24 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrhmi r0, [r0, #14] -; CHECK-BE-NEXT: vmovmi.16 q1[7], r0 -; CHECK-BE-NEXT: vrev64.16 q0, q1 -; CHECK-BE-NEXT: add sp, #8 -; CHECK-BE-NEXT: bx lr -entry: - %c = icmp sgt <8 x i16> %a, zeroinitializer - %l = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %dest, i32 2, <8 x i1> %c, <8 x i16> zeroinitializer) - ret <8 x i16> %l -} - -define arm_aapcs_vfpcc <8 x i16> @masked_v8i16_align4_undef(<8 x i16> *%dest, <8 x i16> %a) { -; CHECK-LE-LABEL: masked_v8i16_align4_undef: -; CHECK-LE: @ %bb.0: @ %entry -; CHECK-LE-NEXT: .pad #8 -; CHECK-LE-NEXT: sub sp, #8 -; CHECK-LE-NEXT: vcmp.s16 gt, q0, zr -; CHECK-LE-NEXT: movs r2, #0 -; CHECK-LE-NEXT: vmrs r1, p0 -; CHECK-LE-NEXT: @ implicit-def: $q0 -; CHECK-LE-NEXT: and r3, r1, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #0, #1 -; CHECK-LE-NEXT: ubfx r3, r1, #2, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #1, #1 -; CHECK-LE-NEXT: ubfx r3, r1, #4, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; 
CHECK-LE-NEXT: bfi r2, r3, #2, #1 -; CHECK-LE-NEXT: ubfx r3, r1, #6, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #3, #1 -; CHECK-LE-NEXT: ubfx r3, r1, #8, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #4, #1 -; CHECK-LE-NEXT: ubfx r3, r1, #10, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #5, #1 -; CHECK-LE-NEXT: ubfx r3, r1, #12, #1 -; CHECK-LE-NEXT: ubfx r1, r1, #14, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #6, #1 -; CHECK-LE-NEXT: rsbs r1, r1, #0 -; CHECK-LE-NEXT: bfi r2, r1, #7, #1 -; CHECK-LE-NEXT: uxtb r1, r2 -; CHECK-LE-NEXT: lsls r2, r2, #31 -; CHECK-LE-NEXT: itt ne -; CHECK-LE-NEXT: ldrhne r2, [r0] -; CHECK-LE-NEXT: vmovne.16 q0[0], r2 -; CHECK-LE-NEXT: lsls r2, r1, #30 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrhmi r2, [r0, #2] -; CHECK-LE-NEXT: vmovmi.16 q0[1], r2 -; CHECK-LE-NEXT: lsls r2, r1, #29 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrhmi r2, [r0, #4] -; CHECK-LE-NEXT: vmovmi.16 q0[2], r2 -; CHECK-LE-NEXT: lsls r2, r1, #28 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrhmi r2, [r0, #6] -; CHECK-LE-NEXT: vmovmi.16 q0[3], r2 -; CHECK-LE-NEXT: lsls r2, r1, #27 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrhmi r2, [r0, #8] -; CHECK-LE-NEXT: vmovmi.16 q0[4], r2 -; CHECK-LE-NEXT: lsls r2, r1, #26 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrhmi r2, [r0, #10] -; CHECK-LE-NEXT: vmovmi.16 q0[5], r2 -; CHECK-LE-NEXT: lsls r2, r1, #25 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrhmi r2, [r0, #12] -; CHECK-LE-NEXT: vmovmi.16 q0[6], r2 -; CHECK-LE-NEXT: lsls r1, r1, #24 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrhmi r0, [r0, #14] -; CHECK-LE-NEXT: vmovmi.16 q0[7], r0 -; CHECK-LE-NEXT: add sp, #8 -; CHECK-LE-NEXT: bx lr -; -; CHECK-BE-LABEL: masked_v8i16_align4_undef: -; CHECK-BE: @ %bb.0: @ %entry -; CHECK-BE-NEXT: .pad #8 -; CHECK-BE-NEXT: sub sp, #8 -; CHECK-BE-NEXT: vrev64.16 q1, q0 -; CHECK-BE-NEXT: movs r2, #0 -; CHECK-BE-NEXT: vcmp.s16 gt, q1, zr -; CHECK-BE-NEXT: @ implicit-def: $q1 -; CHECK-BE-NEXT: vmrs r1, p0 -; CHECK-BE-NEXT: and r3, r1, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #0, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #2, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #1, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #4, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #2, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #6, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #3, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #8, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #4, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #10, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #5, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #12, #1 -; CHECK-BE-NEXT: ubfx r1, r1, #14, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #6, #1 -; CHECK-BE-NEXT: rsbs r1, r1, #0 -; CHECK-BE-NEXT: bfi r2, r1, #7, #1 -; CHECK-BE-NEXT: uxtb r1, r2 -; CHECK-BE-NEXT: lsls r2, r2, #31 -; CHECK-BE-NEXT: itt ne -; CHECK-BE-NEXT: ldrhne r2, [r0] -; CHECK-BE-NEXT: vmovne.16 q1[0], r2 -; CHECK-BE-NEXT: lsls r2, r1, #30 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrhmi r2, [r0, #2] -; CHECK-BE-NEXT: vmovmi.16 q1[1], r2 -; CHECK-BE-NEXT: lsls r2, r1, #29 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrhmi r2, [r0, #4] -; CHECK-BE-NEXT: vmovmi.16 q1[2], r2 -; CHECK-BE-NEXT: lsls r2, r1, #28 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrhmi r2, [r0, #6] -; CHECK-BE-NEXT: vmovmi.16 q1[3], r2 -; CHECK-BE-NEXT: lsls r2, r1, #27 -; CHECK-BE-NEXT: itt 
mi -; CHECK-BE-NEXT: ldrhmi r2, [r0, #8] -; CHECK-BE-NEXT: vmovmi.16 q1[4], r2 -; CHECK-BE-NEXT: lsls r2, r1, #26 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrhmi r2, [r0, #10] -; CHECK-BE-NEXT: vmovmi.16 q1[5], r2 -; CHECK-BE-NEXT: lsls r2, r1, #25 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrhmi r2, [r0, #12] -; CHECK-BE-NEXT: vmovmi.16 q1[6], r2 -; CHECK-BE-NEXT: lsls r1, r1, #24 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrhmi r0, [r0, #14] -; CHECK-BE-NEXT: vmovmi.16 q1[7], r0 -; CHECK-BE-NEXT: vrev64.16 q0, q1 -; CHECK-BE-NEXT: add sp, #8 -; CHECK-BE-NEXT: bx lr -entry: - %c = icmp sgt <8 x i16> %a, zeroinitializer - %l = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %dest, i32 2, <8 x i1> %c, <8 x i16> undef) - ret <8 x i16> %l -} - -define arm_aapcs_vfpcc <8 x i16> @masked_v8i16_align1_undef(<8 x i16> *%dest, <8 x i16> %a) { -; CHECK-LE-LABEL: masked_v8i16_align1_undef: -; CHECK-LE: @ %bb.0: @ %entry -; CHECK-LE-NEXT: .pad #8 -; CHECK-LE-NEXT: sub sp, #8 -; CHECK-LE-NEXT: vcmp.s16 gt, q0, zr -; CHECK-LE-NEXT: movs r2, #0 -; CHECK-LE-NEXT: vmrs r1, p0 -; CHECK-LE-NEXT: @ implicit-def: $q0 -; CHECK-LE-NEXT: and r3, r1, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #0, #1 -; CHECK-LE-NEXT: ubfx r3, r1, #2, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #1, #1 -; CHECK-LE-NEXT: ubfx r3, r1, #4, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #2, #1 -; CHECK-LE-NEXT: ubfx r3, r1, #6, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #3, #1 -; CHECK-LE-NEXT: ubfx r3, r1, #8, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #4, #1 -; CHECK-LE-NEXT: ubfx r3, r1, #10, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #5, #1 -; CHECK-LE-NEXT: ubfx r3, r1, #12, #1 -; CHECK-LE-NEXT: ubfx r1, r1, #14, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #6, #1 -; CHECK-LE-NEXT: rsbs r1, r1, #0 -; CHECK-LE-NEXT: bfi r2, r1, #7, #1 -; CHECK-LE-NEXT: uxtb r1, r2 -; CHECK-LE-NEXT: lsls r2, r2, #31 -; CHECK-LE-NEXT: itt ne -; CHECK-LE-NEXT: ldrhne r2, [r0] -; CHECK-LE-NEXT: vmovne.16 q0[0], r2 -; CHECK-LE-NEXT: lsls r2, r1, #30 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrhmi r2, [r0, #2] -; CHECK-LE-NEXT: vmovmi.16 q0[1], r2 -; CHECK-LE-NEXT: lsls r2, r1, #29 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrhmi r2, [r0, #4] -; CHECK-LE-NEXT: vmovmi.16 q0[2], r2 -; CHECK-LE-NEXT: lsls r2, r1, #28 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrhmi r2, [r0, #6] -; CHECK-LE-NEXT: vmovmi.16 q0[3], r2 -; CHECK-LE-NEXT: lsls r2, r1, #27 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrhmi r2, [r0, #8] -; CHECK-LE-NEXT: vmovmi.16 q0[4], r2 -; CHECK-LE-NEXT: lsls r2, r1, #26 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrhmi r2, [r0, #10] -; CHECK-LE-NEXT: vmovmi.16 q0[5], r2 -; CHECK-LE-NEXT: lsls r2, r1, #25 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrhmi r2, [r0, #12] -; CHECK-LE-NEXT: vmovmi.16 q0[6], r2 -; CHECK-LE-NEXT: lsls r1, r1, #24 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrhmi r0, [r0, #14] -; CHECK-LE-NEXT: vmovmi.16 q0[7], r0 -; CHECK-LE-NEXT: add sp, #8 -; CHECK-LE-NEXT: bx lr -; -; CHECK-BE-LABEL: masked_v8i16_align1_undef: -; CHECK-BE: @ %bb.0: @ %entry -; CHECK-BE-NEXT: .pad #8 -; CHECK-BE-NEXT: sub sp, #8 -; CHECK-BE-NEXT: vrev64.16 q1, q0 -; CHECK-BE-NEXT: movs r2, #0 -; CHECK-BE-NEXT: vcmp.s16 gt, q1, zr -; CHECK-BE-NEXT: @ implicit-def: $q1 -; CHECK-BE-NEXT: vmrs r1, p0 -; CHECK-BE-NEXT: and r3, r1, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; 
CHECK-BE-NEXT: bfi r2, r3, #0, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #2, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #1, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #4, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #2, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #6, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #3, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #8, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #4, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #10, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #5, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #12, #1 -; CHECK-BE-NEXT: ubfx r1, r1, #14, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #6, #1 -; CHECK-BE-NEXT: rsbs r1, r1, #0 -; CHECK-BE-NEXT: bfi r2, r1, #7, #1 -; CHECK-BE-NEXT: uxtb r1, r2 -; CHECK-BE-NEXT: lsls r2, r2, #31 -; CHECK-BE-NEXT: itt ne -; CHECK-BE-NEXT: ldrhne r2, [r0] -; CHECK-BE-NEXT: vmovne.16 q1[0], r2 -; CHECK-BE-NEXT: lsls r2, r1, #30 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrhmi r2, [r0, #2] -; CHECK-BE-NEXT: vmovmi.16 q1[1], r2 -; CHECK-BE-NEXT: lsls r2, r1, #29 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrhmi r2, [r0, #4] -; CHECK-BE-NEXT: vmovmi.16 q1[2], r2 -; CHECK-BE-NEXT: lsls r2, r1, #28 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrhmi r2, [r0, #6] -; CHECK-BE-NEXT: vmovmi.16 q1[3], r2 -; CHECK-BE-NEXT: lsls r2, r1, #27 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrhmi r2, [r0, #8] -; CHECK-BE-NEXT: vmovmi.16 q1[4], r2 -; CHECK-BE-NEXT: lsls r2, r1, #26 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrhmi r2, [r0, #10] -; CHECK-BE-NEXT: vmovmi.16 q1[5], r2 -; CHECK-BE-NEXT: lsls r2, r1, #25 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrhmi r2, [r0, #12] -; CHECK-BE-NEXT: vmovmi.16 q1[6], r2 -; CHECK-BE-NEXT: lsls r1, r1, #24 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrhmi r0, [r0, #14] -; CHECK-BE-NEXT: vmovmi.16 q1[7], r0 -; CHECK-BE-NEXT: vrev64.16 q0, q1 -; CHECK-BE-NEXT: add sp, #8 -; CHECK-BE-NEXT: bx lr -entry: - %c = icmp sgt <8 x i16> %a, zeroinitializer - %l = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %dest, i32 1, <8 x i1> %c, <8 x i16> undef) - ret <8 x i16> %l -} - -define arm_aapcs_vfpcc <8 x i16> @masked_v8i16_align4_other(<8 x i16> *%dest, <8 x i16> %a) { -; CHECK-LE-LABEL: masked_v8i16_align4_other: -; CHECK-LE: @ %bb.0: @ %entry -; CHECK-LE-NEXT: .pad #8 -; CHECK-LE-NEXT: sub sp, #8 -; CHECK-LE-NEXT: vcmp.s16 gt, q0, zr -; CHECK-LE-NEXT: movs r2, #0 -; CHECK-LE-NEXT: vmrs r1, p0 -; CHECK-LE-NEXT: and r3, r1, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #0, #1 -; CHECK-LE-NEXT: ubfx r3, r1, #2, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #1, #1 -; CHECK-LE-NEXT: ubfx r3, r1, #4, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #2, #1 -; CHECK-LE-NEXT: ubfx r3, r1, #6, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #3, #1 -; CHECK-LE-NEXT: ubfx r3, r1, #8, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #4, #1 -; CHECK-LE-NEXT: ubfx r3, r1, #10, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #5, #1 -; CHECK-LE-NEXT: ubfx r3, r1, #12, #1 -; CHECK-LE-NEXT: ubfx r1, r1, #14, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #6, #1 -; CHECK-LE-NEXT: rsbs r1, r1, #0 -; CHECK-LE-NEXT: bfi r2, r1, #7, #1 -; CHECK-LE-NEXT: uxtb r1, r2 -; CHECK-LE-NEXT: lsls r2, r2, #31 -; CHECK-LE-NEXT: itt ne -; CHECK-LE-NEXT: ldrhne r2, [r0] -; CHECK-LE-NEXT: vmovne.16 q0[0], r2 -; 
CHECK-LE-NEXT: lsls r2, r1, #30 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrhmi r2, [r0, #2] -; CHECK-LE-NEXT: vmovmi.16 q0[1], r2 -; CHECK-LE-NEXT: lsls r2, r1, #29 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrhmi r2, [r0, #4] -; CHECK-LE-NEXT: vmovmi.16 q0[2], r2 -; CHECK-LE-NEXT: lsls r2, r1, #28 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrhmi r2, [r0, #6] -; CHECK-LE-NEXT: vmovmi.16 q0[3], r2 -; CHECK-LE-NEXT: lsls r2, r1, #27 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrhmi r2, [r0, #8] -; CHECK-LE-NEXT: vmovmi.16 q0[4], r2 -; CHECK-LE-NEXT: lsls r2, r1, #26 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrhmi r2, [r0, #10] -; CHECK-LE-NEXT: vmovmi.16 q0[5], r2 -; CHECK-LE-NEXT: lsls r2, r1, #25 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrhmi r2, [r0, #12] -; CHECK-LE-NEXT: vmovmi.16 q0[6], r2 -; CHECK-LE-NEXT: lsls r1, r1, #24 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrhmi r0, [r0, #14] -; CHECK-LE-NEXT: vmovmi.16 q0[7], r0 -; CHECK-LE-NEXT: add sp, #8 -; CHECK-LE-NEXT: bx lr -; -; CHECK-BE-LABEL: masked_v8i16_align4_other: -; CHECK-BE: @ %bb.0: @ %entry -; CHECK-BE-NEXT: .pad #8 -; CHECK-BE-NEXT: sub sp, #8 -; CHECK-BE-NEXT: vrev64.16 q1, q0 -; CHECK-BE-NEXT: movs r2, #0 -; CHECK-BE-NEXT: vcmp.s16 gt, q1, zr -; CHECK-BE-NEXT: vmrs r1, p0 -; CHECK-BE-NEXT: and r3, r1, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #0, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #2, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #1, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #4, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #2, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #6, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #3, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #8, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #4, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #10, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #5, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #12, #1 -; CHECK-BE-NEXT: ubfx r1, r1, #14, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #6, #1 -; CHECK-BE-NEXT: rsbs r1, r1, #0 -; CHECK-BE-NEXT: bfi r2, r1, #7, #1 -; CHECK-BE-NEXT: uxtb r1, r2 -; CHECK-BE-NEXT: lsls r2, r2, #31 -; CHECK-BE-NEXT: itt ne -; CHECK-BE-NEXT: ldrhne r2, [r0] -; CHECK-BE-NEXT: vmovne.16 q1[0], r2 -; CHECK-BE-NEXT: lsls r2, r1, #30 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrhmi r2, [r0, #2] -; CHECK-BE-NEXT: vmovmi.16 q1[1], r2 -; CHECK-BE-NEXT: lsls r2, r1, #29 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrhmi r2, [r0, #4] -; CHECK-BE-NEXT: vmovmi.16 q1[2], r2 -; CHECK-BE-NEXT: lsls r2, r1, #28 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrhmi r2, [r0, #6] -; CHECK-BE-NEXT: vmovmi.16 q1[3], r2 -; CHECK-BE-NEXT: lsls r2, r1, #27 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrhmi r2, [r0, #8] -; CHECK-BE-NEXT: vmovmi.16 q1[4], r2 -; CHECK-BE-NEXT: lsls r2, r1, #26 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrhmi r2, [r0, #10] -; CHECK-BE-NEXT: vmovmi.16 q1[5], r2 -; CHECK-BE-NEXT: lsls r2, r1, #25 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrhmi r2, [r0, #12] -; CHECK-BE-NEXT: vmovmi.16 q1[6], r2 -; CHECK-BE-NEXT: lsls r1, r1, #24 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrhmi r0, [r0, #14] -; CHECK-BE-NEXT: vmovmi.16 q1[7], r0 -; CHECK-BE-NEXT: vrev64.16 q0, q1 -; CHECK-BE-NEXT: add sp, #8 -; CHECK-BE-NEXT: bx lr -entry: - %c = icmp sgt <8 x i16> %a, zeroinitializer - %l = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %dest, i32 2, <8 x i1> %c, <8 x i16> %a) - ret <8 x i16> %l -} - -define i8* 
@masked_v8i16_preinc(i8* %x, i8* %y, <8 x i16> %a) { -; CHECK-LE-LABEL: masked_v8i16_preinc: -; CHECK-LE: @ %bb.0: @ %entry -; CHECK-LE-NEXT: .pad #8 -; CHECK-LE-NEXT: sub sp, #8 -; CHECK-LE-NEXT: vldr d1, [sp, #8] -; CHECK-LE-NEXT: adds r0, #4 -; CHECK-LE-NEXT: vmov d0, r2, r3 -; CHECK-LE-NEXT: movs r3, #0 -; CHECK-LE-NEXT: vcmp.s16 gt, q0, zr -; CHECK-LE-NEXT: @ implicit-def: $q0 -; CHECK-LE-NEXT: vmrs r12, p0 -; CHECK-LE-NEXT: and r2, r12, #1 -; CHECK-LE-NEXT: rsbs r2, r2, #0 -; CHECK-LE-NEXT: bfi r3, r2, #0, #1 -; CHECK-LE-NEXT: ubfx r2, r12, #2, #1 -; CHECK-LE-NEXT: rsbs r2, r2, #0 -; CHECK-LE-NEXT: bfi r3, r2, #1, #1 -; CHECK-LE-NEXT: ubfx r2, r12, #4, #1 -; CHECK-LE-NEXT: rsbs r2, r2, #0 -; CHECK-LE-NEXT: bfi r3, r2, #2, #1 -; CHECK-LE-NEXT: ubfx r2, r12, #6, #1 -; CHECK-LE-NEXT: rsbs r2, r2, #0 -; CHECK-LE-NEXT: bfi r3, r2, #3, #1 -; CHECK-LE-NEXT: ubfx r2, r12, #8, #1 -; CHECK-LE-NEXT: rsbs r2, r2, #0 -; CHECK-LE-NEXT: bfi r3, r2, #4, #1 -; CHECK-LE-NEXT: ubfx r2, r12, #10, #1 -; CHECK-LE-NEXT: rsbs r2, r2, #0 -; CHECK-LE-NEXT: bfi r3, r2, #5, #1 -; CHECK-LE-NEXT: ubfx r2, r12, #12, #1 -; CHECK-LE-NEXT: rsbs r2, r2, #0 -; CHECK-LE-NEXT: bfi r3, r2, #6, #1 -; CHECK-LE-NEXT: ubfx r2, r12, #14, #1 -; CHECK-LE-NEXT: rsbs r2, r2, #0 -; CHECK-LE-NEXT: bfi r3, r2, #7, #1 -; CHECK-LE-NEXT: uxtb r2, r3 -; CHECK-LE-NEXT: lsls r3, r3, #31 -; CHECK-LE-NEXT: itt ne -; CHECK-LE-NEXT: ldrhne r3, [r0] -; CHECK-LE-NEXT: vmovne.16 q0[0], r3 -; CHECK-LE-NEXT: lsls r3, r2, #30 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrhmi r3, [r0, #2] -; CHECK-LE-NEXT: vmovmi.16 q0[1], r3 -; CHECK-LE-NEXT: lsls r3, r2, #29 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrhmi r3, [r0, #4] -; CHECK-LE-NEXT: vmovmi.16 q0[2], r3 -; CHECK-LE-NEXT: lsls r3, r2, #28 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrhmi r3, [r0, #6] -; CHECK-LE-NEXT: vmovmi.16 q0[3], r3 -; CHECK-LE-NEXT: lsls r3, r2, #27 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrhmi r3, [r0, #8] -; CHECK-LE-NEXT: vmovmi.16 q0[4], r3 -; CHECK-LE-NEXT: lsls r3, r2, #26 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrhmi r3, [r0, #10] -; CHECK-LE-NEXT: vmovmi.16 q0[5], r3 -; CHECK-LE-NEXT: lsls r3, r2, #25 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrhmi r3, [r0, #12] -; CHECK-LE-NEXT: vmovmi.16 q0[6], r3 -; CHECK-LE-NEXT: lsls r2, r2, #24 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrhmi r2, [r0, #14] -; CHECK-LE-NEXT: vmovmi.16 q0[7], r2 -; CHECK-LE-NEXT: vstrw.32 q0, [r1] -; CHECK-LE-NEXT: add sp, #8 -; CHECK-LE-NEXT: bx lr -; -; CHECK-BE-LABEL: masked_v8i16_preinc: -; CHECK-BE: @ %bb.0: @ %entry -; CHECK-BE-NEXT: .pad #8 -; CHECK-BE-NEXT: sub sp, #8 -; CHECK-BE-NEXT: vldr d1, [sp, #8] -; CHECK-BE-NEXT: adds r0, #4 -; CHECK-BE-NEXT: vmov d0, r3, r2 -; CHECK-BE-NEXT: movs r3, #0 -; CHECK-BE-NEXT: vrev64.16 q1, q0 -; CHECK-BE-NEXT: @ implicit-def: $q0 -; CHECK-BE-NEXT: vcmp.s16 gt, q1, zr -; CHECK-BE-NEXT: vmrs r12, p0 -; CHECK-BE-NEXT: and r2, r12, #1 -; CHECK-BE-NEXT: rsbs r2, r2, #0 -; CHECK-BE-NEXT: bfi r3, r2, #0, #1 -; CHECK-BE-NEXT: ubfx r2, r12, #2, #1 -; CHECK-BE-NEXT: rsbs r2, r2, #0 -; CHECK-BE-NEXT: bfi r3, r2, #1, #1 -; CHECK-BE-NEXT: ubfx r2, r12, #4, #1 -; CHECK-BE-NEXT: rsbs r2, r2, #0 -; CHECK-BE-NEXT: bfi r3, r2, #2, #1 -; CHECK-BE-NEXT: ubfx r2, r12, #6, #1 -; CHECK-BE-NEXT: rsbs r2, r2, #0 -; CHECK-BE-NEXT: bfi r3, r2, #3, #1 -; CHECK-BE-NEXT: ubfx r2, r12, #8, #1 -; CHECK-BE-NEXT: rsbs r2, r2, #0 -; CHECK-BE-NEXT: bfi r3, r2, #4, #1 -; CHECK-BE-NEXT: ubfx r2, r12, #10, #1 -; CHECK-BE-NEXT: rsbs r2, r2, #0 -; CHECK-BE-NEXT: bfi r3, r2, #5, 
#1 -; CHECK-BE-NEXT: ubfx r2, r12, #12, #1 -; CHECK-BE-NEXT: rsbs r2, r2, #0 -; CHECK-BE-NEXT: bfi r3, r2, #6, #1 -; CHECK-BE-NEXT: ubfx r2, r12, #14, #1 -; CHECK-BE-NEXT: rsbs r2, r2, #0 -; CHECK-BE-NEXT: bfi r3, r2, #7, #1 -; CHECK-BE-NEXT: uxtb r2, r3 -; CHECK-BE-NEXT: lsls r3, r3, #31 -; CHECK-BE-NEXT: itt ne -; CHECK-BE-NEXT: ldrhne r3, [r0] -; CHECK-BE-NEXT: vmovne.16 q0[0], r3 -; CHECK-BE-NEXT: lsls r3, r2, #30 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrhmi r3, [r0, #2] -; CHECK-BE-NEXT: vmovmi.16 q0[1], r3 -; CHECK-BE-NEXT: lsls r3, r2, #29 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrhmi r3, [r0, #4] -; CHECK-BE-NEXT: vmovmi.16 q0[2], r3 -; CHECK-BE-NEXT: lsls r3, r2, #28 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrhmi r3, [r0, #6] -; CHECK-BE-NEXT: vmovmi.16 q0[3], r3 -; CHECK-BE-NEXT: lsls r3, r2, #27 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrhmi r3, [r0, #8] -; CHECK-BE-NEXT: vmovmi.16 q0[4], r3 -; CHECK-BE-NEXT: lsls r3, r2, #26 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrhmi r3, [r0, #10] -; CHECK-BE-NEXT: vmovmi.16 q0[5], r3 -; CHECK-BE-NEXT: lsls r3, r2, #25 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrhmi r3, [r0, #12] -; CHECK-BE-NEXT: vmovmi.16 q0[6], r3 -; CHECK-BE-NEXT: lsls r2, r2, #24 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrhmi r2, [r0, #14] -; CHECK-BE-NEXT: vmovmi.16 q0[7], r2 -; CHECK-BE-NEXT: vstrh.16 q0, [r1] -; CHECK-BE-NEXT: add sp, #8 -; CHECK-BE-NEXT: bx lr -entry: - %z = getelementptr inbounds i8, i8* %x, i32 4 - %0 = bitcast i8* %z to <8 x i16>* - %c = icmp sgt <8 x i16> %a, zeroinitializer - %1 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %0, i32 4, <8 x i1> %c, <8 x i16> undef) - %2 = bitcast i8* %y to <8 x i16>* - store <8 x i16> %1, <8 x i16>* %2, align 4 - ret i8* %z -} - -define arm_aapcs_vfpcc i8* @masked_v8i16_postinc(i8* %x, i8* %y, <8 x i16> %a) { -; CHECK-LE-LABEL: masked_v8i16_postinc: -; CHECK-LE: @ %bb.0: @ %entry -; CHECK-LE-NEXT: .pad #8 -; CHECK-LE-NEXT: sub sp, #8 -; CHECK-LE-NEXT: vcmp.s16 gt, q0, zr -; CHECK-LE-NEXT: movs r2, #0 -; CHECK-LE-NEXT: vmrs r12, p0 -; CHECK-LE-NEXT: @ implicit-def: $q0 -; CHECK-LE-NEXT: and r3, r12, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #0, #1 -; CHECK-LE-NEXT: ubfx r3, r12, #2, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #1, #1 -; CHECK-LE-NEXT: ubfx r3, r12, #4, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #2, #1 -; CHECK-LE-NEXT: ubfx r3, r12, #6, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #3, #1 -; CHECK-LE-NEXT: ubfx r3, r12, #8, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #4, #1 -; CHECK-LE-NEXT: ubfx r3, r12, #10, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #5, #1 -; CHECK-LE-NEXT: ubfx r3, r12, #12, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #6, #1 -; CHECK-LE-NEXT: ubfx r3, r12, #14, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: add.w r12, r0, #4 -; CHECK-LE-NEXT: bfi r2, r3, #7, #1 -; CHECK-LE-NEXT: uxtb r3, r2 -; CHECK-LE-NEXT: lsls r2, r2, #31 -; CHECK-LE-NEXT: itt ne -; CHECK-LE-NEXT: ldrhne r2, [r0] -; CHECK-LE-NEXT: vmovne.16 q0[0], r2 -; CHECK-LE-NEXT: lsls r2, r3, #30 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrhmi r2, [r0, #2] -; CHECK-LE-NEXT: vmovmi.16 q0[1], r2 -; CHECK-LE-NEXT: lsls r2, r3, #29 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrhmi r2, [r0, #4] -; CHECK-LE-NEXT: vmovmi.16 q0[2], r2 -; CHECK-LE-NEXT: lsls r2, r3, #28 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrhmi 
r2, [r0, #6] -; CHECK-LE-NEXT: vmovmi.16 q0[3], r2 -; CHECK-LE-NEXT: lsls r2, r3, #27 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrhmi r2, [r0, #8] -; CHECK-LE-NEXT: vmovmi.16 q0[4], r2 -; CHECK-LE-NEXT: lsls r2, r3, #26 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrhmi r2, [r0, #10] -; CHECK-LE-NEXT: vmovmi.16 q0[5], r2 -; CHECK-LE-NEXT: lsls r2, r3, #25 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrhmi r2, [r0, #12] -; CHECK-LE-NEXT: vmovmi.16 q0[6], r2 -; CHECK-LE-NEXT: lsls r2, r3, #24 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrhmi r0, [r0, #14] -; CHECK-LE-NEXT: vmovmi.16 q0[7], r0 -; CHECK-LE-NEXT: vstrw.32 q0, [r1] -; CHECK-LE-NEXT: mov r0, r12 -; CHECK-LE-NEXT: add sp, #8 -; CHECK-LE-NEXT: bx lr -; -; CHECK-BE-LABEL: masked_v8i16_postinc: -; CHECK-BE: @ %bb.0: @ %entry -; CHECK-BE-NEXT: .pad #8 -; CHECK-BE-NEXT: sub sp, #8 -; CHECK-BE-NEXT: vrev64.16 q1, q0 -; CHECK-BE-NEXT: movs r2, #0 -; CHECK-BE-NEXT: vcmp.s16 gt, q1, zr -; CHECK-BE-NEXT: @ implicit-def: $q0 -; CHECK-BE-NEXT: vmrs r12, p0 -; CHECK-BE-NEXT: and r3, r12, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #0, #1 -; CHECK-BE-NEXT: ubfx r3, r12, #2, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #1, #1 -; CHECK-BE-NEXT: ubfx r3, r12, #4, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #2, #1 -; CHECK-BE-NEXT: ubfx r3, r12, #6, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #3, #1 -; CHECK-BE-NEXT: ubfx r3, r12, #8, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #4, #1 -; CHECK-BE-NEXT: ubfx r3, r12, #10, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #5, #1 -; CHECK-BE-NEXT: ubfx r3, r12, #12, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #6, #1 -; CHECK-BE-NEXT: ubfx r3, r12, #14, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: add.w r12, r0, #4 -; CHECK-BE-NEXT: bfi r2, r3, #7, #1 -; CHECK-BE-NEXT: uxtb r3, r2 -; CHECK-BE-NEXT: lsls r2, r2, #31 -; CHECK-BE-NEXT: itt ne -; CHECK-BE-NEXT: ldrhne r2, [r0] -; CHECK-BE-NEXT: vmovne.16 q0[0], r2 -; CHECK-BE-NEXT: lsls r2, r3, #30 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrhmi r2, [r0, #2] -; CHECK-BE-NEXT: vmovmi.16 q0[1], r2 -; CHECK-BE-NEXT: lsls r2, r3, #29 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrhmi r2, [r0, #4] -; CHECK-BE-NEXT: vmovmi.16 q0[2], r2 -; CHECK-BE-NEXT: lsls r2, r3, #28 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrhmi r2, [r0, #6] -; CHECK-BE-NEXT: vmovmi.16 q0[3], r2 -; CHECK-BE-NEXT: lsls r2, r3, #27 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrhmi r2, [r0, #8] -; CHECK-BE-NEXT: vmovmi.16 q0[4], r2 -; CHECK-BE-NEXT: lsls r2, r3, #26 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrhmi r2, [r0, #10] -; CHECK-BE-NEXT: vmovmi.16 q0[5], r2 -; CHECK-BE-NEXT: lsls r2, r3, #25 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrhmi r2, [r0, #12] -; CHECK-BE-NEXT: vmovmi.16 q0[6], r2 -; CHECK-BE-NEXT: lsls r2, r3, #24 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrhmi r0, [r0, #14] -; CHECK-BE-NEXT: vmovmi.16 q0[7], r0 -; CHECK-BE-NEXT: vstrh.16 q0, [r1] -; CHECK-BE-NEXT: mov r0, r12 -; CHECK-BE-NEXT: add sp, #8 -; CHECK-BE-NEXT: bx lr -entry: - %z = getelementptr inbounds i8, i8* %x, i32 4 - %0 = bitcast i8* %x to <8 x i16>* - %c = icmp sgt <8 x i16> %a, zeroinitializer - %1 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %0, i32 4, <8 x i1> %c, <8 x i16> undef) - %2 = bitcast i8* %y to <8 x i16>* - store <8 x i16> %1, <8 x i16>* %2, align 4 - ret i8* %z -} - - -define arm_aapcs_vfpcc <16 x i8> 
@masked_v16i8_align4_zero(<16 x i8> *%dest, <16 x i8> %a) { -; CHECK-LE-LABEL: masked_v16i8_align4_zero: -; CHECK-LE: @ %bb.0: @ %entry -; CHECK-LE-NEXT: .save {r4, r6, r7, lr} -; CHECK-LE-NEXT: push {r4, r6, r7, lr} -; CHECK-LE-NEXT: .setfp r7, sp, #8 -; CHECK-LE-NEXT: add r7, sp, #8 -; CHECK-LE-NEXT: .pad #16 -; CHECK-LE-NEXT: sub sp, #16 -; CHECK-LE-NEXT: mov r4, sp -; CHECK-LE-NEXT: bfc r4, #0, #4 -; CHECK-LE-NEXT: mov sp, r4 -; CHECK-LE-NEXT: vcmp.s8 gt, q0, zr -; CHECK-LE-NEXT: vmrs r2, p0 -; CHECK-LE-NEXT: uxth r1, r2 -; CHECK-LE-NEXT: lsls r2, r2, #31 -; CHECK-LE-NEXT: beq .LBB12_2 -; CHECK-LE-NEXT: @ %bb.1: @ %cond.load -; CHECK-LE-NEXT: movs r2, #0 -; CHECK-LE-NEXT: ldrb r3, [r0] -; CHECK-LE-NEXT: vdup.8 q0, r2 -; CHECK-LE-NEXT: vmov.8 q0[0], r3 -; CHECK-LE-NEXT: b .LBB12_3 -; CHECK-LE-NEXT: .LBB12_2: -; CHECK-LE-NEXT: vmov.i32 q0, #0x0 -; CHECK-LE-NEXT: .LBB12_3: @ %else -; CHECK-LE-NEXT: lsls r2, r1, #30 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrbmi r2, [r0, #1] -; CHECK-LE-NEXT: vmovmi.8 q0[1], r2 -; CHECK-LE-NEXT: lsls r2, r1, #29 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrbmi r2, [r0, #2] -; CHECK-LE-NEXT: vmovmi.8 q0[2], r2 -; CHECK-LE-NEXT: lsls r2, r1, #28 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrbmi r2, [r0, #3] -; CHECK-LE-NEXT: vmovmi.8 q0[3], r2 -; CHECK-LE-NEXT: lsls r2, r1, #27 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrbmi r2, [r0, #4] -; CHECK-LE-NEXT: vmovmi.8 q0[4], r2 -; CHECK-LE-NEXT: lsls r2, r1, #26 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrbmi r2, [r0, #5] -; CHECK-LE-NEXT: vmovmi.8 q0[5], r2 -; CHECK-LE-NEXT: lsls r2, r1, #25 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrbmi r2, [r0, #6] -; CHECK-LE-NEXT: vmovmi.8 q0[6], r2 -; CHECK-LE-NEXT: lsls r2, r1, #24 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrbmi r2, [r0, #7] -; CHECK-LE-NEXT: vmovmi.8 q0[7], r2 -; CHECK-LE-NEXT: lsls r2, r1, #23 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrbmi r2, [r0, #8] -; CHECK-LE-NEXT: vmovmi.8 q0[8], r2 -; CHECK-LE-NEXT: lsls r2, r1, #22 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrbmi r2, [r0, #9] -; CHECK-LE-NEXT: vmovmi.8 q0[9], r2 -; CHECK-LE-NEXT: lsls r2, r1, #21 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrbmi r2, [r0, #10] -; CHECK-LE-NEXT: vmovmi.8 q0[10], r2 -; CHECK-LE-NEXT: lsls r2, r1, #20 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrbmi r2, [r0, #11] -; CHECK-LE-NEXT: vmovmi.8 q0[11], r2 -; CHECK-LE-NEXT: lsls r2, r1, #19 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrbmi r2, [r0, #12] -; CHECK-LE-NEXT: vmovmi.8 q0[12], r2 -; CHECK-LE-NEXT: lsls r2, r1, #18 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrbmi r2, [r0, #13] -; CHECK-LE-NEXT: vmovmi.8 q0[13], r2 -; CHECK-LE-NEXT: sub.w r4, r7, #8 -; CHECK-LE-NEXT: lsls r2, r1, #17 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrbmi r2, [r0, #14] -; CHECK-LE-NEXT: vmovmi.8 q0[14], r2 -; CHECK-LE-NEXT: lsls r1, r1, #16 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrbmi r0, [r0, #15] -; CHECK-LE-NEXT: vmovmi.8 q0[15], r0 -; CHECK-LE-NEXT: mov sp, r4 -; CHECK-LE-NEXT: pop {r4, r6, r7, pc} -; -; CHECK-BE-LABEL: masked_v16i8_align4_zero: -; CHECK-BE: @ %bb.0: @ %entry -; CHECK-BE-NEXT: .save {r4, r6, r7, lr} -; CHECK-BE-NEXT: push {r4, r6, r7, lr} -; CHECK-BE-NEXT: .setfp r7, sp, #8 -; CHECK-BE-NEXT: add r7, sp, #8 -; CHECK-BE-NEXT: .pad #16 -; CHECK-BE-NEXT: sub sp, #16 -; CHECK-BE-NEXT: mov r4, sp -; CHECK-BE-NEXT: bfc r4, #0, #4 -; CHECK-BE-NEXT: mov sp, r4 -; CHECK-BE-NEXT: vrev64.8 q1, q0 -; CHECK-BE-NEXT: vcmp.s8 gt, q1, zr -; CHECK-BE-NEXT: vmrs r2, p0 -; CHECK-BE-NEXT: uxth r1, r2 -; 
CHECK-BE-NEXT: lsls r2, r2, #31 -; CHECK-BE-NEXT: beq .LBB12_2 -; CHECK-BE-NEXT: @ %bb.1: @ %cond.load -; CHECK-BE-NEXT: movs r2, #0 -; CHECK-BE-NEXT: ldrb r3, [r0] -; CHECK-BE-NEXT: vdup.8 q1, r2 -; CHECK-BE-NEXT: vmov.8 q1[0], r3 -; CHECK-BE-NEXT: b .LBB12_3 -; CHECK-BE-NEXT: .LBB12_2: -; CHECK-BE-NEXT: vmov.i32 q0, #0x0 -; CHECK-BE-NEXT: vrev32.8 q1, q0 -; CHECK-BE-NEXT: .LBB12_3: @ %else -; CHECK-BE-NEXT: lsls r2, r1, #30 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrbmi r2, [r0, #1] -; CHECK-BE-NEXT: vmovmi.8 q1[1], r2 -; CHECK-BE-NEXT: lsls r2, r1, #29 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrbmi r2, [r0, #2] -; CHECK-BE-NEXT: vmovmi.8 q1[2], r2 -; CHECK-BE-NEXT: lsls r2, r1, #28 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrbmi r2, [r0, #3] -; CHECK-BE-NEXT: vmovmi.8 q1[3], r2 -; CHECK-BE-NEXT: lsls r2, r1, #27 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrbmi r2, [r0, #4] -; CHECK-BE-NEXT: vmovmi.8 q1[4], r2 -; CHECK-BE-NEXT: lsls r2, r1, #26 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrbmi r2, [r0, #5] -; CHECK-BE-NEXT: vmovmi.8 q1[5], r2 -; CHECK-BE-NEXT: lsls r2, r1, #25 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrbmi r2, [r0, #6] -; CHECK-BE-NEXT: vmovmi.8 q1[6], r2 -; CHECK-BE-NEXT: lsls r2, r1, #24 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrbmi r2, [r0, #7] -; CHECK-BE-NEXT: vmovmi.8 q1[7], r2 -; CHECK-BE-NEXT: lsls r2, r1, #23 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrbmi r2, [r0, #8] -; CHECK-BE-NEXT: vmovmi.8 q1[8], r2 -; CHECK-BE-NEXT: lsls r2, r1, #22 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrbmi r2, [r0, #9] -; CHECK-BE-NEXT: vmovmi.8 q1[9], r2 -; CHECK-BE-NEXT: lsls r2, r1, #21 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrbmi r2, [r0, #10] -; CHECK-BE-NEXT: vmovmi.8 q1[10], r2 -; CHECK-BE-NEXT: lsls r2, r1, #20 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrbmi r2, [r0, #11] -; CHECK-BE-NEXT: vmovmi.8 q1[11], r2 -; CHECK-BE-NEXT: lsls r2, r1, #19 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrbmi r2, [r0, #12] -; CHECK-BE-NEXT: vmovmi.8 q1[12], r2 -; CHECK-BE-NEXT: lsls r2, r1, #18 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrbmi r2, [r0, #13] -; CHECK-BE-NEXT: vmovmi.8 q1[13], r2 -; CHECK-BE-NEXT: sub.w r4, r7, #8 -; CHECK-BE-NEXT: lsls r2, r1, #17 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrbmi r2, [r0, #14] -; CHECK-BE-NEXT: vmovmi.8 q1[14], r2 -; CHECK-BE-NEXT: lsls r1, r1, #16 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrbmi r0, [r0, #15] -; CHECK-BE-NEXT: vmovmi.8 q1[15], r0 -; CHECK-BE-NEXT: vrev64.8 q0, q1 -; CHECK-BE-NEXT: mov sp, r4 -; CHECK-BE-NEXT: pop {r4, r6, r7, pc} -entry: - %c = icmp sgt <16 x i8> %a, zeroinitializer - %l = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %dest, i32 1, <16 x i1> %c, <16 x i8> zeroinitializer) - ret <16 x i8> %l -} - -define arm_aapcs_vfpcc <16 x i8> @masked_v16i8_align4_undef(<16 x i8> *%dest, <16 x i8> %a) { -; CHECK-LE-LABEL: masked_v16i8_align4_undef: -; CHECK-LE: @ %bb.0: @ %entry -; CHECK-LE-NEXT: .save {r4, r6, r7, lr} -; CHECK-LE-NEXT: push {r4, r6, r7, lr} -; CHECK-LE-NEXT: .setfp r7, sp, #8 -; CHECK-LE-NEXT: add r7, sp, #8 -; CHECK-LE-NEXT: .pad #16 -; CHECK-LE-NEXT: sub sp, #16 -; CHECK-LE-NEXT: mov r4, sp -; CHECK-LE-NEXT: bfc r4, #0, #4 -; CHECK-LE-NEXT: mov sp, r4 -; CHECK-LE-NEXT: vcmp.s8 gt, q0, zr -; CHECK-LE-NEXT: @ implicit-def: $q0 -; CHECK-LE-NEXT: sub.w r4, r7, #8 -; CHECK-LE-NEXT: vmrs r2, p0 -; CHECK-LE-NEXT: uxth r1, r2 -; CHECK-LE-NEXT: lsls r2, r2, #31 -; CHECK-LE-NEXT: itt ne -; CHECK-LE-NEXT: ldrbne r2, [r0] -; CHECK-LE-NEXT: vmovne.8 q0[0], r2 -; 
CHECK-LE-NEXT: lsls r2, r1, #30 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrbmi r2, [r0, #1] -; CHECK-LE-NEXT: vmovmi.8 q0[1], r2 -; CHECK-LE-NEXT: lsls r2, r1, #29 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrbmi r2, [r0, #2] -; CHECK-LE-NEXT: vmovmi.8 q0[2], r2 -; CHECK-LE-NEXT: lsls r2, r1, #28 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrbmi r2, [r0, #3] -; CHECK-LE-NEXT: vmovmi.8 q0[3], r2 -; CHECK-LE-NEXT: lsls r2, r1, #27 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrbmi r2, [r0, #4] -; CHECK-LE-NEXT: vmovmi.8 q0[4], r2 -; CHECK-LE-NEXT: lsls r2, r1, #26 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrbmi r2, [r0, #5] -; CHECK-LE-NEXT: vmovmi.8 q0[5], r2 -; CHECK-LE-NEXT: lsls r2, r1, #25 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrbmi r2, [r0, #6] -; CHECK-LE-NEXT: vmovmi.8 q0[6], r2 -; CHECK-LE-NEXT: lsls r2, r1, #24 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrbmi r2, [r0, #7] -; CHECK-LE-NEXT: vmovmi.8 q0[7], r2 -; CHECK-LE-NEXT: lsls r2, r1, #23 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrbmi r2, [r0, #8] -; CHECK-LE-NEXT: vmovmi.8 q0[8], r2 -; CHECK-LE-NEXT: lsls r2, r1, #22 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrbmi r2, [r0, #9] -; CHECK-LE-NEXT: vmovmi.8 q0[9], r2 -; CHECK-LE-NEXT: lsls r2, r1, #21 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrbmi r2, [r0, #10] -; CHECK-LE-NEXT: vmovmi.8 q0[10], r2 -; CHECK-LE-NEXT: lsls r2, r1, #20 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrbmi r2, [r0, #11] -; CHECK-LE-NEXT: vmovmi.8 q0[11], r2 -; CHECK-LE-NEXT: lsls r2, r1, #19 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrbmi r2, [r0, #12] -; CHECK-LE-NEXT: vmovmi.8 q0[12], r2 -; CHECK-LE-NEXT: lsls r2, r1, #18 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrbmi r2, [r0, #13] -; CHECK-LE-NEXT: vmovmi.8 q0[13], r2 -; CHECK-LE-NEXT: lsls r2, r1, #17 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrbmi r2, [r0, #14] -; CHECK-LE-NEXT: vmovmi.8 q0[14], r2 -; CHECK-LE-NEXT: lsls r1, r1, #16 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrbmi r0, [r0, #15] -; CHECK-LE-NEXT: vmovmi.8 q0[15], r0 -; CHECK-LE-NEXT: mov sp, r4 -; CHECK-LE-NEXT: pop {r4, r6, r7, pc} -; -; CHECK-BE-LABEL: masked_v16i8_align4_undef: -; CHECK-BE: @ %bb.0: @ %entry -; CHECK-BE-NEXT: .save {r4, r6, r7, lr} -; CHECK-BE-NEXT: push {r4, r6, r7, lr} -; CHECK-BE-NEXT: .setfp r7, sp, #8 -; CHECK-BE-NEXT: add r7, sp, #8 -; CHECK-BE-NEXT: .pad #16 -; CHECK-BE-NEXT: sub sp, #16 -; CHECK-BE-NEXT: mov r4, sp -; CHECK-BE-NEXT: bfc r4, #0, #4 -; CHECK-BE-NEXT: mov sp, r4 -; CHECK-BE-NEXT: vrev64.8 q1, q0 -; CHECK-BE-NEXT: sub.w r4, r7, #8 -; CHECK-BE-NEXT: vcmp.s8 gt, q1, zr -; CHECK-BE-NEXT: @ implicit-def: $q1 -; CHECK-BE-NEXT: vmrs r2, p0 -; CHECK-BE-NEXT: uxth r1, r2 -; CHECK-BE-NEXT: lsls r2, r2, #31 -; CHECK-BE-NEXT: itt ne -; CHECK-BE-NEXT: ldrbne r2, [r0] -; CHECK-BE-NEXT: vmovne.8 q1[0], r2 -; CHECK-BE-NEXT: lsls r2, r1, #30 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrbmi r2, [r0, #1] -; CHECK-BE-NEXT: vmovmi.8 q1[1], r2 -; CHECK-BE-NEXT: lsls r2, r1, #29 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrbmi r2, [r0, #2] -; CHECK-BE-NEXT: vmovmi.8 q1[2], r2 -; CHECK-BE-NEXT: lsls r2, r1, #28 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrbmi r2, [r0, #3] -; CHECK-BE-NEXT: vmovmi.8 q1[3], r2 -; CHECK-BE-NEXT: lsls r2, r1, #27 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrbmi r2, [r0, #4] -; CHECK-BE-NEXT: vmovmi.8 q1[4], r2 -; CHECK-BE-NEXT: lsls r2, r1, #26 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrbmi r2, [r0, #5] -; CHECK-BE-NEXT: vmovmi.8 q1[5], r2 -; CHECK-BE-NEXT: lsls r2, r1, #25 -; CHECK-BE-NEXT: itt mi -; 
CHECK-BE-NEXT: ldrbmi r2, [r0, #6] -; CHECK-BE-NEXT: vmovmi.8 q1[6], r2 -; CHECK-BE-NEXT: lsls r2, r1, #24 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrbmi r2, [r0, #7] -; CHECK-BE-NEXT: vmovmi.8 q1[7], r2 -; CHECK-BE-NEXT: lsls r2, r1, #23 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrbmi r2, [r0, #8] -; CHECK-BE-NEXT: vmovmi.8 q1[8], r2 -; CHECK-BE-NEXT: lsls r2, r1, #22 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrbmi r2, [r0, #9] -; CHECK-BE-NEXT: vmovmi.8 q1[9], r2 -; CHECK-BE-NEXT: lsls r2, r1, #21 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrbmi r2, [r0, #10] -; CHECK-BE-NEXT: vmovmi.8 q1[10], r2 -; CHECK-BE-NEXT: lsls r2, r1, #20 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrbmi r2, [r0, #11] -; CHECK-BE-NEXT: vmovmi.8 q1[11], r2 -; CHECK-BE-NEXT: lsls r2, r1, #19 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrbmi r2, [r0, #12] -; CHECK-BE-NEXT: vmovmi.8 q1[12], r2 -; CHECK-BE-NEXT: lsls r2, r1, #18 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrbmi r2, [r0, #13] -; CHECK-BE-NEXT: vmovmi.8 q1[13], r2 -; CHECK-BE-NEXT: lsls r2, r1, #17 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrbmi r2, [r0, #14] -; CHECK-BE-NEXT: vmovmi.8 q1[14], r2 -; CHECK-BE-NEXT: lsls r1, r1, #16 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrbmi r0, [r0, #15] -; CHECK-BE-NEXT: vmovmi.8 q1[15], r0 -; CHECK-BE-NEXT: vrev64.8 q0, q1 -; CHECK-BE-NEXT: mov sp, r4 -; CHECK-BE-NEXT: pop {r4, r6, r7, pc} -entry: - %c = icmp sgt <16 x i8> %a, zeroinitializer - %l = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %dest, i32 1, <16 x i1> %c, <16 x i8> undef) - ret <16 x i8> %l -} - -define arm_aapcs_vfpcc <16 x i8> @masked_v16i8_align4_other(<16 x i8> *%dest, <16 x i8> %a) { -; CHECK-LE-LABEL: masked_v16i8_align4_other: -; CHECK-LE: @ %bb.0: @ %entry -; CHECK-LE-NEXT: .save {r4, r6, r7, lr} -; CHECK-LE-NEXT: push {r4, r6, r7, lr} -; CHECK-LE-NEXT: .setfp r7, sp, #8 -; CHECK-LE-NEXT: add r7, sp, #8 -; CHECK-LE-NEXT: .pad #16 -; CHECK-LE-NEXT: sub sp, #16 -; CHECK-LE-NEXT: mov r4, sp -; CHECK-LE-NEXT: bfc r4, #0, #4 -; CHECK-LE-NEXT: mov sp, r4 -; CHECK-LE-NEXT: vcmp.s8 gt, q0, zr -; CHECK-LE-NEXT: sub.w r4, r7, #8 -; CHECK-LE-NEXT: vmrs r2, p0 -; CHECK-LE-NEXT: uxth r1, r2 -; CHECK-LE-NEXT: lsls r2, r2, #31 -; CHECK-LE-NEXT: itt ne -; CHECK-LE-NEXT: ldrbne r2, [r0] -; CHECK-LE-NEXT: vmovne.8 q0[0], r2 -; CHECK-LE-NEXT: lsls r2, r1, #30 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrbmi r2, [r0, #1] -; CHECK-LE-NEXT: vmovmi.8 q0[1], r2 -; CHECK-LE-NEXT: lsls r2, r1, #29 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrbmi r2, [r0, #2] -; CHECK-LE-NEXT: vmovmi.8 q0[2], r2 -; CHECK-LE-NEXT: lsls r2, r1, #28 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrbmi r2, [r0, #3] -; CHECK-LE-NEXT: vmovmi.8 q0[3], r2 -; CHECK-LE-NEXT: lsls r2, r1, #27 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrbmi r2, [r0, #4] -; CHECK-LE-NEXT: vmovmi.8 q0[4], r2 -; CHECK-LE-NEXT: lsls r2, r1, #26 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrbmi r2, [r0, #5] -; CHECK-LE-NEXT: vmovmi.8 q0[5], r2 -; CHECK-LE-NEXT: lsls r2, r1, #25 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrbmi r2, [r0, #6] -; CHECK-LE-NEXT: vmovmi.8 q0[6], r2 -; CHECK-LE-NEXT: lsls r2, r1, #24 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrbmi r2, [r0, #7] -; CHECK-LE-NEXT: vmovmi.8 q0[7], r2 -; CHECK-LE-NEXT: lsls r2, r1, #23 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrbmi r2, [r0, #8] -; CHECK-LE-NEXT: vmovmi.8 q0[8], r2 -; CHECK-LE-NEXT: lsls r2, r1, #22 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrbmi r2, [r0, #9] -; CHECK-LE-NEXT: vmovmi.8 q0[9], r2 -; CHECK-LE-NEXT: 
lsls r2, r1, #21 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrbmi r2, [r0, #10] -; CHECK-LE-NEXT: vmovmi.8 q0[10], r2 -; CHECK-LE-NEXT: lsls r2, r1, #20 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrbmi r2, [r0, #11] -; CHECK-LE-NEXT: vmovmi.8 q0[11], r2 -; CHECK-LE-NEXT: lsls r2, r1, #19 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrbmi r2, [r0, #12] -; CHECK-LE-NEXT: vmovmi.8 q0[12], r2 -; CHECK-LE-NEXT: lsls r2, r1, #18 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrbmi r2, [r0, #13] -; CHECK-LE-NEXT: vmovmi.8 q0[13], r2 -; CHECK-LE-NEXT: lsls r2, r1, #17 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrbmi r2, [r0, #14] -; CHECK-LE-NEXT: vmovmi.8 q0[14], r2 -; CHECK-LE-NEXT: lsls r1, r1, #16 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrbmi r0, [r0, #15] -; CHECK-LE-NEXT: vmovmi.8 q0[15], r0 -; CHECK-LE-NEXT: mov sp, r4 -; CHECK-LE-NEXT: pop {r4, r6, r7, pc} -; -; CHECK-BE-LABEL: masked_v16i8_align4_other: -; CHECK-BE: @ %bb.0: @ %entry -; CHECK-BE-NEXT: .save {r4, r6, r7, lr} -; CHECK-BE-NEXT: push {r4, r6, r7, lr} -; CHECK-BE-NEXT: .setfp r7, sp, #8 -; CHECK-BE-NEXT: add r7, sp, #8 -; CHECK-BE-NEXT: .pad #16 -; CHECK-BE-NEXT: sub sp, #16 -; CHECK-BE-NEXT: mov r4, sp -; CHECK-BE-NEXT: bfc r4, #0, #4 -; CHECK-BE-NEXT: mov sp, r4 -; CHECK-BE-NEXT: vrev64.8 q1, q0 -; CHECK-BE-NEXT: sub.w r4, r7, #8 -; CHECK-BE-NEXT: vcmp.s8 gt, q1, zr -; CHECK-BE-NEXT: vmrs r2, p0 -; CHECK-BE-NEXT: uxth r1, r2 -; CHECK-BE-NEXT: lsls r2, r2, #31 -; CHECK-BE-NEXT: itt ne -; CHECK-BE-NEXT: ldrbne r2, [r0] -; CHECK-BE-NEXT: vmovne.8 q1[0], r2 -; CHECK-BE-NEXT: lsls r2, r1, #30 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrbmi r2, [r0, #1] -; CHECK-BE-NEXT: vmovmi.8 q1[1], r2 -; CHECK-BE-NEXT: lsls r2, r1, #29 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrbmi r2, [r0, #2] -; CHECK-BE-NEXT: vmovmi.8 q1[2], r2 -; CHECK-BE-NEXT: lsls r2, r1, #28 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrbmi r2, [r0, #3] -; CHECK-BE-NEXT: vmovmi.8 q1[3], r2 -; CHECK-BE-NEXT: lsls r2, r1, #27 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrbmi r2, [r0, #4] -; CHECK-BE-NEXT: vmovmi.8 q1[4], r2 -; CHECK-BE-NEXT: lsls r2, r1, #26 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrbmi r2, [r0, #5] -; CHECK-BE-NEXT: vmovmi.8 q1[5], r2 -; CHECK-BE-NEXT: lsls r2, r1, #25 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrbmi r2, [r0, #6] -; CHECK-BE-NEXT: vmovmi.8 q1[6], r2 -; CHECK-BE-NEXT: lsls r2, r1, #24 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrbmi r2, [r0, #7] -; CHECK-BE-NEXT: vmovmi.8 q1[7], r2 -; CHECK-BE-NEXT: lsls r2, r1, #23 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrbmi r2, [r0, #8] -; CHECK-BE-NEXT: vmovmi.8 q1[8], r2 -; CHECK-BE-NEXT: lsls r2, r1, #22 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrbmi r2, [r0, #9] -; CHECK-BE-NEXT: vmovmi.8 q1[9], r2 -; CHECK-BE-NEXT: lsls r2, r1, #21 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrbmi r2, [r0, #10] -; CHECK-BE-NEXT: vmovmi.8 q1[10], r2 -; CHECK-BE-NEXT: lsls r2, r1, #20 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrbmi r2, [r0, #11] -; CHECK-BE-NEXT: vmovmi.8 q1[11], r2 -; CHECK-BE-NEXT: lsls r2, r1, #19 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrbmi r2, [r0, #12] -; CHECK-BE-NEXT: vmovmi.8 q1[12], r2 -; CHECK-BE-NEXT: lsls r2, r1, #18 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrbmi r2, [r0, #13] -; CHECK-BE-NEXT: vmovmi.8 q1[13], r2 -; CHECK-BE-NEXT: lsls r2, r1, #17 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrbmi r2, [r0, #14] -; CHECK-BE-NEXT: vmovmi.8 q1[14], r2 -; CHECK-BE-NEXT: lsls r1, r1, #16 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrbmi r0, [r0, #15] -; 
CHECK-BE-NEXT: vmovmi.8 q1[15], r0 -; CHECK-BE-NEXT: vrev64.8 q0, q1 -; CHECK-BE-NEXT: mov sp, r4 -; CHECK-BE-NEXT: pop {r4, r6, r7, pc} -entry: - %c = icmp sgt <16 x i8> %a, zeroinitializer - %l = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %dest, i32 1, <16 x i1> %c, <16 x i8> %a) - ret <16 x i8> %l -} - -define arm_aapcs_vfpcc i8* @masked_v16i8_preinc(i8* %x, i8* %y, <16 x i8> %a) { -; CHECK-LE-LABEL: masked_v16i8_preinc: -; CHECK-LE: @ %bb.0: @ %entry -; CHECK-LE-NEXT: .save {r4, r6, r7, lr} -; CHECK-LE-NEXT: push {r4, r6, r7, lr} -; CHECK-LE-NEXT: .setfp r7, sp, #8 -; CHECK-LE-NEXT: add r7, sp, #8 -; CHECK-LE-NEXT: .pad #16 -; CHECK-LE-NEXT: sub sp, #16 -; CHECK-LE-NEXT: mov r4, sp -; CHECK-LE-NEXT: bfc r4, #0, #4 -; CHECK-LE-NEXT: mov sp, r4 -; CHECK-LE-NEXT: vcmp.s8 gt, q0, zr -; CHECK-LE-NEXT: @ implicit-def: $q0 -; CHECK-LE-NEXT: adds r0, #4 -; CHECK-LE-NEXT: vmrs r3, p0 -; CHECK-LE-NEXT: sub.w r4, r7, #8 -; CHECK-LE-NEXT: uxth r2, r3 -; CHECK-LE-NEXT: lsls r3, r3, #31 -; CHECK-LE-NEXT: itt ne -; CHECK-LE-NEXT: ldrbne r3, [r0] -; CHECK-LE-NEXT: vmovne.8 q0[0], r3 -; CHECK-LE-NEXT: lsls r3, r2, #30 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrbmi r3, [r0, #1] -; CHECK-LE-NEXT: vmovmi.8 q0[1], r3 -; CHECK-LE-NEXT: lsls r3, r2, #29 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrbmi r3, [r0, #2] -; CHECK-LE-NEXT: vmovmi.8 q0[2], r3 -; CHECK-LE-NEXT: lsls r3, r2, #28 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrbmi r3, [r0, #3] -; CHECK-LE-NEXT: vmovmi.8 q0[3], r3 -; CHECK-LE-NEXT: lsls r3, r2, #27 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrbmi r3, [r0, #4] -; CHECK-LE-NEXT: vmovmi.8 q0[4], r3 -; CHECK-LE-NEXT: lsls r3, r2, #26 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrbmi r3, [r0, #5] -; CHECK-LE-NEXT: vmovmi.8 q0[5], r3 -; CHECK-LE-NEXT: lsls r3, r2, #25 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrbmi r3, [r0, #6] -; CHECK-LE-NEXT: vmovmi.8 q0[6], r3 -; CHECK-LE-NEXT: lsls r3, r2, #24 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrbmi r3, [r0, #7] -; CHECK-LE-NEXT: vmovmi.8 q0[7], r3 -; CHECK-LE-NEXT: lsls r3, r2, #23 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrbmi r3, [r0, #8] -; CHECK-LE-NEXT: vmovmi.8 q0[8], r3 -; CHECK-LE-NEXT: lsls r3, r2, #22 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrbmi r3, [r0, #9] -; CHECK-LE-NEXT: vmovmi.8 q0[9], r3 -; CHECK-LE-NEXT: lsls r3, r2, #21 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrbmi r3, [r0, #10] -; CHECK-LE-NEXT: vmovmi.8 q0[10], r3 -; CHECK-LE-NEXT: lsls r3, r2, #20 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrbmi r3, [r0, #11] -; CHECK-LE-NEXT: vmovmi.8 q0[11], r3 -; CHECK-LE-NEXT: lsls r3, r2, #19 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrbmi r3, [r0, #12] -; CHECK-LE-NEXT: vmovmi.8 q0[12], r3 -; CHECK-LE-NEXT: lsls r3, r2, #18 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrbmi r3, [r0, #13] -; CHECK-LE-NEXT: vmovmi.8 q0[13], r3 -; CHECK-LE-NEXT: lsls r3, r2, #17 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrbmi r3, [r0, #14] -; CHECK-LE-NEXT: vmovmi.8 q0[14], r3 -; CHECK-LE-NEXT: lsls r2, r2, #16 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrbmi r2, [r0, #15] -; CHECK-LE-NEXT: vmovmi.8 q0[15], r2 -; CHECK-LE-NEXT: vstrw.32 q0, [r1] -; CHECK-LE-NEXT: mov sp, r4 -; CHECK-LE-NEXT: pop {r4, r6, r7, pc} -; -; CHECK-BE-LABEL: masked_v16i8_preinc: -; CHECK-BE: @ %bb.0: @ %entry -; CHECK-BE-NEXT: .save {r4, r6, r7, lr} -; CHECK-BE-NEXT: push {r4, r6, r7, lr} -; CHECK-BE-NEXT: .setfp r7, sp, #8 -; CHECK-BE-NEXT: add r7, sp, #8 -; CHECK-BE-NEXT: .pad #16 -; CHECK-BE-NEXT: sub sp, #16 -; CHECK-BE-NEXT: mov r4, sp 
-; CHECK-BE-NEXT: bfc r4, #0, #4 -; CHECK-BE-NEXT: mov sp, r4 -; CHECK-BE-NEXT: vrev64.8 q1, q0 -; CHECK-BE-NEXT: @ implicit-def: $q0 -; CHECK-BE-NEXT: adds r0, #4 -; CHECK-BE-NEXT: vcmp.s8 gt, q1, zr -; CHECK-BE-NEXT: sub.w r4, r7, #8 -; CHECK-BE-NEXT: vmrs r3, p0 -; CHECK-BE-NEXT: uxth r2, r3 -; CHECK-BE-NEXT: lsls r3, r3, #31 -; CHECK-BE-NEXT: itt ne -; CHECK-BE-NEXT: ldrbne r3, [r0] -; CHECK-BE-NEXT: vmovne.8 q0[0], r3 -; CHECK-BE-NEXT: lsls r3, r2, #30 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrbmi r3, [r0, #1] -; CHECK-BE-NEXT: vmovmi.8 q0[1], r3 -; CHECK-BE-NEXT: lsls r3, r2, #29 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrbmi r3, [r0, #2] -; CHECK-BE-NEXT: vmovmi.8 q0[2], r3 -; CHECK-BE-NEXT: lsls r3, r2, #28 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrbmi r3, [r0, #3] -; CHECK-BE-NEXT: vmovmi.8 q0[3], r3 -; CHECK-BE-NEXT: lsls r3, r2, #27 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrbmi r3, [r0, #4] -; CHECK-BE-NEXT: vmovmi.8 q0[4], r3 -; CHECK-BE-NEXT: lsls r3, r2, #26 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrbmi r3, [r0, #5] -; CHECK-BE-NEXT: vmovmi.8 q0[5], r3 -; CHECK-BE-NEXT: lsls r3, r2, #25 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrbmi r3, [r0, #6] -; CHECK-BE-NEXT: vmovmi.8 q0[6], r3 -; CHECK-BE-NEXT: lsls r3, r2, #24 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrbmi r3, [r0, #7] -; CHECK-BE-NEXT: vmovmi.8 q0[7], r3 -; CHECK-BE-NEXT: lsls r3, r2, #23 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrbmi r3, [r0, #8] -; CHECK-BE-NEXT: vmovmi.8 q0[8], r3 -; CHECK-BE-NEXT: lsls r3, r2, #22 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrbmi r3, [r0, #9] -; CHECK-BE-NEXT: vmovmi.8 q0[9], r3 -; CHECK-BE-NEXT: lsls r3, r2, #21 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrbmi r3, [r0, #10] -; CHECK-BE-NEXT: vmovmi.8 q0[10], r3 -; CHECK-BE-NEXT: lsls r3, r2, #20 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrbmi r3, [r0, #11] -; CHECK-BE-NEXT: vmovmi.8 q0[11], r3 -; CHECK-BE-NEXT: lsls r3, r2, #19 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrbmi r3, [r0, #12] -; CHECK-BE-NEXT: vmovmi.8 q0[12], r3 -; CHECK-BE-NEXT: lsls r3, r2, #18 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrbmi r3, [r0, #13] -; CHECK-BE-NEXT: vmovmi.8 q0[13], r3 -; CHECK-BE-NEXT: lsls r3, r2, #17 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrbmi r3, [r0, #14] -; CHECK-BE-NEXT: vmovmi.8 q0[14], r3 -; CHECK-BE-NEXT: lsls r2, r2, #16 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrbmi r2, [r0, #15] -; CHECK-BE-NEXT: vmovmi.8 q0[15], r2 -; CHECK-BE-NEXT: vstrb.8 q0, [r1] -; CHECK-BE-NEXT: mov sp, r4 -; CHECK-BE-NEXT: pop {r4, r6, r7, pc} -entry: - %z = getelementptr inbounds i8, i8* %x, i32 4 - %0 = bitcast i8* %z to <16 x i8>* - %c = icmp sgt <16 x i8> %a, zeroinitializer - %1 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %0, i32 4, <16 x i1> %c, <16 x i8> undef) - %2 = bitcast i8* %y to <16 x i8>* - store <16 x i8> %1, <16 x i8>* %2, align 4 - ret i8* %z -} - -define arm_aapcs_vfpcc i8* @masked_v16i8_postinc(i8* %x, i8* %y, <16 x i8> %a) { -; CHECK-LE-LABEL: masked_v16i8_postinc: -; CHECK-LE: @ %bb.0: @ %entry -; CHECK-LE-NEXT: .save {r4, r6, r7, lr} -; CHECK-LE-NEXT: push {r4, r6, r7, lr} -; CHECK-LE-NEXT: .setfp r7, sp, #8 -; CHECK-LE-NEXT: add r7, sp, #8 -; CHECK-LE-NEXT: .pad #16 -; CHECK-LE-NEXT: sub sp, #16 -; CHECK-LE-NEXT: mov r4, sp -; CHECK-LE-NEXT: bfc r4, #0, #4 -; CHECK-LE-NEXT: mov sp, r4 -; CHECK-LE-NEXT: vcmp.s8 gt, q0, zr -; CHECK-LE-NEXT: @ implicit-def: $q0 -; CHECK-LE-NEXT: sub.w r4, r7, #8 -; CHECK-LE-NEXT: vmrs r2, p0 -; CHECK-LE-NEXT: add.w r12, r0, #4 -; 
CHECK-LE-NEXT: uxth r3, r2 -; CHECK-LE-NEXT: lsls r2, r2, #31 -; CHECK-LE-NEXT: itt ne -; CHECK-LE-NEXT: ldrbne r2, [r0] -; CHECK-LE-NEXT: vmovne.8 q0[0], r2 -; CHECK-LE-NEXT: lsls r2, r3, #30 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrbmi r2, [r0, #1] -; CHECK-LE-NEXT: vmovmi.8 q0[1], r2 -; CHECK-LE-NEXT: lsls r2, r3, #29 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrbmi r2, [r0, #2] -; CHECK-LE-NEXT: vmovmi.8 q0[2], r2 -; CHECK-LE-NEXT: lsls r2, r3, #28 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrbmi r2, [r0, #3] -; CHECK-LE-NEXT: vmovmi.8 q0[3], r2 -; CHECK-LE-NEXT: lsls r2, r3, #27 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrbmi r2, [r0, #4] -; CHECK-LE-NEXT: vmovmi.8 q0[4], r2 -; CHECK-LE-NEXT: lsls r2, r3, #26 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrbmi r2, [r0, #5] -; CHECK-LE-NEXT: vmovmi.8 q0[5], r2 -; CHECK-LE-NEXT: lsls r2, r3, #25 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrbmi r2, [r0, #6] -; CHECK-LE-NEXT: vmovmi.8 q0[6], r2 -; CHECK-LE-NEXT: lsls r2, r3, #24 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrbmi r2, [r0, #7] -; CHECK-LE-NEXT: vmovmi.8 q0[7], r2 -; CHECK-LE-NEXT: lsls r2, r3, #23 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrbmi r2, [r0, #8] -; CHECK-LE-NEXT: vmovmi.8 q0[8], r2 -; CHECK-LE-NEXT: lsls r2, r3, #22 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrbmi r2, [r0, #9] -; CHECK-LE-NEXT: vmovmi.8 q0[9], r2 -; CHECK-LE-NEXT: lsls r2, r3, #21 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrbmi r2, [r0, #10] -; CHECK-LE-NEXT: vmovmi.8 q0[10], r2 -; CHECK-LE-NEXT: lsls r2, r3, #20 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrbmi r2, [r0, #11] -; CHECK-LE-NEXT: vmovmi.8 q0[11], r2 -; CHECK-LE-NEXT: lsls r2, r3, #19 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrbmi r2, [r0, #12] -; CHECK-LE-NEXT: vmovmi.8 q0[12], r2 -; CHECK-LE-NEXT: lsls r2, r3, #18 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrbmi r2, [r0, #13] -; CHECK-LE-NEXT: vmovmi.8 q0[13], r2 -; CHECK-LE-NEXT: lsls r2, r3, #17 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrbmi r2, [r0, #14] -; CHECK-LE-NEXT: vmovmi.8 q0[14], r2 -; CHECK-LE-NEXT: lsls r2, r3, #16 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: ldrbmi r0, [r0, #15] -; CHECK-LE-NEXT: vmovmi.8 q0[15], r0 -; CHECK-LE-NEXT: vstrw.32 q0, [r1] -; CHECK-LE-NEXT: mov r0, r12 -; CHECK-LE-NEXT: mov sp, r4 -; CHECK-LE-NEXT: pop {r4, r6, r7, pc} -; -; CHECK-BE-LABEL: masked_v16i8_postinc: -; CHECK-BE: @ %bb.0: @ %entry -; CHECK-BE-NEXT: .save {r4, r6, r7, lr} -; CHECK-BE-NEXT: push {r4, r6, r7, lr} -; CHECK-BE-NEXT: .setfp r7, sp, #8 -; CHECK-BE-NEXT: add r7, sp, #8 -; CHECK-BE-NEXT: .pad #16 -; CHECK-BE-NEXT: sub sp, #16 -; CHECK-BE-NEXT: mov r4, sp -; CHECK-BE-NEXT: bfc r4, #0, #4 -; CHECK-BE-NEXT: mov sp, r4 -; CHECK-BE-NEXT: vrev64.8 q1, q0 -; CHECK-BE-NEXT: @ implicit-def: $q0 -; CHECK-BE-NEXT: sub.w r4, r7, #8 -; CHECK-BE-NEXT: vcmp.s8 gt, q1, zr -; CHECK-BE-NEXT: add.w r12, r0, #4 -; CHECK-BE-NEXT: vmrs r2, p0 -; CHECK-BE-NEXT: uxth r3, r2 -; CHECK-BE-NEXT: lsls r2, r2, #31 -; CHECK-BE-NEXT: itt ne -; CHECK-BE-NEXT: ldrbne r2, [r0] -; CHECK-BE-NEXT: vmovne.8 q0[0], r2 -; CHECK-BE-NEXT: lsls r2, r3, #30 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrbmi r2, [r0, #1] -; CHECK-BE-NEXT: vmovmi.8 q0[1], r2 -; CHECK-BE-NEXT: lsls r2, r3, #29 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrbmi r2, [r0, #2] -; CHECK-BE-NEXT: vmovmi.8 q0[2], r2 -; CHECK-BE-NEXT: lsls r2, r3, #28 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrbmi r2, [r0, #3] -; CHECK-BE-NEXT: vmovmi.8 q0[3], r2 -; CHECK-BE-NEXT: lsls r2, r3, #27 -; CHECK-BE-NEXT: itt mi -; 
CHECK-BE-NEXT: ldrbmi r2, [r0, #4] -; CHECK-BE-NEXT: vmovmi.8 q0[4], r2 -; CHECK-BE-NEXT: lsls r2, r3, #26 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrbmi r2, [r0, #5] -; CHECK-BE-NEXT: vmovmi.8 q0[5], r2 -; CHECK-BE-NEXT: lsls r2, r3, #25 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrbmi r2, [r0, #6] -; CHECK-BE-NEXT: vmovmi.8 q0[6], r2 -; CHECK-BE-NEXT: lsls r2, r3, #24 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrbmi r2, [r0, #7] -; CHECK-BE-NEXT: vmovmi.8 q0[7], r2 -; CHECK-BE-NEXT: lsls r2, r3, #23 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrbmi r2, [r0, #8] -; CHECK-BE-NEXT: vmovmi.8 q0[8], r2 -; CHECK-BE-NEXT: lsls r2, r3, #22 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrbmi r2, [r0, #9] -; CHECK-BE-NEXT: vmovmi.8 q0[9], r2 -; CHECK-BE-NEXT: lsls r2, r3, #21 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrbmi r2, [r0, #10] -; CHECK-BE-NEXT: vmovmi.8 q0[10], r2 -; CHECK-BE-NEXT: lsls r2, r3, #20 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrbmi r2, [r0, #11] -; CHECK-BE-NEXT: vmovmi.8 q0[11], r2 -; CHECK-BE-NEXT: lsls r2, r3, #19 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrbmi r2, [r0, #12] -; CHECK-BE-NEXT: vmovmi.8 q0[12], r2 -; CHECK-BE-NEXT: lsls r2, r3, #18 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrbmi r2, [r0, #13] -; CHECK-BE-NEXT: vmovmi.8 q0[13], r2 -; CHECK-BE-NEXT: lsls r2, r3, #17 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrbmi r2, [r0, #14] -; CHECK-BE-NEXT: vmovmi.8 q0[14], r2 -; CHECK-BE-NEXT: lsls r2, r3, #16 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: ldrbmi r0, [r0, #15] -; CHECK-BE-NEXT: vmovmi.8 q0[15], r0 -; CHECK-BE-NEXT: vstrb.8 q0, [r1] -; CHECK-BE-NEXT: mov r0, r12 -; CHECK-BE-NEXT: mov sp, r4 -; CHECK-BE-NEXT: pop {r4, r6, r7, pc} -entry: - %z = getelementptr inbounds i8, i8* %x, i32 4 - %0 = bitcast i8* %x to <16 x i8>* - %c = icmp sgt <16 x i8> %a, zeroinitializer - %1 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %0, i32 4, <16 x i1> %c, <16 x i8> undef) - %2 = bitcast i8* %y to <16 x i8>* - store <16 x i8> %1, <16 x i8>* %2, align 4 - ret i8* %z -} - - -define arm_aapcs_vfpcc <4 x float> @masked_v4f32_align4_zero(<4 x float> *%dest, <4 x i32> %a) { -; CHECK-LE-LABEL: masked_v4f32_align4_zero: -; CHECK-LE: @ %bb.0: @ %entry -; CHECK-LE-NEXT: .pad #4 -; CHECK-LE-NEXT: sub sp, #4 -; CHECK-LE-NEXT: vcmp.s32 gt, q0, zr -; CHECK-LE-NEXT: movs r1, #0 -; CHECK-LE-NEXT: vmrs r2, p0 -; CHECK-LE-NEXT: and r3, r2, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r1, r3, #0, #1 -; CHECK-LE-NEXT: ubfx r3, r2, #4, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r1, r3, #1, #1 -; CHECK-LE-NEXT: ubfx r3, r2, #8, #1 -; CHECK-LE-NEXT: ubfx r2, r2, #12, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r1, r3, #2, #1 -; CHECK-LE-NEXT: rsbs r2, r2, #0 -; CHECK-LE-NEXT: bfi r1, r2, #3, #1 -; CHECK-LE-NEXT: and r1, r1, #15 -; CHECK-LE-NEXT: lsls r2, r1, #31 -; CHECK-LE-NEXT: beq .LBB17_2 -; CHECK-LE-NEXT: @ %bb.1: @ %cond.load -; CHECK-LE-NEXT: vldr s0, .LCPI17_0 -; CHECK-LE-NEXT: vldr s4, [r0] -; CHECK-LE-NEXT: vmov r2, s0 -; CHECK-LE-NEXT: vdup.32 q0, r2 -; CHECK-LE-NEXT: vmov.f32 s0, s4 -; CHECK-LE-NEXT: b .LBB17_3 -; CHECK-LE-NEXT: .LBB17_2: -; CHECK-LE-NEXT: vmov.i32 q0, #0x0 -; CHECK-LE-NEXT: .LBB17_3: @ %else -; CHECK-LE-NEXT: lsls r2, r1, #30 -; CHECK-LE-NEXT: it mi -; CHECK-LE-NEXT: vldrmi s1, [r0, #4] -; CHECK-LE-NEXT: lsls r2, r1, #29 -; CHECK-LE-NEXT: it mi -; CHECK-LE-NEXT: vldrmi s2, [r0, #8] -; CHECK-LE-NEXT: lsls r1, r1, #28 -; CHECK-LE-NEXT: it mi -; CHECK-LE-NEXT: vldrmi s3, [r0, #12] -; CHECK-LE-NEXT: 
add sp, #4 -; CHECK-LE-NEXT: bx lr -; CHECK-LE-NEXT: .p2align 2 -; CHECK-LE-NEXT: @ %bb.4: -; CHECK-LE-NEXT: .LCPI17_0: -; CHECK-LE-NEXT: .long 0 @ float 0 -; -; CHECK-BE-LABEL: masked_v4f32_align4_zero: -; CHECK-BE: @ %bb.0: @ %entry -; CHECK-BE-NEXT: .pad #4 -; CHECK-BE-NEXT: sub sp, #4 -; CHECK-BE-NEXT: vrev64.32 q1, q0 -; CHECK-BE-NEXT: movs r1, #0 -; CHECK-BE-NEXT: vcmp.s32 gt, q1, zr -; CHECK-BE-NEXT: vmrs r2, p0 -; CHECK-BE-NEXT: and r3, r2, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r1, r3, #0, #1 -; CHECK-BE-NEXT: ubfx r3, r2, #4, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r1, r3, #1, #1 -; CHECK-BE-NEXT: ubfx r3, r2, #8, #1 -; CHECK-BE-NEXT: ubfx r2, r2, #12, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r1, r3, #2, #1 -; CHECK-BE-NEXT: rsbs r2, r2, #0 -; CHECK-BE-NEXT: bfi r1, r2, #3, #1 -; CHECK-BE-NEXT: and r1, r1, #15 -; CHECK-BE-NEXT: lsls r2, r1, #31 -; CHECK-BE-NEXT: beq .LBB17_2 -; CHECK-BE-NEXT: @ %bb.1: @ %cond.load -; CHECK-BE-NEXT: vldr s0, .LCPI17_0 -; CHECK-BE-NEXT: vldr s2, [r0] -; CHECK-BE-NEXT: vmov r2, s0 -; CHECK-BE-NEXT: vdup.32 q1, r2 -; CHECK-BE-NEXT: vmov.f32 s4, s2 -; CHECK-BE-NEXT: b .LBB17_3 -; CHECK-BE-NEXT: .LBB17_2: -; CHECK-BE-NEXT: vmov.i32 q1, #0x0 -; CHECK-BE-NEXT: .LBB17_3: @ %else -; CHECK-BE-NEXT: lsls r2, r1, #30 -; CHECK-BE-NEXT: it mi -; CHECK-BE-NEXT: vldrmi s5, [r0, #4] -; CHECK-BE-NEXT: lsls r2, r1, #29 -; CHECK-BE-NEXT: it mi -; CHECK-BE-NEXT: vldrmi s6, [r0, #8] -; CHECK-BE-NEXT: lsls r1, r1, #28 -; CHECK-BE-NEXT: it mi -; CHECK-BE-NEXT: vldrmi s7, [r0, #12] -; CHECK-BE-NEXT: vrev64.32 q0, q1 -; CHECK-BE-NEXT: add sp, #4 -; CHECK-BE-NEXT: bx lr -; CHECK-BE-NEXT: .p2align 2 -; CHECK-BE-NEXT: @ %bb.4: -; CHECK-BE-NEXT: .LCPI17_0: -; CHECK-BE-NEXT: .long 0 @ float 0 -entry: - %c = icmp sgt <4 x i32> %a, zeroinitializer - %l = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %dest, i32 4, <4 x i1> %c, <4 x float> zeroinitializer) - ret <4 x float> %l -} - -define arm_aapcs_vfpcc <4 x float> @masked_v4f32_align4_undef(<4 x float> *%dest, <4 x i32> %a) { -; CHECK-LE-LABEL: masked_v4f32_align4_undef: -; CHECK-LE: @ %bb.0: @ %entry -; CHECK-LE-NEXT: .pad #4 -; CHECK-LE-NEXT: sub sp, #4 -; CHECK-LE-NEXT: vcmp.s32 gt, q0, zr -; CHECK-LE-NEXT: movs r1, #0 -; CHECK-LE-NEXT: vmrs r2, p0 -; CHECK-LE-NEXT: @ implicit-def: $q0 -; CHECK-LE-NEXT: and r3, r2, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r1, r3, #0, #1 -; CHECK-LE-NEXT: ubfx r3, r2, #4, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r1, r3, #1, #1 -; CHECK-LE-NEXT: ubfx r3, r2, #8, #1 -; CHECK-LE-NEXT: ubfx r2, r2, #12, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r1, r3, #2, #1 -; CHECK-LE-NEXT: rsbs r2, r2, #0 -; CHECK-LE-NEXT: bfi r1, r2, #3, #1 -; CHECK-LE-NEXT: and r1, r1, #15 -; CHECK-LE-NEXT: lsls r2, r1, #31 -; CHECK-LE-NEXT: it ne -; CHECK-LE-NEXT: vldrne s0, [r0] -; CHECK-LE-NEXT: lsls r2, r1, #30 -; CHECK-LE-NEXT: it mi -; CHECK-LE-NEXT: vldrmi s1, [r0, #4] -; CHECK-LE-NEXT: lsls r2, r1, #29 -; CHECK-LE-NEXT: it mi -; CHECK-LE-NEXT: vldrmi s2, [r0, #8] -; CHECK-LE-NEXT: lsls r1, r1, #28 -; CHECK-LE-NEXT: it mi -; CHECK-LE-NEXT: vldrmi s3, [r0, #12] -; CHECK-LE-NEXT: add sp, #4 -; CHECK-LE-NEXT: bx lr -; -; CHECK-BE-LABEL: masked_v4f32_align4_undef: -; CHECK-BE: @ %bb.0: @ %entry -; CHECK-BE-NEXT: .pad #4 -; CHECK-BE-NEXT: sub sp, #4 -; CHECK-BE-NEXT: vrev64.32 q1, q0 -; CHECK-BE-NEXT: movs r1, #0 -; CHECK-BE-NEXT: vcmp.s32 gt, q1, zr -; CHECK-BE-NEXT: @ implicit-def: $q1 -; 
CHECK-BE-NEXT: vmrs r2, p0
-; CHECK-BE-NEXT: and r3, r2, #1
-; CHECK-BE-NEXT: rsbs r3, r3, #0
-; CHECK-BE-NEXT: bfi r1, r3, #0, #1
-; CHECK-BE-NEXT: ubfx r3, r2, #4, #1
-; CHECK-BE-NEXT: rsbs r3, r3, #0
-; CHECK-BE-NEXT: bfi r1, r3, #1, #1
-; CHECK-BE-NEXT: ubfx r3, r2, #8, #1
-; CHECK-BE-NEXT: ubfx r2, r2, #12, #1
-; CHECK-BE-NEXT: rsbs r3, r3, #0
-; CHECK-BE-NEXT: bfi r1, r3, #2, #1
-; CHECK-BE-NEXT: rsbs r2, r2, #0
-; CHECK-BE-NEXT: bfi r1, r2, #3, #1
-; CHECK-BE-NEXT: and r1, r1, #15
-; CHECK-BE-NEXT: lsls r2, r1, #31
-; CHECK-BE-NEXT: it ne
-; CHECK-BE-NEXT: vldrne s4, [r0]
-; CHECK-BE-NEXT: lsls r2, r1, #30
-; CHECK-BE-NEXT: it mi
-; CHECK-BE-NEXT: vldrmi s5, [r0, #4]
-; CHECK-BE-NEXT: lsls r2, r1, #29
-; CHECK-BE-NEXT: it mi
-; CHECK-BE-NEXT: vldrmi s6, [r0, #8]
-; CHECK-BE-NEXT: lsls r1, r1, #28
-; CHECK-BE-NEXT: it mi
-; CHECK-BE-NEXT: vldrmi s7, [r0, #12]
-; CHECK-BE-NEXT: vrev64.32 q0, q1
-; CHECK-BE-NEXT: add sp, #4
-; CHECK-BE-NEXT: bx lr
-entry:
- %c = icmp sgt <4 x i32> %a, zeroinitializer
- %l = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %dest, i32 4, <4 x i1> %c, <4 x float> undef)
- ret <4 x float> %l
-}
-
-define arm_aapcs_vfpcc <4 x float> @masked_v4f32_align1_undef(<4 x float> *%dest, <4 x i32> %a) {
-; CHECK-LE-LABEL: masked_v4f32_align1_undef:
-; CHECK-LE: @ %bb.0: @ %entry
-; CHECK-LE-NEXT: .pad #4
-; CHECK-LE-NEXT: sub sp, #4
-; CHECK-LE-NEXT: vcmp.s32 gt, q0, zr
-; CHECK-LE-NEXT: movs r1, #0
-; CHECK-LE-NEXT: vmrs r2, p0
-; CHECK-LE-NEXT: @ implicit-def: $q0
-; CHECK-LE-NEXT: and r3, r2, #1
-; CHECK-LE-NEXT: rsbs r3, r3, #0
-; CHECK-LE-NEXT: bfi r1, r3, #0, #1
-; CHECK-LE-NEXT: ubfx r3, r2, #4, #1
-; CHECK-LE-NEXT: rsbs r3, r3, #0
-; CHECK-LE-NEXT: bfi r1, r3, #1, #1
-; CHECK-LE-NEXT: ubfx r3, r2, #8, #1
-; CHECK-LE-NEXT: ubfx r2, r2, #12, #1
-; CHECK-LE-NEXT: rsbs r3, r3, #0
-; CHECK-LE-NEXT: bfi r1, r3, #2, #1
-; CHECK-LE-NEXT: rsbs r2, r2, #0
-; CHECK-LE-NEXT: bfi r1, r2, #3, #1
-; CHECK-LE-NEXT: and r1, r1, #15
-; CHECK-LE-NEXT: lsls r2, r1, #31
-; CHECK-LE-NEXT: itt ne
-; CHECK-LE-NEXT: ldrne r2, [r0]
-; CHECK-LE-NEXT: vmovne s0, r2
-; CHECK-LE-NEXT: lsls r2, r1, #30
-; CHECK-LE-NEXT: itt mi
-; CHECK-LE-NEXT: ldrmi r2, [r0, #4]
-; CHECK-LE-NEXT: vmovmi s1, r2
-; CHECK-LE-NEXT: lsls r2, r1, #29
-; CHECK-LE-NEXT: itt mi
-; CHECK-LE-NEXT: ldrmi r2, [r0, #8]
-; CHECK-LE-NEXT: vmovmi s2, r2
-; CHECK-LE-NEXT: lsls r1, r1, #28
-; CHECK-LE-NEXT: itt mi
-; CHECK-LE-NEXT: ldrmi r0, [r0, #12]
-; CHECK-LE-NEXT: vmovmi s3, r0
-; CHECK-LE-NEXT: add sp, #4
-; CHECK-LE-NEXT: bx lr
-;
-; CHECK-BE-LABEL: masked_v4f32_align1_undef:
-; CHECK-BE: @ %bb.0: @ %entry
-; CHECK-BE-NEXT: .pad #4
-; CHECK-BE-NEXT: sub sp, #4
-; CHECK-BE-NEXT: vrev64.32 q1, q0
-; CHECK-BE-NEXT: movs r1, #0
-; CHECK-BE-NEXT: vcmp.s32 gt, q1, zr
-; CHECK-BE-NEXT: @ implicit-def: $q1
-; CHECK-BE-NEXT: vmrs r2, p0
-; CHECK-BE-NEXT: and r3, r2, #1
-; CHECK-BE-NEXT: rsbs r3, r3, #0
-; CHECK-BE-NEXT: bfi r1, r3, #0, #1
-; CHECK-BE-NEXT: ubfx r3, r2, #4, #1
-; CHECK-BE-NEXT: rsbs r3, r3, #0
-; CHECK-BE-NEXT: bfi r1, r3, #1, #1
-; CHECK-BE-NEXT: ubfx r3, r2, #8, #1
-; CHECK-BE-NEXT: ubfx r2, r2, #12, #1
-; CHECK-BE-NEXT: rsbs r3, r3, #0
-; CHECK-BE-NEXT: bfi r1, r3, #2, #1
-; CHECK-BE-NEXT: rsbs r2, r2, #0
-; CHECK-BE-NEXT: bfi r1, r2, #3, #1
-; CHECK-BE-NEXT: and r1, r1, #15
-; CHECK-BE-NEXT: lsls r2, r1, #31
-; CHECK-BE-NEXT: itt ne
-; CHECK-BE-NEXT: ldrne r2, [r0]
-; CHECK-BE-NEXT: vmovne s4, r2
-; CHECK-BE-NEXT: lsls r2, r1, #30
-; CHECK-BE-NEXT: itt mi
-; CHECK-BE-NEXT: ldrmi r2, [r0, #4]
-; CHECK-BE-NEXT: vmovmi s5, r2
-; CHECK-BE-NEXT: lsls r2, r1, #29
-; CHECK-BE-NEXT: itt mi
-; CHECK-BE-NEXT: ldrmi r2, [r0, #8]
-; CHECK-BE-NEXT: vmovmi s6, r2
-; CHECK-BE-NEXT: lsls r1, r1, #28
-; CHECK-BE-NEXT: itt mi
-; CHECK-BE-NEXT: ldrmi r0, [r0, #12]
-; CHECK-BE-NEXT: vmovmi s7, r0
-; CHECK-BE-NEXT: vrev64.32 q0, q1
-; CHECK-BE-NEXT: add sp, #4
-; CHECK-BE-NEXT: bx lr
-entry:
- %c = icmp sgt <4 x i32> %a, zeroinitializer
- %l = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %dest, i32 1, <4 x i1> %c, <4 x float> undef)
- ret <4 x float> %l
-}
-
-define arm_aapcs_vfpcc <4 x float> @masked_v4f32_align4_other(<4 x float> *%dest, <4 x i32> %a, <4 x float> %b) {
-; CHECK-LE-LABEL: masked_v4f32_align4_other:
-; CHECK-LE: @ %bb.0: @ %entry
-; CHECK-LE-NEXT: .pad #4
-; CHECK-LE-NEXT: sub sp, #4
-; CHECK-LE-NEXT: vcmp.s32 gt, q0, zr
-; CHECK-LE-NEXT: movs r1, #0
-; CHECK-LE-NEXT: vmrs r2, p0
-; CHECK-LE-NEXT: and r3, r2, #1
-; CHECK-LE-NEXT: rsbs r3, r3, #0
-; CHECK-LE-NEXT: bfi r1, r3, #0, #1
-; CHECK-LE-NEXT: ubfx r3, r2, #4, #1
-; CHECK-LE-NEXT: rsbs r3, r3, #0
-; CHECK-LE-NEXT: bfi r1, r3, #1, #1
-; CHECK-LE-NEXT: ubfx r3, r2, #8, #1
-; CHECK-LE-NEXT: ubfx r2, r2, #12, #1
-; CHECK-LE-NEXT: rsbs r3, r3, #0
-; CHECK-LE-NEXT: bfi r1, r3, #2, #1
-; CHECK-LE-NEXT: rsbs r2, r2, #0
-; CHECK-LE-NEXT: bfi r1, r2, #3, #1
-; CHECK-LE-NEXT: and r1, r1, #15
-; CHECK-LE-NEXT: lsls r2, r1, #31
-; CHECK-LE-NEXT: it ne
-; CHECK-LE-NEXT: vldrne s4, [r0]
-; CHECK-LE-NEXT: lsls r2, r1, #30
-; CHECK-LE-NEXT: it mi
-; CHECK-LE-NEXT: vldrmi s5, [r0, #4]
-; CHECK-LE-NEXT: lsls r2, r1, #29
-; CHECK-LE-NEXT: it mi
-; CHECK-LE-NEXT: vldrmi s6, [r0, #8]
-; CHECK-LE-NEXT: lsls r1, r1, #28
-; CHECK-LE-NEXT: it mi
-; CHECK-LE-NEXT: vldrmi s7, [r0, #12]
-; CHECK-LE-NEXT: vmov q0, q1
-; CHECK-LE-NEXT: add sp, #4
-; CHECK-LE-NEXT: bx lr
-;
-; CHECK-BE-LABEL: masked_v4f32_align4_other:
-; CHECK-BE: @ %bb.0: @ %entry
-; CHECK-BE-NEXT: .pad #4
-; CHECK-BE-NEXT: sub sp, #4
-; CHECK-BE-NEXT: vrev64.32 q2, q0
-; CHECK-BE-NEXT: movs r1, #0
-; CHECK-BE-NEXT: vcmp.s32 gt, q2, zr
-; CHECK-BE-NEXT: vrev64.32 q2, q1
-; CHECK-BE-NEXT: vmrs r2, p0
-; CHECK-BE-NEXT: and r3, r2, #1
-; CHECK-BE-NEXT: rsbs r3, r3, #0
-; CHECK-BE-NEXT: bfi r1, r3, #0, #1
-; CHECK-BE-NEXT: ubfx r3, r2, #4, #1
-; CHECK-BE-NEXT: rsbs r3, r3, #0
-; CHECK-BE-NEXT: bfi r1, r3, #1, #1
-; CHECK-BE-NEXT: ubfx r3, r2, #8, #1
-; CHECK-BE-NEXT: ubfx r2, r2, #12, #1
-; CHECK-BE-NEXT: rsbs r3, r3, #0
-; CHECK-BE-NEXT: bfi r1, r3, #2, #1
-; CHECK-BE-NEXT: rsbs r2, r2, #0
-; CHECK-BE-NEXT: bfi r1, r2, #3, #1
-; CHECK-BE-NEXT: and r1, r1, #15
-; CHECK-BE-NEXT: lsls r2, r1, #31
-; CHECK-BE-NEXT: it ne
-; CHECK-BE-NEXT: vldrne s8, [r0]
-; CHECK-BE-NEXT: lsls r2, r1, #30
-; CHECK-BE-NEXT: it mi
-; CHECK-BE-NEXT: vldrmi s9, [r0, #4]
-; CHECK-BE-NEXT: lsls r2, r1, #29
-; CHECK-BE-NEXT: it mi
-; CHECK-BE-NEXT: vldrmi s10, [r0, #8]
-; CHECK-BE-NEXT: lsls r1, r1, #28
-; CHECK-BE-NEXT: it mi
-; CHECK-BE-NEXT: vldrmi s11, [r0, #12]
-; CHECK-BE-NEXT: vrev64.32 q0, q2
-; CHECK-BE-NEXT: add sp, #4
-; CHECK-BE-NEXT: bx lr
-entry:
- %c = icmp sgt <4 x i32> %a, zeroinitializer
- %l = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %dest, i32 4, <4 x i1> %c, <4 x float> %b)
- ret <4 x float> %l
-}
-
-define arm_aapcs_vfpcc i8* @masked_v4f32_preinc(i8* %x, i8* %y, <4 x i32> %a) {
-; CHECK-LE-LABEL: masked_v4f32_preinc:
-; CHECK-LE: @ %bb.0: @ %entry
-; CHECK-LE-NEXT: .pad #4
-; CHECK-LE-NEXT: sub sp, #4
-; CHECK-LE-NEXT: vcmp.s32 gt, q0, zr
-; CHECK-LE-NEXT:
movs r2, #0 -; CHECK-LE-NEXT: vmrs r12, p0 -; CHECK-LE-NEXT: @ implicit-def: $q0 -; CHECK-LE-NEXT: adds r0, #4 -; CHECK-LE-NEXT: and r3, r12, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #0, #1 -; CHECK-LE-NEXT: ubfx r3, r12, #4, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #1, #1 -; CHECK-LE-NEXT: ubfx r3, r12, #8, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #2, #1 -; CHECK-LE-NEXT: ubfx r3, r12, #12, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #3, #1 -; CHECK-LE-NEXT: and r2, r2, #15 -; CHECK-LE-NEXT: lsls r3, r2, #31 -; CHECK-LE-NEXT: it ne -; CHECK-LE-NEXT: vldrne s0, [r0] -; CHECK-LE-NEXT: lsls r3, r2, #30 -; CHECK-LE-NEXT: it mi -; CHECK-LE-NEXT: vldrmi s1, [r0, #4] -; CHECK-LE-NEXT: lsls r3, r2, #29 -; CHECK-LE-NEXT: it mi -; CHECK-LE-NEXT: vldrmi s2, [r0, #8] -; CHECK-LE-NEXT: lsls r2, r2, #28 -; CHECK-LE-NEXT: it mi -; CHECK-LE-NEXT: vldrmi s3, [r0, #12] -; CHECK-LE-NEXT: vstrw.32 q0, [r1] -; CHECK-LE-NEXT: add sp, #4 -; CHECK-LE-NEXT: bx lr -; -; CHECK-BE-LABEL: masked_v4f32_preinc: -; CHECK-BE: @ %bb.0: @ %entry -; CHECK-BE-NEXT: .pad #4 -; CHECK-BE-NEXT: sub sp, #4 -; CHECK-BE-NEXT: vrev64.32 q1, q0 -; CHECK-BE-NEXT: movs r2, #0 -; CHECK-BE-NEXT: vcmp.s32 gt, q1, zr -; CHECK-BE-NEXT: @ implicit-def: $q0 -; CHECK-BE-NEXT: adds r0, #4 -; CHECK-BE-NEXT: vmrs r12, p0 -; CHECK-BE-NEXT: and r3, r12, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #0, #1 -; CHECK-BE-NEXT: ubfx r3, r12, #4, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #1, #1 -; CHECK-BE-NEXT: ubfx r3, r12, #8, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #2, #1 -; CHECK-BE-NEXT: ubfx r3, r12, #12, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #3, #1 -; CHECK-BE-NEXT: and r2, r2, #15 -; CHECK-BE-NEXT: lsls r3, r2, #31 -; CHECK-BE-NEXT: it ne -; CHECK-BE-NEXT: vldrne s0, [r0] -; CHECK-BE-NEXT: lsls r3, r2, #30 -; CHECK-BE-NEXT: it mi -; CHECK-BE-NEXT: vldrmi s1, [r0, #4] -; CHECK-BE-NEXT: lsls r3, r2, #29 -; CHECK-BE-NEXT: it mi -; CHECK-BE-NEXT: vldrmi s2, [r0, #8] -; CHECK-BE-NEXT: lsls r2, r2, #28 -; CHECK-BE-NEXT: it mi -; CHECK-BE-NEXT: vldrmi s3, [r0, #12] -; CHECK-BE-NEXT: vstrw.32 q0, [r1] -; CHECK-BE-NEXT: add sp, #4 -; CHECK-BE-NEXT: bx lr -entry: - %z = getelementptr inbounds i8, i8* %x, i32 4 - %0 = bitcast i8* %z to <4 x float>* - %c = icmp sgt <4 x i32> %a, zeroinitializer - %1 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %0, i32 4, <4 x i1> %c, <4 x float> undef) - %2 = bitcast i8* %y to <4 x float>* - store <4 x float> %1, <4 x float>* %2, align 4 - ret i8* %z -} - -define arm_aapcs_vfpcc i8* @masked_v4f32_postinc(i8* %x, i8* %y, <4 x i32> %a) { -; CHECK-LE-LABEL: masked_v4f32_postinc: -; CHECK-LE: @ %bb.0: @ %entry -; CHECK-LE-NEXT: .pad #4 -; CHECK-LE-NEXT: sub sp, #4 -; CHECK-LE-NEXT: vcmp.s32 gt, q0, zr -; CHECK-LE-NEXT: movs r2, #0 -; CHECK-LE-NEXT: vmrs r12, p0 -; CHECK-LE-NEXT: @ implicit-def: $q0 -; CHECK-LE-NEXT: and r3, r12, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #0, #1 -; CHECK-LE-NEXT: ubfx r3, r12, #4, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #1, #1 -; CHECK-LE-NEXT: ubfx r3, r12, #8, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #2, #1 -; CHECK-LE-NEXT: ubfx r3, r12, #12, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: add.w r12, r0, #4 -; CHECK-LE-NEXT: bfi r2, r3, #3, #1 -; CHECK-LE-NEXT: and r3, r2, #15 -; 
CHECK-LE-NEXT: lsls r2, r3, #31 -; CHECK-LE-NEXT: it ne -; CHECK-LE-NEXT: vldrne s0, [r0] -; CHECK-LE-NEXT: lsls r2, r3, #30 -; CHECK-LE-NEXT: it mi -; CHECK-LE-NEXT: vldrmi s1, [r0, #4] -; CHECK-LE-NEXT: lsls r2, r3, #29 -; CHECK-LE-NEXT: it mi -; CHECK-LE-NEXT: vldrmi s2, [r0, #8] -; CHECK-LE-NEXT: lsls r2, r3, #28 -; CHECK-LE-NEXT: it mi -; CHECK-LE-NEXT: vldrmi s3, [r0, #12] -; CHECK-LE-NEXT: vstrw.32 q0, [r1] -; CHECK-LE-NEXT: mov r0, r12 -; CHECK-LE-NEXT: add sp, #4 -; CHECK-LE-NEXT: bx lr -; -; CHECK-BE-LABEL: masked_v4f32_postinc: -; CHECK-BE: @ %bb.0: @ %entry -; CHECK-BE-NEXT: .pad #4 -; CHECK-BE-NEXT: sub sp, #4 -; CHECK-BE-NEXT: vrev64.32 q1, q0 -; CHECK-BE-NEXT: movs r2, #0 -; CHECK-BE-NEXT: vcmp.s32 gt, q1, zr -; CHECK-BE-NEXT: @ implicit-def: $q0 -; CHECK-BE-NEXT: vmrs r12, p0 -; CHECK-BE-NEXT: and r3, r12, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #0, #1 -; CHECK-BE-NEXT: ubfx r3, r12, #4, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #1, #1 -; CHECK-BE-NEXT: ubfx r3, r12, #8, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #2, #1 -; CHECK-BE-NEXT: ubfx r3, r12, #12, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: add.w r12, r0, #4 -; CHECK-BE-NEXT: bfi r2, r3, #3, #1 -; CHECK-BE-NEXT: and r3, r2, #15 -; CHECK-BE-NEXT: lsls r2, r3, #31 -; CHECK-BE-NEXT: it ne -; CHECK-BE-NEXT: vldrne s0, [r0] -; CHECK-BE-NEXT: lsls r2, r3, #30 -; CHECK-BE-NEXT: it mi -; CHECK-BE-NEXT: vldrmi s1, [r0, #4] -; CHECK-BE-NEXT: lsls r2, r3, #29 -; CHECK-BE-NEXT: it mi -; CHECK-BE-NEXT: vldrmi s2, [r0, #8] -; CHECK-BE-NEXT: lsls r2, r3, #28 -; CHECK-BE-NEXT: it mi -; CHECK-BE-NEXT: vldrmi s3, [r0, #12] -; CHECK-BE-NEXT: vstrw.32 q0, [r1] -; CHECK-BE-NEXT: mov r0, r12 -; CHECK-BE-NEXT: add sp, #4 -; CHECK-BE-NEXT: bx lr -entry: - %z = getelementptr inbounds i8, i8* %x, i32 4 - %0 = bitcast i8* %x to <4 x float>* - %c = icmp sgt <4 x i32> %a, zeroinitializer - %1 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %0, i32 4, <4 x i1> %c, <4 x float> undef) - %2 = bitcast i8* %y to <4 x float>* - store <4 x float> %1, <4 x float>* %2, align 4 - ret i8* %z -} - - -define arm_aapcs_vfpcc <8 x half> @masked_v8f16_align4_zero(<8 x half> *%dest, <8 x i16> %a) { -; CHECK-LE-LABEL: masked_v8f16_align4_zero: -; CHECK-LE: @ %bb.0: @ %entry -; CHECK-LE-NEXT: .pad #8 -; CHECK-LE-NEXT: sub sp, #8 -; CHECK-LE-NEXT: vcmp.s16 gt, q0, zr -; CHECK-LE-NEXT: movs r2, #0 -; CHECK-LE-NEXT: vmrs r1, p0 -; CHECK-LE-NEXT: and r3, r1, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #0, #1 -; CHECK-LE-NEXT: ubfx r3, r1, #2, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #1, #1 -; CHECK-LE-NEXT: ubfx r3, r1, #4, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #2, #1 -; CHECK-LE-NEXT: ubfx r3, r1, #6, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #3, #1 -; CHECK-LE-NEXT: ubfx r3, r1, #8, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #4, #1 -; CHECK-LE-NEXT: ubfx r3, r1, #10, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #5, #1 -; CHECK-LE-NEXT: ubfx r3, r1, #12, #1 -; CHECK-LE-NEXT: ubfx r1, r1, #14, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #6, #1 -; CHECK-LE-NEXT: rsbs r1, r1, #0 -; CHECK-LE-NEXT: bfi r2, r1, #7, #1 -; CHECK-LE-NEXT: uxtb r1, r2 -; CHECK-LE-NEXT: lsls r2, r2, #31 -; CHECK-LE-NEXT: beq .LBB23_2 -; CHECK-LE-NEXT: @ %bb.1: @ %cond.load -; CHECK-LE-NEXT: vldr.16 s0, .LCPI23_0 -; 
CHECK-LE-NEXT: vmov r2, s0 -; CHECK-LE-NEXT: vldr.16 s0, [r0] -; CHECK-LE-NEXT: vmov r3, s0 -; CHECK-LE-NEXT: vdup.16 q0, r2 -; CHECK-LE-NEXT: vmov.16 q0[0], r3 -; CHECK-LE-NEXT: lsls r2, r1, #30 -; CHECK-LE-NEXT: bmi .LBB23_3 -; CHECK-LE-NEXT: b .LBB23_4 -; CHECK-LE-NEXT: .LBB23_2: -; CHECK-LE-NEXT: vmov.i32 q0, #0x0 -; CHECK-LE-NEXT: lsls r2, r1, #30 -; CHECK-LE-NEXT: bpl .LBB23_4 -; CHECK-LE-NEXT: .LBB23_3: @ %cond.load1 -; CHECK-LE-NEXT: vldr.16 s4, [r0, #2] -; CHECK-LE-NEXT: vmov r2, s4 -; CHECK-LE-NEXT: vmov.16 q0[1], r2 -; CHECK-LE-NEXT: .LBB23_4: @ %else2 -; CHECK-LE-NEXT: lsls r2, r1, #29 -; CHECK-LE-NEXT: bmi .LBB23_11 -; CHECK-LE-NEXT: @ %bb.5: @ %else5 -; CHECK-LE-NEXT: lsls r2, r1, #28 -; CHECK-LE-NEXT: bmi .LBB23_12 -; CHECK-LE-NEXT: .LBB23_6: @ %else8 -; CHECK-LE-NEXT: lsls r2, r1, #27 -; CHECK-LE-NEXT: bmi .LBB23_13 -; CHECK-LE-NEXT: .LBB23_7: @ %else11 -; CHECK-LE-NEXT: lsls r2, r1, #26 -; CHECK-LE-NEXT: bmi .LBB23_14 -; CHECK-LE-NEXT: .LBB23_8: @ %else14 -; CHECK-LE-NEXT: lsls r2, r1, #25 -; CHECK-LE-NEXT: bmi .LBB23_15 -; CHECK-LE-NEXT: .LBB23_9: @ %else17 -; CHECK-LE-NEXT: lsls r1, r1, #24 -; CHECK-LE-NEXT: bmi .LBB23_16 -; CHECK-LE-NEXT: .LBB23_10: @ %else20 -; CHECK-LE-NEXT: add sp, #8 -; CHECK-LE-NEXT: bx lr -; CHECK-LE-NEXT: .LBB23_11: @ %cond.load4 -; CHECK-LE-NEXT: vldr.16 s4, [r0, #4] -; CHECK-LE-NEXT: vmov r2, s4 -; CHECK-LE-NEXT: vmov.16 q0[2], r2 -; CHECK-LE-NEXT: lsls r2, r1, #28 -; CHECK-LE-NEXT: bpl .LBB23_6 -; CHECK-LE-NEXT: .LBB23_12: @ %cond.load7 -; CHECK-LE-NEXT: vldr.16 s4, [r0, #6] -; CHECK-LE-NEXT: vmov r2, s4 -; CHECK-LE-NEXT: vmov.16 q0[3], r2 -; CHECK-LE-NEXT: lsls r2, r1, #27 -; CHECK-LE-NEXT: bpl .LBB23_7 -; CHECK-LE-NEXT: .LBB23_13: @ %cond.load10 -; CHECK-LE-NEXT: vldr.16 s4, [r0, #8] -; CHECK-LE-NEXT: vmov r2, s4 -; CHECK-LE-NEXT: vmov.16 q0[4], r2 -; CHECK-LE-NEXT: lsls r2, r1, #26 -; CHECK-LE-NEXT: bpl .LBB23_8 -; CHECK-LE-NEXT: .LBB23_14: @ %cond.load13 -; CHECK-LE-NEXT: vldr.16 s4, [r0, #10] -; CHECK-LE-NEXT: vmov r2, s4 -; CHECK-LE-NEXT: vmov.16 q0[5], r2 -; CHECK-LE-NEXT: lsls r2, r1, #25 -; CHECK-LE-NEXT: bpl .LBB23_9 -; CHECK-LE-NEXT: .LBB23_15: @ %cond.load16 -; CHECK-LE-NEXT: vldr.16 s4, [r0, #12] -; CHECK-LE-NEXT: vmov r2, s4 -; CHECK-LE-NEXT: vmov.16 q0[6], r2 -; CHECK-LE-NEXT: lsls r1, r1, #24 -; CHECK-LE-NEXT: bpl .LBB23_10 -; CHECK-LE-NEXT: .LBB23_16: @ %cond.load19 -; CHECK-LE-NEXT: vldr.16 s4, [r0, #14] -; CHECK-LE-NEXT: vmov r0, s4 -; CHECK-LE-NEXT: vmov.16 q0[7], r0 -; CHECK-LE-NEXT: add sp, #8 -; CHECK-LE-NEXT: bx lr -; CHECK-LE-NEXT: .p2align 1 -; CHECK-LE-NEXT: @ %bb.17: -; CHECK-LE-NEXT: .LCPI23_0: -; CHECK-LE-NEXT: .short 0 @ half 0 -; -; CHECK-BE-LABEL: masked_v8f16_align4_zero: -; CHECK-BE: @ %bb.0: @ %entry -; CHECK-BE-NEXT: .pad #8 -; CHECK-BE-NEXT: sub sp, #8 -; CHECK-BE-NEXT: vrev64.16 q1, q0 -; CHECK-BE-NEXT: movs r2, #0 -; CHECK-BE-NEXT: vcmp.s16 gt, q1, zr -; CHECK-BE-NEXT: vmrs r1, p0 -; CHECK-BE-NEXT: and r3, r1, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #0, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #2, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #1, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #4, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #2, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #6, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #3, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #8, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #4, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #10, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi 
r2, r3, #5, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #12, #1 -; CHECK-BE-NEXT: ubfx r1, r1, #14, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #6, #1 -; CHECK-BE-NEXT: rsbs r1, r1, #0 -; CHECK-BE-NEXT: bfi r2, r1, #7, #1 -; CHECK-BE-NEXT: uxtb r1, r2 -; CHECK-BE-NEXT: lsls r2, r2, #31 -; CHECK-BE-NEXT: beq .LBB23_2 -; CHECK-BE-NEXT: @ %bb.1: @ %cond.load -; CHECK-BE-NEXT: vldr.16 s0, .LCPI23_0 -; CHECK-BE-NEXT: vmov r2, s0 -; CHECK-BE-NEXT: vldr.16 s0, [r0] -; CHECK-BE-NEXT: vdup.16 q1, r2 -; CHECK-BE-NEXT: vmov r3, s0 -; CHECK-BE-NEXT: vmov.16 q1[0], r3 -; CHECK-BE-NEXT: lsls r2, r1, #30 -; CHECK-BE-NEXT: bmi .LBB23_3 -; CHECK-BE-NEXT: b .LBB23_4 -; CHECK-BE-NEXT: .LBB23_2: -; CHECK-BE-NEXT: vmov.i32 q0, #0x0 -; CHECK-BE-NEXT: vrev32.16 q1, q0 -; CHECK-BE-NEXT: lsls r2, r1, #30 -; CHECK-BE-NEXT: bpl .LBB23_4 -; CHECK-BE-NEXT: .LBB23_3: @ %cond.load1 -; CHECK-BE-NEXT: vldr.16 s0, [r0, #2] -; CHECK-BE-NEXT: vmov r2, s0 -; CHECK-BE-NEXT: vmov.16 q1[1], r2 -; CHECK-BE-NEXT: .LBB23_4: @ %else2 -; CHECK-BE-NEXT: lsls r2, r1, #29 -; CHECK-BE-NEXT: bmi .LBB23_12 -; CHECK-BE-NEXT: @ %bb.5: @ %else5 -; CHECK-BE-NEXT: lsls r2, r1, #28 -; CHECK-BE-NEXT: bmi .LBB23_13 -; CHECK-BE-NEXT: .LBB23_6: @ %else8 -; CHECK-BE-NEXT: lsls r2, r1, #27 -; CHECK-BE-NEXT: bmi .LBB23_14 -; CHECK-BE-NEXT: .LBB23_7: @ %else11 -; CHECK-BE-NEXT: lsls r2, r1, #26 -; CHECK-BE-NEXT: bmi .LBB23_15 -; CHECK-BE-NEXT: .LBB23_8: @ %else14 -; CHECK-BE-NEXT: lsls r2, r1, #25 -; CHECK-BE-NEXT: bmi .LBB23_16 -; CHECK-BE-NEXT: .LBB23_9: @ %else17 -; CHECK-BE-NEXT: lsls r1, r1, #24 -; CHECK-BE-NEXT: bpl .LBB23_11 -; CHECK-BE-NEXT: .LBB23_10: @ %cond.load19 -; CHECK-BE-NEXT: vldr.16 s0, [r0, #14] -; CHECK-BE-NEXT: vmov r0, s0 -; CHECK-BE-NEXT: vmov.16 q1[7], r0 -; CHECK-BE-NEXT: .LBB23_11: @ %else20 -; CHECK-BE-NEXT: vrev64.16 q0, q1 -; CHECK-BE-NEXT: add sp, #8 -; CHECK-BE-NEXT: bx lr -; CHECK-BE-NEXT: .LBB23_12: @ %cond.load4 -; CHECK-BE-NEXT: vldr.16 s0, [r0, #4] -; CHECK-BE-NEXT: vmov r2, s0 -; CHECK-BE-NEXT: vmov.16 q1[2], r2 -; CHECK-BE-NEXT: lsls r2, r1, #28 -; CHECK-BE-NEXT: bpl .LBB23_6 -; CHECK-BE-NEXT: .LBB23_13: @ %cond.load7 -; CHECK-BE-NEXT: vldr.16 s0, [r0, #6] -; CHECK-BE-NEXT: vmov r2, s0 -; CHECK-BE-NEXT: vmov.16 q1[3], r2 -; CHECK-BE-NEXT: lsls r2, r1, #27 -; CHECK-BE-NEXT: bpl .LBB23_7 -; CHECK-BE-NEXT: .LBB23_14: @ %cond.load10 -; CHECK-BE-NEXT: vldr.16 s0, [r0, #8] -; CHECK-BE-NEXT: vmov r2, s0 -; CHECK-BE-NEXT: vmov.16 q1[4], r2 -; CHECK-BE-NEXT: lsls r2, r1, #26 -; CHECK-BE-NEXT: bpl .LBB23_8 -; CHECK-BE-NEXT: .LBB23_15: @ %cond.load13 -; CHECK-BE-NEXT: vldr.16 s0, [r0, #10] -; CHECK-BE-NEXT: vmov r2, s0 -; CHECK-BE-NEXT: vmov.16 q1[5], r2 -; CHECK-BE-NEXT: lsls r2, r1, #25 -; CHECK-BE-NEXT: bpl .LBB23_9 -; CHECK-BE-NEXT: .LBB23_16: @ %cond.load16 -; CHECK-BE-NEXT: vldr.16 s0, [r0, #12] -; CHECK-BE-NEXT: vmov r2, s0 -; CHECK-BE-NEXT: vmov.16 q1[6], r2 -; CHECK-BE-NEXT: lsls r1, r1, #24 -; CHECK-BE-NEXT: bmi .LBB23_10 -; CHECK-BE-NEXT: b .LBB23_11 -; CHECK-BE-NEXT: .p2align 1 -; CHECK-BE-NEXT: @ %bb.17: -; CHECK-BE-NEXT: .LCPI23_0: -; CHECK-BE-NEXT: .short 0 @ half 0 -entry: - %c = icmp sgt <8 x i16> %a, zeroinitializer - %l = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* %dest, i32 2, <8 x i1> %c, <8 x half> zeroinitializer) - ret <8 x half> %l -} - -define arm_aapcs_vfpcc <8 x half> @masked_v8f16_align4_undef(<8 x half> *%dest, <8 x i16> %a) { -; CHECK-LE-LABEL: masked_v8f16_align4_undef: -; CHECK-LE: @ %bb.0: @ %entry -; CHECK-LE-NEXT: .pad #8 -; CHECK-LE-NEXT: sub sp, #8 -; 
CHECK-LE-NEXT: vcmp.s16 gt, q0, zr -; CHECK-LE-NEXT: movs r2, #0 -; CHECK-LE-NEXT: vmrs r1, p0 -; CHECK-LE-NEXT: @ implicit-def: $q0 -; CHECK-LE-NEXT: and r3, r1, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #0, #1 -; CHECK-LE-NEXT: ubfx r3, r1, #2, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #1, #1 -; CHECK-LE-NEXT: ubfx r3, r1, #4, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #2, #1 -; CHECK-LE-NEXT: ubfx r3, r1, #6, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #3, #1 -; CHECK-LE-NEXT: ubfx r3, r1, #8, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #4, #1 -; CHECK-LE-NEXT: ubfx r3, r1, #10, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #5, #1 -; CHECK-LE-NEXT: ubfx r3, r1, #12, #1 -; CHECK-LE-NEXT: ubfx r1, r1, #14, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #6, #1 -; CHECK-LE-NEXT: rsbs r1, r1, #0 -; CHECK-LE-NEXT: bfi r2, r1, #7, #1 -; CHECK-LE-NEXT: uxtb r1, r2 -; CHECK-LE-NEXT: lsls r2, r2, #31 -; CHECK-LE-NEXT: bne .LBB24_9 -; CHECK-LE-NEXT: @ %bb.1: @ %else -; CHECK-LE-NEXT: lsls r2, r1, #30 -; CHECK-LE-NEXT: bmi .LBB24_10 -; CHECK-LE-NEXT: .LBB24_2: @ %else2 -; CHECK-LE-NEXT: lsls r2, r1, #29 -; CHECK-LE-NEXT: bmi .LBB24_11 -; CHECK-LE-NEXT: .LBB24_3: @ %else5 -; CHECK-LE-NEXT: lsls r2, r1, #28 -; CHECK-LE-NEXT: bmi .LBB24_12 -; CHECK-LE-NEXT: .LBB24_4: @ %else8 -; CHECK-LE-NEXT: lsls r2, r1, #27 -; CHECK-LE-NEXT: bmi .LBB24_13 -; CHECK-LE-NEXT: .LBB24_5: @ %else11 -; CHECK-LE-NEXT: lsls r2, r1, #26 -; CHECK-LE-NEXT: bmi .LBB24_14 -; CHECK-LE-NEXT: .LBB24_6: @ %else14 -; CHECK-LE-NEXT: lsls r2, r1, #25 -; CHECK-LE-NEXT: bmi .LBB24_15 -; CHECK-LE-NEXT: .LBB24_7: @ %else17 -; CHECK-LE-NEXT: lsls r1, r1, #24 -; CHECK-LE-NEXT: bmi .LBB24_16 -; CHECK-LE-NEXT: .LBB24_8: @ %else20 -; CHECK-LE-NEXT: add sp, #8 -; CHECK-LE-NEXT: bx lr -; CHECK-LE-NEXT: .LBB24_9: @ %cond.load -; CHECK-LE-NEXT: vldr.16 s0, [r0] -; CHECK-LE-NEXT: lsls r2, r1, #30 -; CHECK-LE-NEXT: bpl .LBB24_2 -; CHECK-LE-NEXT: .LBB24_10: @ %cond.load1 -; CHECK-LE-NEXT: vldr.16 s4, [r0, #2] -; CHECK-LE-NEXT: vmov r2, s4 -; CHECK-LE-NEXT: vmov.16 q0[1], r2 -; CHECK-LE-NEXT: lsls r2, r1, #29 -; CHECK-LE-NEXT: bpl .LBB24_3 -; CHECK-LE-NEXT: .LBB24_11: @ %cond.load4 -; CHECK-LE-NEXT: vldr.16 s4, [r0, #4] -; CHECK-LE-NEXT: vmov r2, s4 -; CHECK-LE-NEXT: vmov.16 q0[2], r2 -; CHECK-LE-NEXT: lsls r2, r1, #28 -; CHECK-LE-NEXT: bpl .LBB24_4 -; CHECK-LE-NEXT: .LBB24_12: @ %cond.load7 -; CHECK-LE-NEXT: vldr.16 s4, [r0, #6] -; CHECK-LE-NEXT: vmov r2, s4 -; CHECK-LE-NEXT: vmov.16 q0[3], r2 -; CHECK-LE-NEXT: lsls r2, r1, #27 -; CHECK-LE-NEXT: bpl .LBB24_5 -; CHECK-LE-NEXT: .LBB24_13: @ %cond.load10 -; CHECK-LE-NEXT: vldr.16 s4, [r0, #8] -; CHECK-LE-NEXT: vmov r2, s4 -; CHECK-LE-NEXT: vmov.16 q0[4], r2 -; CHECK-LE-NEXT: lsls r2, r1, #26 -; CHECK-LE-NEXT: bpl .LBB24_6 -; CHECK-LE-NEXT: .LBB24_14: @ %cond.load13 -; CHECK-LE-NEXT: vldr.16 s4, [r0, #10] -; CHECK-LE-NEXT: vmov r2, s4 -; CHECK-LE-NEXT: vmov.16 q0[5], r2 -; CHECK-LE-NEXT: lsls r2, r1, #25 -; CHECK-LE-NEXT: bpl .LBB24_7 -; CHECK-LE-NEXT: .LBB24_15: @ %cond.load16 -; CHECK-LE-NEXT: vldr.16 s4, [r0, #12] -; CHECK-LE-NEXT: vmov r2, s4 -; CHECK-LE-NEXT: vmov.16 q0[6], r2 -; CHECK-LE-NEXT: lsls r1, r1, #24 -; CHECK-LE-NEXT: bpl .LBB24_8 -; CHECK-LE-NEXT: .LBB24_16: @ %cond.load19 -; CHECK-LE-NEXT: vldr.16 s4, [r0, #14] -; CHECK-LE-NEXT: vmov r0, s4 -; CHECK-LE-NEXT: vmov.16 q0[7], r0 -; CHECK-LE-NEXT: add sp, #8 -; CHECK-LE-NEXT: bx lr 
-; -; CHECK-BE-LABEL: masked_v8f16_align4_undef: -; CHECK-BE: @ %bb.0: @ %entry -; CHECK-BE-NEXT: .pad #8 -; CHECK-BE-NEXT: sub sp, #8 -; CHECK-BE-NEXT: vrev64.16 q1, q0 -; CHECK-BE-NEXT: movs r2, #0 -; CHECK-BE-NEXT: vcmp.s16 gt, q1, zr -; CHECK-BE-NEXT: @ implicit-def: $q1 -; CHECK-BE-NEXT: vmrs r1, p0 -; CHECK-BE-NEXT: and r3, r1, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #0, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #2, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #1, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #4, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #2, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #6, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #3, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #8, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #4, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #10, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #5, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #12, #1 -; CHECK-BE-NEXT: ubfx r1, r1, #14, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #6, #1 -; CHECK-BE-NEXT: rsbs r1, r1, #0 -; CHECK-BE-NEXT: bfi r2, r1, #7, #1 -; CHECK-BE-NEXT: uxtb r1, r2 -; CHECK-BE-NEXT: lsls r2, r2, #31 -; CHECK-BE-NEXT: bne .LBB24_10 -; CHECK-BE-NEXT: @ %bb.1: @ %else -; CHECK-BE-NEXT: lsls r2, r1, #30 -; CHECK-BE-NEXT: bmi .LBB24_11 -; CHECK-BE-NEXT: .LBB24_2: @ %else2 -; CHECK-BE-NEXT: lsls r2, r1, #29 -; CHECK-BE-NEXT: bmi .LBB24_12 -; CHECK-BE-NEXT: .LBB24_3: @ %else5 -; CHECK-BE-NEXT: lsls r2, r1, #28 -; CHECK-BE-NEXT: bmi .LBB24_13 -; CHECK-BE-NEXT: .LBB24_4: @ %else8 -; CHECK-BE-NEXT: lsls r2, r1, #27 -; CHECK-BE-NEXT: bmi .LBB24_14 -; CHECK-BE-NEXT: .LBB24_5: @ %else11 -; CHECK-BE-NEXT: lsls r2, r1, #26 -; CHECK-BE-NEXT: bmi .LBB24_15 -; CHECK-BE-NEXT: .LBB24_6: @ %else14 -; CHECK-BE-NEXT: lsls r2, r1, #25 -; CHECK-BE-NEXT: bmi .LBB24_16 -; CHECK-BE-NEXT: .LBB24_7: @ %else17 -; CHECK-BE-NEXT: lsls r1, r1, #24 -; CHECK-BE-NEXT: bpl .LBB24_9 -; CHECK-BE-NEXT: .LBB24_8: @ %cond.load19 -; CHECK-BE-NEXT: vldr.16 s0, [r0, #14] -; CHECK-BE-NEXT: vmov r0, s0 -; CHECK-BE-NEXT: vmov.16 q1[7], r0 -; CHECK-BE-NEXT: .LBB24_9: @ %else20 -; CHECK-BE-NEXT: vrev64.16 q0, q1 -; CHECK-BE-NEXT: add sp, #8 -; CHECK-BE-NEXT: bx lr -; CHECK-BE-NEXT: .LBB24_10: @ %cond.load -; CHECK-BE-NEXT: vldr.16 s4, [r0] -; CHECK-BE-NEXT: lsls r2, r1, #30 -; CHECK-BE-NEXT: bpl .LBB24_2 -; CHECK-BE-NEXT: .LBB24_11: @ %cond.load1 -; CHECK-BE-NEXT: vldr.16 s0, [r0, #2] -; CHECK-BE-NEXT: vmov r2, s0 -; CHECK-BE-NEXT: vmov.16 q1[1], r2 -; CHECK-BE-NEXT: lsls r2, r1, #29 -; CHECK-BE-NEXT: bpl .LBB24_3 -; CHECK-BE-NEXT: .LBB24_12: @ %cond.load4 -; CHECK-BE-NEXT: vldr.16 s0, [r0, #4] -; CHECK-BE-NEXT: vmov r2, s0 -; CHECK-BE-NEXT: vmov.16 q1[2], r2 -; CHECK-BE-NEXT: lsls r2, r1, #28 -; CHECK-BE-NEXT: bpl .LBB24_4 -; CHECK-BE-NEXT: .LBB24_13: @ %cond.load7 -; CHECK-BE-NEXT: vldr.16 s0, [r0, #6] -; CHECK-BE-NEXT: vmov r2, s0 -; CHECK-BE-NEXT: vmov.16 q1[3], r2 -; CHECK-BE-NEXT: lsls r2, r1, #27 -; CHECK-BE-NEXT: bpl .LBB24_5 -; CHECK-BE-NEXT: .LBB24_14: @ %cond.load10 -; CHECK-BE-NEXT: vldr.16 s0, [r0, #8] -; CHECK-BE-NEXT: vmov r2, s0 -; CHECK-BE-NEXT: vmov.16 q1[4], r2 -; CHECK-BE-NEXT: lsls r2, r1, #26 -; CHECK-BE-NEXT: bpl .LBB24_6 -; CHECK-BE-NEXT: .LBB24_15: @ %cond.load13 -; CHECK-BE-NEXT: vldr.16 s0, [r0, #10] -; CHECK-BE-NEXT: vmov r2, s0 -; CHECK-BE-NEXT: vmov.16 q1[5], r2 -; CHECK-BE-NEXT: lsls r2, r1, #25 -; CHECK-BE-NEXT: bpl .LBB24_7 -; CHECK-BE-NEXT: .LBB24_16: @ %cond.load16 -; CHECK-BE-NEXT: 
vldr.16 s0, [r0, #12] -; CHECK-BE-NEXT: vmov r2, s0 -; CHECK-BE-NEXT: vmov.16 q1[6], r2 -; CHECK-BE-NEXT: lsls r1, r1, #24 -; CHECK-BE-NEXT: bmi .LBB24_8 -; CHECK-BE-NEXT: b .LBB24_9 -entry: - %c = icmp sgt <8 x i16> %a, zeroinitializer - %l = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* %dest, i32 2, <8 x i1> %c, <8 x half> undef) - ret <8 x half> %l -} - -define arm_aapcs_vfpcc <8 x half> @masked_v8f16_align1_undef(<8 x half> *%dest, <8 x i16> %a) { -; CHECK-LE-LABEL: masked_v8f16_align1_undef: -; CHECK-LE: @ %bb.0: @ %entry -; CHECK-LE-NEXT: .pad #40 -; CHECK-LE-NEXT: sub sp, #40 -; CHECK-LE-NEXT: vcmp.s16 gt, q0, zr -; CHECK-LE-NEXT: movs r2, #0 -; CHECK-LE-NEXT: vmrs r1, p0 -; CHECK-LE-NEXT: @ implicit-def: $q0 -; CHECK-LE-NEXT: and r3, r1, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #0, #1 -; CHECK-LE-NEXT: ubfx r3, r1, #2, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #1, #1 -; CHECK-LE-NEXT: ubfx r3, r1, #4, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #2, #1 -; CHECK-LE-NEXT: ubfx r3, r1, #6, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #3, #1 -; CHECK-LE-NEXT: ubfx r3, r1, #8, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #4, #1 -; CHECK-LE-NEXT: ubfx r3, r1, #10, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #5, #1 -; CHECK-LE-NEXT: ubfx r3, r1, #12, #1 -; CHECK-LE-NEXT: ubfx r1, r1, #14, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #6, #1 -; CHECK-LE-NEXT: rsbs r1, r1, #0 -; CHECK-LE-NEXT: bfi r2, r1, #7, #1 -; CHECK-LE-NEXT: uxtb r1, r2 -; CHECK-LE-NEXT: lsls r2, r2, #31 -; CHECK-LE-NEXT: bne .LBB25_9 -; CHECK-LE-NEXT: @ %bb.1: @ %else -; CHECK-LE-NEXT: lsls r2, r1, #30 -; CHECK-LE-NEXT: bmi .LBB25_10 -; CHECK-LE-NEXT: .LBB25_2: @ %else2 -; CHECK-LE-NEXT: lsls r2, r1, #29 -; CHECK-LE-NEXT: bmi .LBB25_11 -; CHECK-LE-NEXT: .LBB25_3: @ %else5 -; CHECK-LE-NEXT: lsls r2, r1, #28 -; CHECK-LE-NEXT: bmi .LBB25_12 -; CHECK-LE-NEXT: .LBB25_4: @ %else8 -; CHECK-LE-NEXT: lsls r2, r1, #27 -; CHECK-LE-NEXT: bmi .LBB25_13 -; CHECK-LE-NEXT: .LBB25_5: @ %else11 -; CHECK-LE-NEXT: lsls r2, r1, #26 -; CHECK-LE-NEXT: bmi .LBB25_14 -; CHECK-LE-NEXT: .LBB25_6: @ %else14 -; CHECK-LE-NEXT: lsls r2, r1, #25 -; CHECK-LE-NEXT: bmi .LBB25_15 -; CHECK-LE-NEXT: .LBB25_7: @ %else17 -; CHECK-LE-NEXT: lsls r1, r1, #24 -; CHECK-LE-NEXT: bmi .LBB25_16 -; CHECK-LE-NEXT: .LBB25_8: @ %else20 -; CHECK-LE-NEXT: add sp, #40 -; CHECK-LE-NEXT: bx lr -; CHECK-LE-NEXT: .LBB25_9: @ %cond.load -; CHECK-LE-NEXT: ldrh r2, [r0] -; CHECK-LE-NEXT: strh.w r2, [sp, #28] -; CHECK-LE-NEXT: vldr.16 s0, [sp, #28] -; CHECK-LE-NEXT: lsls r2, r1, #30 -; CHECK-LE-NEXT: bpl .LBB25_2 -; CHECK-LE-NEXT: .LBB25_10: @ %cond.load1 -; CHECK-LE-NEXT: ldrh r2, [r0, #2] -; CHECK-LE-NEXT: strh.w r2, [sp, #24] -; CHECK-LE-NEXT: vldr.16 s4, [sp, #24] -; CHECK-LE-NEXT: vmov r2, s4 -; CHECK-LE-NEXT: vmov.16 q0[1], r2 -; CHECK-LE-NEXT: lsls r2, r1, #29 -; CHECK-LE-NEXT: bpl .LBB25_3 -; CHECK-LE-NEXT: .LBB25_11: @ %cond.load4 -; CHECK-LE-NEXT: ldrh r2, [r0, #4] -; CHECK-LE-NEXT: strh.w r2, [sp, #20] -; CHECK-LE-NEXT: vldr.16 s4, [sp, #20] -; CHECK-LE-NEXT: vmov r2, s4 -; CHECK-LE-NEXT: vmov.16 q0[2], r2 -; CHECK-LE-NEXT: lsls r2, r1, #28 -; CHECK-LE-NEXT: bpl .LBB25_4 -; CHECK-LE-NEXT: .LBB25_12: @ %cond.load7 -; CHECK-LE-NEXT: ldrh r2, [r0, #6] -; CHECK-LE-NEXT: strh.w r2, [sp, #16] -; CHECK-LE-NEXT: vldr.16 s4, [sp, #16] -; CHECK-LE-NEXT: vmov r2, s4 -; CHECK-LE-NEXT: vmov.16 q0[3], r2 -; 
CHECK-LE-NEXT: lsls r2, r1, #27 -; CHECK-LE-NEXT: bpl .LBB25_5 -; CHECK-LE-NEXT: .LBB25_13: @ %cond.load10 -; CHECK-LE-NEXT: ldrh r2, [r0, #8] -; CHECK-LE-NEXT: strh.w r2, [sp, #12] -; CHECK-LE-NEXT: vldr.16 s4, [sp, #12] -; CHECK-LE-NEXT: vmov r2, s4 -; CHECK-LE-NEXT: vmov.16 q0[4], r2 -; CHECK-LE-NEXT: lsls r2, r1, #26 -; CHECK-LE-NEXT: bpl .LBB25_6 -; CHECK-LE-NEXT: .LBB25_14: @ %cond.load13 -; CHECK-LE-NEXT: ldrh r2, [r0, #10] -; CHECK-LE-NEXT: strh.w r2, [sp, #8] -; CHECK-LE-NEXT: vldr.16 s4, [sp, #8] -; CHECK-LE-NEXT: vmov r2, s4 -; CHECK-LE-NEXT: vmov.16 q0[5], r2 -; CHECK-LE-NEXT: lsls r2, r1, #25 -; CHECK-LE-NEXT: bpl .LBB25_7 -; CHECK-LE-NEXT: .LBB25_15: @ %cond.load16 -; CHECK-LE-NEXT: ldrh r2, [r0, #12] -; CHECK-LE-NEXT: strh.w r2, [sp, #4] -; CHECK-LE-NEXT: vldr.16 s4, [sp, #4] -; CHECK-LE-NEXT: vmov r2, s4 -; CHECK-LE-NEXT: vmov.16 q0[6], r2 -; CHECK-LE-NEXT: lsls r1, r1, #24 -; CHECK-LE-NEXT: bpl .LBB25_8 -; CHECK-LE-NEXT: .LBB25_16: @ %cond.load19 -; CHECK-LE-NEXT: ldrh r0, [r0, #14] -; CHECK-LE-NEXT: strh.w r0, [sp] -; CHECK-LE-NEXT: vldr.16 s4, [sp] -; CHECK-LE-NEXT: vmov r0, s4 -; CHECK-LE-NEXT: vmov.16 q0[7], r0 -; CHECK-LE-NEXT: add sp, #40 -; CHECK-LE-NEXT: bx lr -; -; CHECK-BE-LABEL: masked_v8f16_align1_undef: -; CHECK-BE: @ %bb.0: @ %entry -; CHECK-BE-NEXT: .pad #40 -; CHECK-BE-NEXT: sub sp, #40 -; CHECK-BE-NEXT: vrev64.16 q1, q0 -; CHECK-BE-NEXT: movs r2, #0 -; CHECK-BE-NEXT: vcmp.s16 gt, q1, zr -; CHECK-BE-NEXT: @ implicit-def: $q1 -; CHECK-BE-NEXT: vmrs r1, p0 -; CHECK-BE-NEXT: and r3, r1, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #0, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #2, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #1, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #4, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #2, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #6, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #3, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #8, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #4, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #10, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #5, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #12, #1 -; CHECK-BE-NEXT: ubfx r1, r1, #14, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #6, #1 -; CHECK-BE-NEXT: rsbs r1, r1, #0 -; CHECK-BE-NEXT: bfi r2, r1, #7, #1 -; CHECK-BE-NEXT: uxtb r1, r2 -; CHECK-BE-NEXT: lsls r2, r2, #31 -; CHECK-BE-NEXT: bne .LBB25_10 -; CHECK-BE-NEXT: @ %bb.1: @ %else -; CHECK-BE-NEXT: lsls r2, r1, #30 -; CHECK-BE-NEXT: bmi .LBB25_11 -; CHECK-BE-NEXT: .LBB25_2: @ %else2 -; CHECK-BE-NEXT: lsls r2, r1, #29 -; CHECK-BE-NEXT: bmi .LBB25_12 -; CHECK-BE-NEXT: .LBB25_3: @ %else5 -; CHECK-BE-NEXT: lsls r2, r1, #28 -; CHECK-BE-NEXT: bmi .LBB25_13 -; CHECK-BE-NEXT: .LBB25_4: @ %else8 -; CHECK-BE-NEXT: lsls r2, r1, #27 -; CHECK-BE-NEXT: bmi .LBB25_14 -; CHECK-BE-NEXT: .LBB25_5: @ %else11 -; CHECK-BE-NEXT: lsls r2, r1, #26 -; CHECK-BE-NEXT: bmi .LBB25_15 -; CHECK-BE-NEXT: .LBB25_6: @ %else14 -; CHECK-BE-NEXT: lsls r2, r1, #25 -; CHECK-BE-NEXT: bmi .LBB25_16 -; CHECK-BE-NEXT: .LBB25_7: @ %else17 -; CHECK-BE-NEXT: lsls r1, r1, #24 -; CHECK-BE-NEXT: bpl .LBB25_9 -; CHECK-BE-NEXT: .LBB25_8: @ %cond.load19 -; CHECK-BE-NEXT: ldrh r0, [r0, #14] -; CHECK-BE-NEXT: strh.w r0, [sp] -; CHECK-BE-NEXT: vldr.16 s0, [sp] -; CHECK-BE-NEXT: vmov r0, s0 -; CHECK-BE-NEXT: vmov.16 q1[7], r0 -; CHECK-BE-NEXT: .LBB25_9: @ %else20 -; CHECK-BE-NEXT: vrev64.16 q0, q1 -; CHECK-BE-NEXT: add sp, #40 -; CHECK-BE-NEXT: bx lr -; 
CHECK-BE-NEXT: .LBB25_10: @ %cond.load -; CHECK-BE-NEXT: ldrh r2, [r0] -; CHECK-BE-NEXT: strh.w r2, [sp, #28] -; CHECK-BE-NEXT: vldr.16 s4, [sp, #28] -; CHECK-BE-NEXT: lsls r2, r1, #30 -; CHECK-BE-NEXT: bpl .LBB25_2 -; CHECK-BE-NEXT: .LBB25_11: @ %cond.load1 -; CHECK-BE-NEXT: ldrh r2, [r0, #2] -; CHECK-BE-NEXT: strh.w r2, [sp, #24] -; CHECK-BE-NEXT: vldr.16 s0, [sp, #24] -; CHECK-BE-NEXT: vmov r2, s0 -; CHECK-BE-NEXT: vmov.16 q1[1], r2 -; CHECK-BE-NEXT: lsls r2, r1, #29 -; CHECK-BE-NEXT: bpl .LBB25_3 -; CHECK-BE-NEXT: .LBB25_12: @ %cond.load4 -; CHECK-BE-NEXT: ldrh r2, [r0, #4] -; CHECK-BE-NEXT: strh.w r2, [sp, #20] -; CHECK-BE-NEXT: vldr.16 s0, [sp, #20] -; CHECK-BE-NEXT: vmov r2, s0 -; CHECK-BE-NEXT: vmov.16 q1[2], r2 -; CHECK-BE-NEXT: lsls r2, r1, #28 -; CHECK-BE-NEXT: bpl .LBB25_4 -; CHECK-BE-NEXT: .LBB25_13: @ %cond.load7 -; CHECK-BE-NEXT: ldrh r2, [r0, #6] -; CHECK-BE-NEXT: strh.w r2, [sp, #16] -; CHECK-BE-NEXT: vldr.16 s0, [sp, #16] -; CHECK-BE-NEXT: vmov r2, s0 -; CHECK-BE-NEXT: vmov.16 q1[3], r2 -; CHECK-BE-NEXT: lsls r2, r1, #27 -; CHECK-BE-NEXT: bpl .LBB25_5 -; CHECK-BE-NEXT: .LBB25_14: @ %cond.load10 -; CHECK-BE-NEXT: ldrh r2, [r0, #8] -; CHECK-BE-NEXT: strh.w r2, [sp, #12] -; CHECK-BE-NEXT: vldr.16 s0, [sp, #12] -; CHECK-BE-NEXT: vmov r2, s0 -; CHECK-BE-NEXT: vmov.16 q1[4], r2 -; CHECK-BE-NEXT: lsls r2, r1, #26 -; CHECK-BE-NEXT: bpl .LBB25_6 -; CHECK-BE-NEXT: .LBB25_15: @ %cond.load13 -; CHECK-BE-NEXT: ldrh r2, [r0, #10] -; CHECK-BE-NEXT: strh.w r2, [sp, #8] -; CHECK-BE-NEXT: vldr.16 s0, [sp, #8] -; CHECK-BE-NEXT: vmov r2, s0 -; CHECK-BE-NEXT: vmov.16 q1[5], r2 -; CHECK-BE-NEXT: lsls r2, r1, #25 -; CHECK-BE-NEXT: bpl .LBB25_7 -; CHECK-BE-NEXT: .LBB25_16: @ %cond.load16 -; CHECK-BE-NEXT: ldrh r2, [r0, #12] -; CHECK-BE-NEXT: strh.w r2, [sp, #4] -; CHECK-BE-NEXT: vldr.16 s0, [sp, #4] -; CHECK-BE-NEXT: vmov r2, s0 -; CHECK-BE-NEXT: vmov.16 q1[6], r2 -; CHECK-BE-NEXT: lsls r1, r1, #24 -; CHECK-BE-NEXT: bmi .LBB25_8 -; CHECK-BE-NEXT: b .LBB25_9 -entry: - %c = icmp sgt <8 x i16> %a, zeroinitializer - %l = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* %dest, i32 1, <8 x i1> %c, <8 x half> undef) - ret <8 x half> %l -} - -define arm_aapcs_vfpcc <8 x half> @masked_v8f16_align4_other(<8 x half> *%dest, <8 x i16> %a, <8 x half> %b) { -; CHECK-LE-LABEL: masked_v8f16_align4_other: -; CHECK-LE: @ %bb.0: @ %entry -; CHECK-LE-NEXT: .pad #8 -; CHECK-LE-NEXT: sub sp, #8 -; CHECK-LE-NEXT: vcmp.s16 gt, q0, zr -; CHECK-LE-NEXT: movs r2, #0 -; CHECK-LE-NEXT: vmrs r1, p0 -; CHECK-LE-NEXT: and r3, r1, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #0, #1 -; CHECK-LE-NEXT: ubfx r3, r1, #2, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #1, #1 -; CHECK-LE-NEXT: ubfx r3, r1, #4, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #2, #1 -; CHECK-LE-NEXT: ubfx r3, r1, #6, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #3, #1 -; CHECK-LE-NEXT: ubfx r3, r1, #8, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #4, #1 -; CHECK-LE-NEXT: ubfx r3, r1, #10, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #5, #1 -; CHECK-LE-NEXT: ubfx r3, r1, #12, #1 -; CHECK-LE-NEXT: ubfx r1, r1, #14, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #6, #1 -; CHECK-LE-NEXT: rsbs r1, r1, #0 -; CHECK-LE-NEXT: bfi r2, r1, #7, #1 -; CHECK-LE-NEXT: uxtb r1, r2 -; CHECK-LE-NEXT: lsls r2, r2, #31 -; CHECK-LE-NEXT: bne .LBB26_10 -; CHECK-LE-NEXT: @ %bb.1: @ %else -; CHECK-LE-NEXT: lsls r2, r1, 
#30 -; CHECK-LE-NEXT: bmi .LBB26_11 -; CHECK-LE-NEXT: .LBB26_2: @ %else2 -; CHECK-LE-NEXT: lsls r2, r1, #29 -; CHECK-LE-NEXT: bmi .LBB26_12 -; CHECK-LE-NEXT: .LBB26_3: @ %else5 -; CHECK-LE-NEXT: lsls r2, r1, #28 -; CHECK-LE-NEXT: bmi .LBB26_13 -; CHECK-LE-NEXT: .LBB26_4: @ %else8 -; CHECK-LE-NEXT: lsls r2, r1, #27 -; CHECK-LE-NEXT: bmi .LBB26_14 -; CHECK-LE-NEXT: .LBB26_5: @ %else11 -; CHECK-LE-NEXT: lsls r2, r1, #26 -; CHECK-LE-NEXT: bmi .LBB26_15 -; CHECK-LE-NEXT: .LBB26_6: @ %else14 -; CHECK-LE-NEXT: lsls r2, r1, #25 -; CHECK-LE-NEXT: bmi .LBB26_16 -; CHECK-LE-NEXT: .LBB26_7: @ %else17 -; CHECK-LE-NEXT: lsls r1, r1, #24 -; CHECK-LE-NEXT: bpl .LBB26_9 -; CHECK-LE-NEXT: .LBB26_8: @ %cond.load19 -; CHECK-LE-NEXT: vldr.16 s0, [r0, #14] -; CHECK-LE-NEXT: vmov r0, s0 -; CHECK-LE-NEXT: vmov.16 q1[7], r0 -; CHECK-LE-NEXT: .LBB26_9: @ %else20 -; CHECK-LE-NEXT: vmov q0, q1 -; CHECK-LE-NEXT: add sp, #8 -; CHECK-LE-NEXT: bx lr -; CHECK-LE-NEXT: .LBB26_10: @ %cond.load -; CHECK-LE-NEXT: vldr.16 s0, [r0] -; CHECK-LE-NEXT: vmov r2, s0 -; CHECK-LE-NEXT: vmov.16 q1[0], r2 -; CHECK-LE-NEXT: lsls r2, r1, #30 -; CHECK-LE-NEXT: bpl .LBB26_2 -; CHECK-LE-NEXT: .LBB26_11: @ %cond.load1 -; CHECK-LE-NEXT: vldr.16 s0, [r0, #2] -; CHECK-LE-NEXT: vmov r2, s0 -; CHECK-LE-NEXT: vmov.16 q1[1], r2 -; CHECK-LE-NEXT: lsls r2, r1, #29 -; CHECK-LE-NEXT: bpl .LBB26_3 -; CHECK-LE-NEXT: .LBB26_12: @ %cond.load4 -; CHECK-LE-NEXT: vldr.16 s0, [r0, #4] -; CHECK-LE-NEXT: vmov r2, s0 -; CHECK-LE-NEXT: vmov.16 q1[2], r2 -; CHECK-LE-NEXT: lsls r2, r1, #28 -; CHECK-LE-NEXT: bpl .LBB26_4 -; CHECK-LE-NEXT: .LBB26_13: @ %cond.load7 -; CHECK-LE-NEXT: vldr.16 s0, [r0, #6] -; CHECK-LE-NEXT: vmov r2, s0 -; CHECK-LE-NEXT: vmov.16 q1[3], r2 -; CHECK-LE-NEXT: lsls r2, r1, #27 -; CHECK-LE-NEXT: bpl .LBB26_5 -; CHECK-LE-NEXT: .LBB26_14: @ %cond.load10 -; CHECK-LE-NEXT: vldr.16 s0, [r0, #8] -; CHECK-LE-NEXT: vmov r2, s0 -; CHECK-LE-NEXT: vmov.16 q1[4], r2 -; CHECK-LE-NEXT: lsls r2, r1, #26 -; CHECK-LE-NEXT: bpl .LBB26_6 -; CHECK-LE-NEXT: .LBB26_15: @ %cond.load13 -; CHECK-LE-NEXT: vldr.16 s0, [r0, #10] -; CHECK-LE-NEXT: vmov r2, s0 -; CHECK-LE-NEXT: vmov.16 q1[5], r2 -; CHECK-LE-NEXT: lsls r2, r1, #25 -; CHECK-LE-NEXT: bpl .LBB26_7 -; CHECK-LE-NEXT: .LBB26_16: @ %cond.load16 -; CHECK-LE-NEXT: vldr.16 s0, [r0, #12] -; CHECK-LE-NEXT: vmov r2, s0 -; CHECK-LE-NEXT: vmov.16 q1[6], r2 -; CHECK-LE-NEXT: lsls r1, r1, #24 -; CHECK-LE-NEXT: bmi .LBB26_8 -; CHECK-LE-NEXT: b .LBB26_9 -; -; CHECK-BE-LABEL: masked_v8f16_align4_other: -; CHECK-BE: @ %bb.0: @ %entry -; CHECK-BE-NEXT: .pad #8 -; CHECK-BE-NEXT: sub sp, #8 -; CHECK-BE-NEXT: vrev64.16 q2, q0 -; CHECK-BE-NEXT: movs r2, #0 -; CHECK-BE-NEXT: vcmp.s16 gt, q2, zr -; CHECK-BE-NEXT: vrev64.16 q2, q1 -; CHECK-BE-NEXT: vmrs r1, p0 -; CHECK-BE-NEXT: and r3, r1, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #0, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #2, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #1, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #4, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #2, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #6, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #3, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #8, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #4, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #10, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #5, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #12, #1 -; CHECK-BE-NEXT: ubfx r1, r1, #14, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, 
#6, #1 -; CHECK-BE-NEXT: rsbs r1, r1, #0 -; CHECK-BE-NEXT: bfi r2, r1, #7, #1 -; CHECK-BE-NEXT: uxtb r1, r2 -; CHECK-BE-NEXT: lsls r2, r2, #31 -; CHECK-BE-NEXT: bne .LBB26_10 -; CHECK-BE-NEXT: @ %bb.1: @ %else -; CHECK-BE-NEXT: lsls r2, r1, #30 -; CHECK-BE-NEXT: bmi .LBB26_11 -; CHECK-BE-NEXT: .LBB26_2: @ %else2 -; CHECK-BE-NEXT: lsls r2, r1, #29 -; CHECK-BE-NEXT: bmi .LBB26_12 -; CHECK-BE-NEXT: .LBB26_3: @ %else5 -; CHECK-BE-NEXT: lsls r2, r1, #28 -; CHECK-BE-NEXT: bmi .LBB26_13 -; CHECK-BE-NEXT: .LBB26_4: @ %else8 -; CHECK-BE-NEXT: lsls r2, r1, #27 -; CHECK-BE-NEXT: bmi .LBB26_14 -; CHECK-BE-NEXT: .LBB26_5: @ %else11 -; CHECK-BE-NEXT: lsls r2, r1, #26 -; CHECK-BE-NEXT: bmi .LBB26_15 -; CHECK-BE-NEXT: .LBB26_6: @ %else14 -; CHECK-BE-NEXT: lsls r2, r1, #25 -; CHECK-BE-NEXT: bmi .LBB26_16 -; CHECK-BE-NEXT: .LBB26_7: @ %else17 -; CHECK-BE-NEXT: lsls r1, r1, #24 -; CHECK-BE-NEXT: bpl .LBB26_9 -; CHECK-BE-NEXT: .LBB26_8: @ %cond.load19 -; CHECK-BE-NEXT: vldr.16 s0, [r0, #14] -; CHECK-BE-NEXT: vmov r0, s0 -; CHECK-BE-NEXT: vmov.16 q2[7], r0 -; CHECK-BE-NEXT: .LBB26_9: @ %else20 -; CHECK-BE-NEXT: vrev64.16 q0, q2 -; CHECK-BE-NEXT: add sp, #8 -; CHECK-BE-NEXT: bx lr -; CHECK-BE-NEXT: .LBB26_10: @ %cond.load -; CHECK-BE-NEXT: vldr.16 s0, [r0] -; CHECK-BE-NEXT: vmov r2, s0 -; CHECK-BE-NEXT: vmov.16 q2[0], r2 -; CHECK-BE-NEXT: lsls r2, r1, #30 -; CHECK-BE-NEXT: bpl .LBB26_2 -; CHECK-BE-NEXT: .LBB26_11: @ %cond.load1 -; CHECK-BE-NEXT: vldr.16 s0, [r0, #2] -; CHECK-BE-NEXT: vmov r2, s0 -; CHECK-BE-NEXT: vmov.16 q2[1], r2 -; CHECK-BE-NEXT: lsls r2, r1, #29 -; CHECK-BE-NEXT: bpl .LBB26_3 -; CHECK-BE-NEXT: .LBB26_12: @ %cond.load4 -; CHECK-BE-NEXT: vldr.16 s0, [r0, #4] -; CHECK-BE-NEXT: vmov r2, s0 -; CHECK-BE-NEXT: vmov.16 q2[2], r2 -; CHECK-BE-NEXT: lsls r2, r1, #28 -; CHECK-BE-NEXT: bpl .LBB26_4 -; CHECK-BE-NEXT: .LBB26_13: @ %cond.load7 -; CHECK-BE-NEXT: vldr.16 s0, [r0, #6] -; CHECK-BE-NEXT: vmov r2, s0 -; CHECK-BE-NEXT: vmov.16 q2[3], r2 -; CHECK-BE-NEXT: lsls r2, r1, #27 -; CHECK-BE-NEXT: bpl .LBB26_5 -; CHECK-BE-NEXT: .LBB26_14: @ %cond.load10 -; CHECK-BE-NEXT: vldr.16 s0, [r0, #8] -; CHECK-BE-NEXT: vmov r2, s0 -; CHECK-BE-NEXT: vmov.16 q2[4], r2 -; CHECK-BE-NEXT: lsls r2, r1, #26 -; CHECK-BE-NEXT: bpl .LBB26_6 -; CHECK-BE-NEXT: .LBB26_15: @ %cond.load13 -; CHECK-BE-NEXT: vldr.16 s0, [r0, #10] -; CHECK-BE-NEXT: vmov r2, s0 -; CHECK-BE-NEXT: vmov.16 q2[5], r2 -; CHECK-BE-NEXT: lsls r2, r1, #25 -; CHECK-BE-NEXT: bpl .LBB26_7 -; CHECK-BE-NEXT: .LBB26_16: @ %cond.load16 -; CHECK-BE-NEXT: vldr.16 s0, [r0, #12] -; CHECK-BE-NEXT: vmov r2, s0 -; CHECK-BE-NEXT: vmov.16 q2[6], r2 -; CHECK-BE-NEXT: lsls r1, r1, #24 -; CHECK-BE-NEXT: bmi .LBB26_8 -; CHECK-BE-NEXT: b .LBB26_9 -entry: - %c = icmp sgt <8 x i16> %a, zeroinitializer - %l = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* %dest, i32 2, <8 x i1> %c, <8 x half> %b) - ret <8 x half> %l -} - -define arm_aapcs_vfpcc i8* @masked_v8f16_preinc(i8* %x, i8* %y, <8 x i16> %a) { -; CHECK-LE-LABEL: masked_v8f16_preinc: -; CHECK-LE: @ %bb.0: @ %entry -; CHECK-LE-NEXT: .pad #8 -; CHECK-LE-NEXT: sub sp, #8 -; CHECK-LE-NEXT: vcmp.s16 gt, q0, zr -; CHECK-LE-NEXT: movs r3, #0 -; CHECK-LE-NEXT: vmrs r12, p0 -; CHECK-LE-NEXT: adds r0, #4 -; CHECK-LE-NEXT: @ implicit-def: $q0 -; CHECK-LE-NEXT: and r2, r12, #1 -; CHECK-LE-NEXT: rsbs r2, r2, #0 -; CHECK-LE-NEXT: bfi r3, r2, #0, #1 -; CHECK-LE-NEXT: ubfx r2, r12, #2, #1 -; CHECK-LE-NEXT: rsbs r2, r2, #0 -; CHECK-LE-NEXT: bfi r3, r2, #1, #1 -; CHECK-LE-NEXT: ubfx r2, r12, #4, #1 -; CHECK-LE-NEXT: rsbs r2, r2, #0 
-; CHECK-LE-NEXT: bfi r3, r2, #2, #1 -; CHECK-LE-NEXT: ubfx r2, r12, #6, #1 -; CHECK-LE-NEXT: rsbs r2, r2, #0 -; CHECK-LE-NEXT: bfi r3, r2, #3, #1 -; CHECK-LE-NEXT: ubfx r2, r12, #8, #1 -; CHECK-LE-NEXT: rsbs r2, r2, #0 -; CHECK-LE-NEXT: bfi r3, r2, #4, #1 -; CHECK-LE-NEXT: ubfx r2, r12, #10, #1 -; CHECK-LE-NEXT: rsbs r2, r2, #0 -; CHECK-LE-NEXT: bfi r3, r2, #5, #1 -; CHECK-LE-NEXT: ubfx r2, r12, #12, #1 -; CHECK-LE-NEXT: rsbs r2, r2, #0 -; CHECK-LE-NEXT: bfi r3, r2, #6, #1 -; CHECK-LE-NEXT: ubfx r2, r12, #14, #1 -; CHECK-LE-NEXT: rsbs r2, r2, #0 -; CHECK-LE-NEXT: bfi r3, r2, #7, #1 -; CHECK-LE-NEXT: uxtb r2, r3 -; CHECK-LE-NEXT: lsls r3, r3, #31 -; CHECK-LE-NEXT: bne .LBB27_10 -; CHECK-LE-NEXT: @ %bb.1: @ %else -; CHECK-LE-NEXT: lsls r3, r2, #30 -; CHECK-LE-NEXT: bmi .LBB27_11 -; CHECK-LE-NEXT: .LBB27_2: @ %else2 -; CHECK-LE-NEXT: lsls r3, r2, #29 -; CHECK-LE-NEXT: bmi .LBB27_12 -; CHECK-LE-NEXT: .LBB27_3: @ %else5 -; CHECK-LE-NEXT: lsls r3, r2, #28 -; CHECK-LE-NEXT: bmi .LBB27_13 -; CHECK-LE-NEXT: .LBB27_4: @ %else8 -; CHECK-LE-NEXT: lsls r3, r2, #27 -; CHECK-LE-NEXT: bmi .LBB27_14 -; CHECK-LE-NEXT: .LBB27_5: @ %else11 -; CHECK-LE-NEXT: lsls r3, r2, #26 -; CHECK-LE-NEXT: bmi .LBB27_15 -; CHECK-LE-NEXT: .LBB27_6: @ %else14 -; CHECK-LE-NEXT: lsls r3, r2, #25 -; CHECK-LE-NEXT: bmi .LBB27_16 -; CHECK-LE-NEXT: .LBB27_7: @ %else17 -; CHECK-LE-NEXT: lsls r2, r2, #24 -; CHECK-LE-NEXT: bpl .LBB27_9 -; CHECK-LE-NEXT: .LBB27_8: @ %cond.load19 -; CHECK-LE-NEXT: vldr.16 s4, [r0, #14] -; CHECK-LE-NEXT: vmov r2, s4 -; CHECK-LE-NEXT: vmov.16 q0[7], r2 -; CHECK-LE-NEXT: .LBB27_9: @ %else20 -; CHECK-LE-NEXT: vstrw.32 q0, [r1] -; CHECK-LE-NEXT: add sp, #8 -; CHECK-LE-NEXT: bx lr -; CHECK-LE-NEXT: .LBB27_10: @ %cond.load -; CHECK-LE-NEXT: vldr.16 s0, [r0] -; CHECK-LE-NEXT: lsls r3, r2, #30 -; CHECK-LE-NEXT: bpl .LBB27_2 -; CHECK-LE-NEXT: .LBB27_11: @ %cond.load1 -; CHECK-LE-NEXT: vldr.16 s4, [r0, #2] -; CHECK-LE-NEXT: vmov r3, s4 -; CHECK-LE-NEXT: vmov.16 q0[1], r3 -; CHECK-LE-NEXT: lsls r3, r2, #29 -; CHECK-LE-NEXT: bpl .LBB27_3 -; CHECK-LE-NEXT: .LBB27_12: @ %cond.load4 -; CHECK-LE-NEXT: vldr.16 s4, [r0, #4] -; CHECK-LE-NEXT: vmov r3, s4 -; CHECK-LE-NEXT: vmov.16 q0[2], r3 -; CHECK-LE-NEXT: lsls r3, r2, #28 -; CHECK-LE-NEXT: bpl .LBB27_4 -; CHECK-LE-NEXT: .LBB27_13: @ %cond.load7 -; CHECK-LE-NEXT: vldr.16 s4, [r0, #6] -; CHECK-LE-NEXT: vmov r3, s4 -; CHECK-LE-NEXT: vmov.16 q0[3], r3 -; CHECK-LE-NEXT: lsls r3, r2, #27 -; CHECK-LE-NEXT: bpl .LBB27_5 -; CHECK-LE-NEXT: .LBB27_14: @ %cond.load10 -; CHECK-LE-NEXT: vldr.16 s4, [r0, #8] -; CHECK-LE-NEXT: vmov r3, s4 -; CHECK-LE-NEXT: vmov.16 q0[4], r3 -; CHECK-LE-NEXT: lsls r3, r2, #26 -; CHECK-LE-NEXT: bpl .LBB27_6 -; CHECK-LE-NEXT: .LBB27_15: @ %cond.load13 -; CHECK-LE-NEXT: vldr.16 s4, [r0, #10] -; CHECK-LE-NEXT: vmov r3, s4 -; CHECK-LE-NEXT: vmov.16 q0[5], r3 -; CHECK-LE-NEXT: lsls r3, r2, #25 -; CHECK-LE-NEXT: bpl .LBB27_7 -; CHECK-LE-NEXT: .LBB27_16: @ %cond.load16 -; CHECK-LE-NEXT: vldr.16 s4, [r0, #12] -; CHECK-LE-NEXT: vmov r3, s4 -; CHECK-LE-NEXT: vmov.16 q0[6], r3 -; CHECK-LE-NEXT: lsls r2, r2, #24 -; CHECK-LE-NEXT: bmi .LBB27_8 -; CHECK-LE-NEXT: b .LBB27_9 -; -; CHECK-BE-LABEL: masked_v8f16_preinc: -; CHECK-BE: @ %bb.0: @ %entry -; CHECK-BE-NEXT: .pad #8 -; CHECK-BE-NEXT: sub sp, #8 -; CHECK-BE-NEXT: vrev64.16 q1, q0 -; CHECK-BE-NEXT: movs r3, #0 -; CHECK-BE-NEXT: vcmp.s16 gt, q1, zr -; CHECK-BE-NEXT: adds r0, #4 -; CHECK-BE-NEXT: vmrs r12, p0 -; CHECK-BE-NEXT: @ implicit-def: $q0 -; CHECK-BE-NEXT: and r2, r12, #1 -; CHECK-BE-NEXT: rsbs r2, r2, #0 -; 
CHECK-BE-NEXT: bfi r3, r2, #0, #1 -; CHECK-BE-NEXT: ubfx r2, r12, #2, #1 -; CHECK-BE-NEXT: rsbs r2, r2, #0 -; CHECK-BE-NEXT: bfi r3, r2, #1, #1 -; CHECK-BE-NEXT: ubfx r2, r12, #4, #1 -; CHECK-BE-NEXT: rsbs r2, r2, #0 -; CHECK-BE-NEXT: bfi r3, r2, #2, #1 -; CHECK-BE-NEXT: ubfx r2, r12, #6, #1 -; CHECK-BE-NEXT: rsbs r2, r2, #0 -; CHECK-BE-NEXT: bfi r3, r2, #3, #1 -; CHECK-BE-NEXT: ubfx r2, r12, #8, #1 -; CHECK-BE-NEXT: rsbs r2, r2, #0 -; CHECK-BE-NEXT: bfi r3, r2, #4, #1 -; CHECK-BE-NEXT: ubfx r2, r12, #10, #1 -; CHECK-BE-NEXT: rsbs r2, r2, #0 -; CHECK-BE-NEXT: bfi r3, r2, #5, #1 -; CHECK-BE-NEXT: ubfx r2, r12, #12, #1 -; CHECK-BE-NEXT: rsbs r2, r2, #0 -; CHECK-BE-NEXT: bfi r3, r2, #6, #1 -; CHECK-BE-NEXT: ubfx r2, r12, #14, #1 -; CHECK-BE-NEXT: rsbs r2, r2, #0 -; CHECK-BE-NEXT: bfi r3, r2, #7, #1 -; CHECK-BE-NEXT: uxtb r2, r3 -; CHECK-BE-NEXT: lsls r3, r3, #31 -; CHECK-BE-NEXT: bne .LBB27_10 -; CHECK-BE-NEXT: @ %bb.1: @ %else -; CHECK-BE-NEXT: lsls r3, r2, #30 -; CHECK-BE-NEXT: bmi .LBB27_11 -; CHECK-BE-NEXT: .LBB27_2: @ %else2 -; CHECK-BE-NEXT: lsls r3, r2, #29 -; CHECK-BE-NEXT: bmi .LBB27_12 -; CHECK-BE-NEXT: .LBB27_3: @ %else5 -; CHECK-BE-NEXT: lsls r3, r2, #28 -; CHECK-BE-NEXT: bmi .LBB27_13 -; CHECK-BE-NEXT: .LBB27_4: @ %else8 -; CHECK-BE-NEXT: lsls r3, r2, #27 -; CHECK-BE-NEXT: bmi .LBB27_14 -; CHECK-BE-NEXT: .LBB27_5: @ %else11 -; CHECK-BE-NEXT: lsls r3, r2, #26 -; CHECK-BE-NEXT: bmi .LBB27_15 -; CHECK-BE-NEXT: .LBB27_6: @ %else14 -; CHECK-BE-NEXT: lsls r3, r2, #25 -; CHECK-BE-NEXT: bmi .LBB27_16 -; CHECK-BE-NEXT: .LBB27_7: @ %else17 -; CHECK-BE-NEXT: lsls r2, r2, #24 -; CHECK-BE-NEXT: bpl .LBB27_9 -; CHECK-BE-NEXT: .LBB27_8: @ %cond.load19 -; CHECK-BE-NEXT: vldr.16 s4, [r0, #14] -; CHECK-BE-NEXT: vmov r2, s4 -; CHECK-BE-NEXT: vmov.16 q0[7], r2 -; CHECK-BE-NEXT: .LBB27_9: @ %else20 -; CHECK-BE-NEXT: vstrh.16 q0, [r1] -; CHECK-BE-NEXT: add sp, #8 -; CHECK-BE-NEXT: bx lr -; CHECK-BE-NEXT: .LBB27_10: @ %cond.load -; CHECK-BE-NEXT: vldr.16 s0, [r0] -; CHECK-BE-NEXT: lsls r3, r2, #30 -; CHECK-BE-NEXT: bpl .LBB27_2 -; CHECK-BE-NEXT: .LBB27_11: @ %cond.load1 -; CHECK-BE-NEXT: vldr.16 s4, [r0, #2] -; CHECK-BE-NEXT: vmov r3, s4 -; CHECK-BE-NEXT: vmov.16 q0[1], r3 -; CHECK-BE-NEXT: lsls r3, r2, #29 -; CHECK-BE-NEXT: bpl .LBB27_3 -; CHECK-BE-NEXT: .LBB27_12: @ %cond.load4 -; CHECK-BE-NEXT: vldr.16 s4, [r0, #4] -; CHECK-BE-NEXT: vmov r3, s4 -; CHECK-BE-NEXT: vmov.16 q0[2], r3 -; CHECK-BE-NEXT: lsls r3, r2, #28 -; CHECK-BE-NEXT: bpl .LBB27_4 -; CHECK-BE-NEXT: .LBB27_13: @ %cond.load7 -; CHECK-BE-NEXT: vldr.16 s4, [r0, #6] -; CHECK-BE-NEXT: vmov r3, s4 -; CHECK-BE-NEXT: vmov.16 q0[3], r3 -; CHECK-BE-NEXT: lsls r3, r2, #27 -; CHECK-BE-NEXT: bpl .LBB27_5 -; CHECK-BE-NEXT: .LBB27_14: @ %cond.load10 -; CHECK-BE-NEXT: vldr.16 s4, [r0, #8] -; CHECK-BE-NEXT: vmov r3, s4 -; CHECK-BE-NEXT: vmov.16 q0[4], r3 -; CHECK-BE-NEXT: lsls r3, r2, #26 -; CHECK-BE-NEXT: bpl .LBB27_6 -; CHECK-BE-NEXT: .LBB27_15: @ %cond.load13 -; CHECK-BE-NEXT: vldr.16 s4, [r0, #10] -; CHECK-BE-NEXT: vmov r3, s4 -; CHECK-BE-NEXT: vmov.16 q0[5], r3 -; CHECK-BE-NEXT: lsls r3, r2, #25 -; CHECK-BE-NEXT: bpl .LBB27_7 -; CHECK-BE-NEXT: .LBB27_16: @ %cond.load16 -; CHECK-BE-NEXT: vldr.16 s4, [r0, #12] -; CHECK-BE-NEXT: vmov r3, s4 -; CHECK-BE-NEXT: vmov.16 q0[6], r3 -; CHECK-BE-NEXT: lsls r2, r2, #24 -; CHECK-BE-NEXT: bmi .LBB27_8 -; CHECK-BE-NEXT: b .LBB27_9 -entry: - %z = getelementptr inbounds i8, i8* %x, i32 4 - %0 = bitcast i8* %z to <8 x half>* - %c = icmp sgt <8 x i16> %a, zeroinitializer - %1 = call <8 x half> 
@llvm.masked.load.v8f16.p0v8f16(<8 x half>* %0, i32 4, <8 x i1> %c, <8 x half> undef) - %2 = bitcast i8* %y to <8 x half>* - store <8 x half> %1, <8 x half>* %2, align 4 - ret i8* %z -} - -define arm_aapcs_vfpcc i8* @masked_v8f16_postinc(i8* %x, i8* %y, <8 x i16> %a) { -; CHECK-LE-LABEL: masked_v8f16_postinc: -; CHECK-LE: @ %bb.0: @ %entry -; CHECK-LE-NEXT: .pad #8 -; CHECK-LE-NEXT: sub sp, #8 -; CHECK-LE-NEXT: vcmp.s16 gt, q0, zr -; CHECK-LE-NEXT: movs r2, #0 -; CHECK-LE-NEXT: vmrs r12, p0 -; CHECK-LE-NEXT: @ implicit-def: $q0 -; CHECK-LE-NEXT: and r3, r12, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #0, #1 -; CHECK-LE-NEXT: ubfx r3, r12, #2, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #1, #1 -; CHECK-LE-NEXT: ubfx r3, r12, #4, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #2, #1 -; CHECK-LE-NEXT: ubfx r3, r12, #6, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #3, #1 -; CHECK-LE-NEXT: ubfx r3, r12, #8, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #4, #1 -; CHECK-LE-NEXT: ubfx r3, r12, #10, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #5, #1 -; CHECK-LE-NEXT: ubfx r3, r12, #12, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #6, #1 -; CHECK-LE-NEXT: ubfx r3, r12, #14, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #7, #1 -; CHECK-LE-NEXT: uxtb r3, r2 -; CHECK-LE-NEXT: lsls r2, r2, #31 -; CHECK-LE-NEXT: bne .LBB28_12 -; CHECK-LE-NEXT: @ %bb.1: @ %else -; CHECK-LE-NEXT: lsls r2, r3, #30 -; CHECK-LE-NEXT: bmi .LBB28_13 -; CHECK-LE-NEXT: .LBB28_2: @ %else2 -; CHECK-LE-NEXT: lsls r2, r3, #29 -; CHECK-LE-NEXT: bmi .LBB28_14 -; CHECK-LE-NEXT: .LBB28_3: @ %else5 -; CHECK-LE-NEXT: lsls r2, r3, #28 -; CHECK-LE-NEXT: bmi .LBB28_15 -; CHECK-LE-NEXT: .LBB28_4: @ %else8 -; CHECK-LE-NEXT: lsls r2, r3, #27 -; CHECK-LE-NEXT: bmi .LBB28_16 -; CHECK-LE-NEXT: .LBB28_5: @ %else11 -; CHECK-LE-NEXT: lsls r2, r3, #26 -; CHECK-LE-NEXT: bpl .LBB28_7 -; CHECK-LE-NEXT: .LBB28_6: @ %cond.load13 -; CHECK-LE-NEXT: vldr.16 s4, [r0, #10] -; CHECK-LE-NEXT: vmov r2, s4 -; CHECK-LE-NEXT: vmov.16 q0[5], r2 -; CHECK-LE-NEXT: .LBB28_7: @ %else14 -; CHECK-LE-NEXT: add.w r12, r0, #4 -; CHECK-LE-NEXT: lsls r2, r3, #25 -; CHECK-LE-NEXT: bpl .LBB28_9 -; CHECK-LE-NEXT: @ %bb.8: @ %cond.load16 -; CHECK-LE-NEXT: vldr.16 s4, [r0, #12] -; CHECK-LE-NEXT: vmov r2, s4 -; CHECK-LE-NEXT: vmov.16 q0[6], r2 -; CHECK-LE-NEXT: .LBB28_9: @ %else17 -; CHECK-LE-NEXT: lsls r2, r3, #24 -; CHECK-LE-NEXT: bpl .LBB28_11 -; CHECK-LE-NEXT: @ %bb.10: @ %cond.load19 -; CHECK-LE-NEXT: vldr.16 s4, [r0, #14] -; CHECK-LE-NEXT: vmov r0, s4 -; CHECK-LE-NEXT: vmov.16 q0[7], r0 -; CHECK-LE-NEXT: .LBB28_11: @ %else20 -; CHECK-LE-NEXT: vstrw.32 q0, [r1] -; CHECK-LE-NEXT: mov r0, r12 -; CHECK-LE-NEXT: add sp, #8 -; CHECK-LE-NEXT: bx lr -; CHECK-LE-NEXT: .LBB28_12: @ %cond.load -; CHECK-LE-NEXT: vldr.16 s0, [r0] -; CHECK-LE-NEXT: lsls r2, r3, #30 -; CHECK-LE-NEXT: bpl .LBB28_2 -; CHECK-LE-NEXT: .LBB28_13: @ %cond.load1 -; CHECK-LE-NEXT: vldr.16 s4, [r0, #2] -; CHECK-LE-NEXT: vmov r2, s4 -; CHECK-LE-NEXT: vmov.16 q0[1], r2 -; CHECK-LE-NEXT: lsls r2, r3, #29 -; CHECK-LE-NEXT: bpl .LBB28_3 -; CHECK-LE-NEXT: .LBB28_14: @ %cond.load4 -; CHECK-LE-NEXT: vldr.16 s4, [r0, #4] -; CHECK-LE-NEXT: vmov r2, s4 -; CHECK-LE-NEXT: vmov.16 q0[2], r2 -; CHECK-LE-NEXT: lsls r2, r3, #28 -; CHECK-LE-NEXT: bpl .LBB28_4 -; CHECK-LE-NEXT: .LBB28_15: @ %cond.load7 -; CHECK-LE-NEXT: vldr.16 s4, [r0, #6] -; CHECK-LE-NEXT: vmov 
r2, s4 -; CHECK-LE-NEXT: vmov.16 q0[3], r2 -; CHECK-LE-NEXT: lsls r2, r3, #27 -; CHECK-LE-NEXT: bpl .LBB28_5 -; CHECK-LE-NEXT: .LBB28_16: @ %cond.load10 -; CHECK-LE-NEXT: vldr.16 s4, [r0, #8] -; CHECK-LE-NEXT: vmov r2, s4 -; CHECK-LE-NEXT: vmov.16 q0[4], r2 -; CHECK-LE-NEXT: lsls r2, r3, #26 -; CHECK-LE-NEXT: bmi .LBB28_6 -; CHECK-LE-NEXT: b .LBB28_7 -; -; CHECK-BE-LABEL: masked_v8f16_postinc: -; CHECK-BE: @ %bb.0: @ %entry -; CHECK-BE-NEXT: .pad #8 -; CHECK-BE-NEXT: sub sp, #8 -; CHECK-BE-NEXT: vrev64.16 q1, q0 -; CHECK-BE-NEXT: movs r2, #0 -; CHECK-BE-NEXT: vcmp.s16 gt, q1, zr -; CHECK-BE-NEXT: @ implicit-def: $q0 -; CHECK-BE-NEXT: vmrs r12, p0 -; CHECK-BE-NEXT: and r3, r12, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #0, #1 -; CHECK-BE-NEXT: ubfx r3, r12, #2, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #1, #1 -; CHECK-BE-NEXT: ubfx r3, r12, #4, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #2, #1 -; CHECK-BE-NEXT: ubfx r3, r12, #6, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #3, #1 -; CHECK-BE-NEXT: ubfx r3, r12, #8, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #4, #1 -; CHECK-BE-NEXT: ubfx r3, r12, #10, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #5, #1 -; CHECK-BE-NEXT: ubfx r3, r12, #12, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #6, #1 -; CHECK-BE-NEXT: ubfx r3, r12, #14, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #7, #1 -; CHECK-BE-NEXT: uxtb r3, r2 -; CHECK-BE-NEXT: lsls r2, r2, #31 -; CHECK-BE-NEXT: bne .LBB28_12 -; CHECK-BE-NEXT: @ %bb.1: @ %else -; CHECK-BE-NEXT: lsls r2, r3, #30 -; CHECK-BE-NEXT: bmi .LBB28_13 -; CHECK-BE-NEXT: .LBB28_2: @ %else2 -; CHECK-BE-NEXT: lsls r2, r3, #29 -; CHECK-BE-NEXT: bmi .LBB28_14 -; CHECK-BE-NEXT: .LBB28_3: @ %else5 -; CHECK-BE-NEXT: lsls r2, r3, #28 -; CHECK-BE-NEXT: bmi .LBB28_15 -; CHECK-BE-NEXT: .LBB28_4: @ %else8 -; CHECK-BE-NEXT: lsls r2, r3, #27 -; CHECK-BE-NEXT: bmi .LBB28_16 -; CHECK-BE-NEXT: .LBB28_5: @ %else11 -; CHECK-BE-NEXT: lsls r2, r3, #26 -; CHECK-BE-NEXT: bpl .LBB28_7 -; CHECK-BE-NEXT: .LBB28_6: @ %cond.load13 -; CHECK-BE-NEXT: vldr.16 s4, [r0, #10] -; CHECK-BE-NEXT: vmov r2, s4 -; CHECK-BE-NEXT: vmov.16 q0[5], r2 -; CHECK-BE-NEXT: .LBB28_7: @ %else14 -; CHECK-BE-NEXT: add.w r12, r0, #4 -; CHECK-BE-NEXT: lsls r2, r3, #25 -; CHECK-BE-NEXT: bpl .LBB28_9 -; CHECK-BE-NEXT: @ %bb.8: @ %cond.load16 -; CHECK-BE-NEXT: vldr.16 s4, [r0, #12] -; CHECK-BE-NEXT: vmov r2, s4 -; CHECK-BE-NEXT: vmov.16 q0[6], r2 -; CHECK-BE-NEXT: .LBB28_9: @ %else17 -; CHECK-BE-NEXT: lsls r2, r3, #24 -; CHECK-BE-NEXT: bpl .LBB28_11 -; CHECK-BE-NEXT: @ %bb.10: @ %cond.load19 -; CHECK-BE-NEXT: vldr.16 s4, [r0, #14] -; CHECK-BE-NEXT: vmov r0, s4 -; CHECK-BE-NEXT: vmov.16 q0[7], r0 -; CHECK-BE-NEXT: .LBB28_11: @ %else20 -; CHECK-BE-NEXT: vstrh.16 q0, [r1] -; CHECK-BE-NEXT: mov r0, r12 -; CHECK-BE-NEXT: add sp, #8 -; CHECK-BE-NEXT: bx lr -; CHECK-BE-NEXT: .LBB28_12: @ %cond.load -; CHECK-BE-NEXT: vldr.16 s0, [r0] -; CHECK-BE-NEXT: lsls r2, r3, #30 -; CHECK-BE-NEXT: bpl .LBB28_2 -; CHECK-BE-NEXT: .LBB28_13: @ %cond.load1 -; CHECK-BE-NEXT: vldr.16 s4, [r0, #2] -; CHECK-BE-NEXT: vmov r2, s4 -; CHECK-BE-NEXT: vmov.16 q0[1], r2 -; CHECK-BE-NEXT: lsls r2, r3, #29 -; CHECK-BE-NEXT: bpl .LBB28_3 -; CHECK-BE-NEXT: .LBB28_14: @ %cond.load4 -; CHECK-BE-NEXT: vldr.16 s4, [r0, #4] -; CHECK-BE-NEXT: vmov r2, s4 -; CHECK-BE-NEXT: vmov.16 q0[2], r2 -; CHECK-BE-NEXT: lsls r2, r3, #28 -; CHECK-BE-NEXT: 
bpl .LBB28_4 -; CHECK-BE-NEXT: .LBB28_15: @ %cond.load7 -; CHECK-BE-NEXT: vldr.16 s4, [r0, #6] -; CHECK-BE-NEXT: vmov r2, s4 -; CHECK-BE-NEXT: vmov.16 q0[3], r2 -; CHECK-BE-NEXT: lsls r2, r3, #27 -; CHECK-BE-NEXT: bpl .LBB28_5 -; CHECK-BE-NEXT: .LBB28_16: @ %cond.load10 -; CHECK-BE-NEXT: vldr.16 s4, [r0, #8] -; CHECK-BE-NEXT: vmov r2, s4 -; CHECK-BE-NEXT: vmov.16 q0[4], r2 -; CHECK-BE-NEXT: lsls r2, r3, #26 -; CHECK-BE-NEXT: bmi .LBB28_6 -; CHECK-BE-NEXT: b .LBB28_7 -entry: - %z = getelementptr inbounds i8, i8* %x, i32 4 - %0 = bitcast i8* %x to <8 x half>* - %c = icmp sgt <8 x i16> %a, zeroinitializer - %1 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* %0, i32 4, <8 x i1> %c, <8 x half> undef) - %2 = bitcast i8* %y to <8 x half>* - store <8 x half> %1, <8 x half>* %2, align 4 - ret i8* %z -} - - -define arm_aapcs_vfpcc <2 x i64> @masked_v2i64_align4_zero(<2 x i64> *%dest, <2 x i64> %a) { -; CHECK-LE-LABEL: masked_v2i64_align4_zero: -; CHECK-LE: @ %bb.0: @ %entry -; CHECK-LE-NEXT: .pad #4 -; CHECK-LE-NEXT: sub sp, #4 -; CHECK-LE-NEXT: vmov r3, s0 -; CHECK-LE-NEXT: movs r2, #0 -; CHECK-LE-NEXT: vmov r1, s1 -; CHECK-LE-NEXT: vmov r12, s3 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: vmov r3, s2 -; CHECK-LE-NEXT: sbcs.w r1, r2, r1 -; CHECK-LE-NEXT: mov.w r1, #0 -; CHECK-LE-NEXT: it lt -; CHECK-LE-NEXT: movlt r1, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: sbcs.w r3, r2, r12 -; CHECK-LE-NEXT: it lt -; CHECK-LE-NEXT: movlt r2, #1 -; CHECK-LE-NEXT: cmp r2, #0 -; CHECK-LE-NEXT: it ne -; CHECK-LE-NEXT: mvnne r2, #1 -; CHECK-LE-NEXT: bfi r2, r1, #0, #1 -; CHECK-LE-NEXT: and r1, r2, #3 -; CHECK-LE-NEXT: lsls r2, r2, #31 -; CHECK-LE-NEXT: beq .LBB29_2 -; CHECK-LE-NEXT: @ %bb.1: @ %cond.load -; CHECK-LE-NEXT: vldr d1, .LCPI29_0 -; CHECK-LE-NEXT: vldr d0, [r0] -; CHECK-LE-NEXT: b .LBB29_3 -; CHECK-LE-NEXT: .LBB29_2: -; CHECK-LE-NEXT: vmov.i32 q0, #0x0 -; CHECK-LE-NEXT: .LBB29_3: @ %else -; CHECK-LE-NEXT: lsls r1, r1, #30 -; CHECK-LE-NEXT: it mi -; CHECK-LE-NEXT: vldrmi d1, [r0, #8] -; CHECK-LE-NEXT: add sp, #4 -; CHECK-LE-NEXT: bx lr -; CHECK-LE-NEXT: .p2align 3 -; CHECK-LE-NEXT: @ %bb.4: -; CHECK-LE-NEXT: .LCPI29_0: -; CHECK-LE-NEXT: .long 0 @ double 0 -; CHECK-LE-NEXT: .long 0 -; -; CHECK-BE-LABEL: masked_v2i64_align4_zero: -; CHECK-BE: @ %bb.0: @ %entry -; CHECK-BE-NEXT: .pad #4 -; CHECK-BE-NEXT: sub sp, #4 -; CHECK-BE-NEXT: vrev64.32 q1, q0 -; CHECK-BE-NEXT: movs r2, #0 -; CHECK-BE-NEXT: vmov r3, s7 -; CHECK-BE-NEXT: vmov r1, s6 -; CHECK-BE-NEXT: vmov r12, s4 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: vmov r3, s5 -; CHECK-BE-NEXT: sbcs.w r1, r2, r1 -; CHECK-BE-NEXT: mov.w r1, #0 -; CHECK-BE-NEXT: it lt -; CHECK-BE-NEXT: movlt r1, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: sbcs.w r3, r2, r12 -; CHECK-BE-NEXT: it lt -; CHECK-BE-NEXT: movlt r2, #1 -; CHECK-BE-NEXT: cmp r2, #0 -; CHECK-BE-NEXT: it ne -; CHECK-BE-NEXT: mvnne r2, #1 -; CHECK-BE-NEXT: bfi r2, r1, #0, #1 -; CHECK-BE-NEXT: and r1, r2, #3 -; CHECK-BE-NEXT: lsls r2, r2, #31 -; CHECK-BE-NEXT: beq .LBB29_2 -; CHECK-BE-NEXT: @ %bb.1: @ %cond.load -; CHECK-BE-NEXT: vldr d1, .LCPI29_0 -; CHECK-BE-NEXT: vldr d0, [r0] -; CHECK-BE-NEXT: b .LBB29_3 -; CHECK-BE-NEXT: .LBB29_2: -; CHECK-BE-NEXT: vmov.i32 q1, #0x0 -; CHECK-BE-NEXT: vrev64.32 q0, q1 -; CHECK-BE-NEXT: .LBB29_3: @ %else -; CHECK-BE-NEXT: lsls r1, r1, #30 -; CHECK-BE-NEXT: it mi -; CHECK-BE-NEXT: vldrmi d1, [r0, #8] -; CHECK-BE-NEXT: add sp, #4 -; CHECK-BE-NEXT: bx lr -; CHECK-BE-NEXT: .p2align 3 -; CHECK-BE-NEXT: @ %bb.4: -; 
CHECK-BE-NEXT: .LCPI29_0: -; CHECK-BE-NEXT: .long 0 @ double 0 -; CHECK-BE-NEXT: .long 0 -entry: - %c = icmp sgt <2 x i64> %a, zeroinitializer - %l = call <2 x i64> @llvm.masked.load.v2i64.p0v2i64(<2 x i64>* %dest, i32 8, <2 x i1> %c, <2 x i64> zeroinitializer) - ret <2 x i64> %l -} - -define arm_aapcs_vfpcc <2 x double> @masked_v2f64_align4_zero(<2 x double> *%dest, <2 x double> %a, <2 x i64> %b) { -; CHECK-LE-LABEL: masked_v2f64_align4_zero: -; CHECK-LE: @ %bb.0: @ %entry -; CHECK-LE-NEXT: .pad #4 -; CHECK-LE-NEXT: sub sp, #4 -; CHECK-LE-NEXT: vmov r3, s4 -; CHECK-LE-NEXT: movs r2, #0 -; CHECK-LE-NEXT: vmov r1, s5 -; CHECK-LE-NEXT: vmov r12, s7 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: vmov r3, s6 -; CHECK-LE-NEXT: sbcs.w r1, r2, r1 -; CHECK-LE-NEXT: mov.w r1, #0 -; CHECK-LE-NEXT: it lt -; CHECK-LE-NEXT: movlt r1, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: sbcs.w r3, r2, r12 -; CHECK-LE-NEXT: it lt -; CHECK-LE-NEXT: movlt r2, #1 -; CHECK-LE-NEXT: cmp r2, #0 -; CHECK-LE-NEXT: it ne -; CHECK-LE-NEXT: mvnne r2, #1 -; CHECK-LE-NEXT: bfi r2, r1, #0, #1 -; CHECK-LE-NEXT: and r1, r2, #3 -; CHECK-LE-NEXT: lsls r2, r2, #31 -; CHECK-LE-NEXT: beq .LBB30_2 -; CHECK-LE-NEXT: @ %bb.1: @ %cond.load -; CHECK-LE-NEXT: vldr d1, .LCPI30_0 -; CHECK-LE-NEXT: vldr d0, [r0] -; CHECK-LE-NEXT: b .LBB30_3 -; CHECK-LE-NEXT: .LBB30_2: -; CHECK-LE-NEXT: vmov.i32 q0, #0x0 -; CHECK-LE-NEXT: .LBB30_3: @ %else -; CHECK-LE-NEXT: lsls r1, r1, #30 -; CHECK-LE-NEXT: it mi -; CHECK-LE-NEXT: vldrmi d1, [r0, #8] -; CHECK-LE-NEXT: add sp, #4 -; CHECK-LE-NEXT: bx lr -; CHECK-LE-NEXT: .p2align 3 -; CHECK-LE-NEXT: @ %bb.4: -; CHECK-LE-NEXT: .LCPI30_0: -; CHECK-LE-NEXT: .long 0 @ double 0 -; CHECK-LE-NEXT: .long 0 -; -; CHECK-BE-LABEL: masked_v2f64_align4_zero: -; CHECK-BE: @ %bb.0: @ %entry -; CHECK-BE-NEXT: .pad #4 -; CHECK-BE-NEXT: sub sp, #4 -; CHECK-BE-NEXT: vrev64.32 q0, q1 -; CHECK-BE-NEXT: movs r2, #0 -; CHECK-BE-NEXT: vmov r3, s3 -; CHECK-BE-NEXT: vmov r1, s2 -; CHECK-BE-NEXT: vmov r12, s0 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: vmov r3, s1 -; CHECK-BE-NEXT: sbcs.w r1, r2, r1 -; CHECK-BE-NEXT: mov.w r1, #0 -; CHECK-BE-NEXT: it lt -; CHECK-BE-NEXT: movlt r1, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: sbcs.w r3, r2, r12 -; CHECK-BE-NEXT: it lt -; CHECK-BE-NEXT: movlt r2, #1 -; CHECK-BE-NEXT: cmp r2, #0 -; CHECK-BE-NEXT: it ne -; CHECK-BE-NEXT: mvnne r2, #1 -; CHECK-BE-NEXT: bfi r2, r1, #0, #1 -; CHECK-BE-NEXT: and r1, r2, #3 -; CHECK-BE-NEXT: lsls r2, r2, #31 -; CHECK-BE-NEXT: beq .LBB30_2 -; CHECK-BE-NEXT: @ %bb.1: @ %cond.load -; CHECK-BE-NEXT: vldr d1, .LCPI30_0 -; CHECK-BE-NEXT: vldr d0, [r0] -; CHECK-BE-NEXT: b .LBB30_3 -; CHECK-BE-NEXT: .LBB30_2: -; CHECK-BE-NEXT: vmov.i32 q1, #0x0 -; CHECK-BE-NEXT: vrev64.32 q0, q1 -; CHECK-BE-NEXT: .LBB30_3: @ %else -; CHECK-BE-NEXT: lsls r1, r1, #30 -; CHECK-BE-NEXT: it mi -; CHECK-BE-NEXT: vldrmi d1, [r0, #8] -; CHECK-BE-NEXT: add sp, #4 -; CHECK-BE-NEXT: bx lr -; CHECK-BE-NEXT: .p2align 3 -; CHECK-BE-NEXT: @ %bb.4: -; CHECK-BE-NEXT: .LCPI30_0: -; CHECK-BE-NEXT: .long 0 @ double 0 -; CHECK-BE-NEXT: .long 0 -entry: - %c = icmp sgt <2 x i64> %b, zeroinitializer - %l = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %dest, i32 8, <2 x i1> %c, <2 x double> zeroinitializer) - ret <2 x double> %l -} - -declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32, <4 x i1>, <4 x i32>) -declare <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>*, i32, <8 x i1>, <8 x i16>) -declare <16 x i8> 
@llvm.masked.load.v16i8.p0v16i8(<16 x i8>*, i32, <16 x i1>, <16 x i8>) -declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32, <4 x i1>, <4 x float>) -declare <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>*, i32, <8 x i1>, <8 x half>) -declare <2 x i64> @llvm.masked.load.v2i64.p0v2i64(<2 x i64>*, i32, <2 x i1>, <2 x i64>) -declare <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>*, i32, <2 x i1>, <2 x double>) +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve,+fullfp16 -enable-arm-maskedldst -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE +; RUN: llc -mtriple=thumbebv8.1m.main-arm-none-eabi -mattr=+mve,+fullfp16 -enable-arm-maskedldst -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE + +define arm_aapcs_vfpcc <4 x i32> @masked_v4i32_align4_zero(<4 x i32> *%dest, <4 x i32> %a) { +; CHECK-LE-LABEL: masked_v4i32_align4_zero: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vcmp.s32 gt, q0, zr +; CHECK-LE-NEXT: vpst +; CHECK-LE-NEXT: vldrwt.u32 q0, [r0] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: masked_v4i32_align4_zero: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vrev64.32 q1, q0 +; CHECK-BE-NEXT: vcmp.s32 gt, q1, zr +; CHECK-BE-NEXT: vpst +; CHECK-BE-NEXT: vldrwt.u32 q1, [r0] +; CHECK-BE-NEXT: vrev64.32 q0, q1 +; CHECK-BE-NEXT: bx lr +entry: + %c = icmp sgt <4 x i32> %a, zeroinitializer + %l = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %dest, i32 4, <4 x i1> %c, <4 x i32> zeroinitializer) + ret <4 x i32> %l +} + +define arm_aapcs_vfpcc <4 x i32> @masked_v4i32_align4_undef(<4 x i32> *%dest, <4 x i32> %a) { +; CHECK-LE-LABEL: masked_v4i32_align4_undef: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vcmp.s32 gt, q0, zr +; CHECK-LE-NEXT: vpst +; CHECK-LE-NEXT: vldrwt.u32 q0, [r0] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: masked_v4i32_align4_undef: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vrev64.32 q1, q0 +; CHECK-BE-NEXT: vcmp.s32 gt, q1, zr +; CHECK-BE-NEXT: vpst +; CHECK-BE-NEXT: vldrwt.u32 q1, [r0] +; CHECK-BE-NEXT: vrev64.32 q0, q1 +; CHECK-BE-NEXT: bx lr +entry: + %c = icmp sgt <4 x i32> %a, zeroinitializer + %l = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %dest, i32 4, <4 x i1> %c, <4 x i32> undef) + ret <4 x i32> %l +} + +define arm_aapcs_vfpcc <4 x i32> @masked_v4i32_align1_undef(<4 x i32> *%dest, <4 x i32> %a) { +; CHECK-LE-LABEL: masked_v4i32_align1_undef: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vcmp.s32 gt, q0, zr +; CHECK-LE-NEXT: vpst +; CHECK-LE-NEXT: vldrbt.u8 q0, [r0] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: masked_v4i32_align1_undef: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vrev64.32 q1, q0 +; CHECK-BE-NEXT: vcmp.s32 gt, q1, zr +; CHECK-BE-NEXT: vpst +; CHECK-BE-NEXT: vldrbt.u8 q0, [r0] +; CHECK-BE-NEXT: vrev32.8 q1, q0 +; CHECK-BE-NEXT: vrev64.32 q0, q1 +; CHECK-BE-NEXT: bx lr +entry: + %c = icmp sgt <4 x i32> %a, zeroinitializer + %l = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %dest, i32 1, <4 x i1> %c, <4 x i32> undef) + ret <4 x i32> %l +} + +define arm_aapcs_vfpcc <4 x i32> @masked_v4i32_align4_other(<4 x i32> *%dest, <4 x i32> %a) { +; CHECK-LE-LABEL: masked_v4i32_align4_other: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vcmp.s32 gt, q0, zr +; CHECK-LE-NEXT: vpst +; CHECK-LE-NEXT: vldrwt.u32 q1, [r0] +; CHECK-LE-NEXT: vpsel q0, q1, q0 +; CHECK-LE-NEXT: bx lr +; +; 
CHECK-BE-LABEL: masked_v4i32_align4_other: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vrev64.32 q1, q0 +; CHECK-BE-NEXT: vcmp.s32 gt, q1, zr +; CHECK-BE-NEXT: vpst +; CHECK-BE-NEXT: vldrwt.u32 q0, [r0] +; CHECK-BE-NEXT: vpsel q1, q0, q1 +; CHECK-BE-NEXT: vrev64.32 q0, q1 +; CHECK-BE-NEXT: bx lr +entry: + %c = icmp sgt <4 x i32> %a, zeroinitializer + %l = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %dest, i32 4, <4 x i1> %c, <4 x i32> %a) + ret <4 x i32> %l +} + +define arm_aapcs_vfpcc i8* @masked_v4i32_preinc(i8* %x, i8* %y, <4 x i32> %a) { +; CHECK-LE-LABEL: masked_v4i32_preinc: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vcmp.s32 gt, q0, zr +; CHECK-LE-NEXT: vpst +; CHECK-LE-NEXT: vldrwt.u32 q0, [r0, #4] +; CHECK-LE-NEXT: adds r0, #4 +; CHECK-LE-NEXT: vstrw.32 q0, [r1] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: masked_v4i32_preinc: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vrev64.32 q1, q0 +; CHECK-BE-NEXT: vcmp.s32 gt, q1, zr +; CHECK-BE-NEXT: vpst +; CHECK-BE-NEXT: vldrwt.u32 q0, [r0, #4] +; CHECK-BE-NEXT: adds r0, #4 +; CHECK-BE-NEXT: vstrw.32 q0, [r1] +; CHECK-BE-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 4 + %0 = bitcast i8* %z to <4 x i32>* + %c = icmp sgt <4 x i32> %a, zeroinitializer + %1 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %0, i32 4, <4 x i1> %c, <4 x i32> undef) + %2 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %1, <4 x i32>* %2, align 4 + ret i8* %z +} + +define arm_aapcs_vfpcc i8* @masked_v4i32_postinc(i8* %x, i8* %y, <4 x i32> %a) { +; CHECK-LE-LABEL: masked_v4i32_postinc: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vcmp.s32 gt, q0, zr +; CHECK-LE-NEXT: vpst +; CHECK-LE-NEXT: vldrwt.u32 q0, [r0] +; CHECK-LE-NEXT: adds r0, #4 +; CHECK-LE-NEXT: vstrw.32 q0, [r1] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: masked_v4i32_postinc: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vrev64.32 q1, q0 +; CHECK-BE-NEXT: vcmp.s32 gt, q1, zr +; CHECK-BE-NEXT: vpst +; CHECK-BE-NEXT: vldrwt.u32 q0, [r0] +; CHECK-BE-NEXT: adds r0, #4 +; CHECK-BE-NEXT: vstrw.32 q0, [r1] +; CHECK-BE-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 4 + %0 = bitcast i8* %x to <4 x i32>* + %c = icmp sgt <4 x i32> %a, zeroinitializer + %1 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %0, i32 4, <4 x i1> %c, <4 x i32> undef) + %2 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %1, <4 x i32>* %2, align 4 + ret i8* %z +} + + + +define arm_aapcs_vfpcc <8 x i16> @masked_v8i16_align4_zero(<8 x i16> *%dest, <8 x i16> %a) { +; CHECK-LE-LABEL: masked_v8i16_align4_zero: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vcmp.s16 gt, q0, zr +; CHECK-LE-NEXT: vmov.i32 q1, #0x0 +; CHECK-LE-NEXT: vpst +; CHECK-LE-NEXT: vldrht.u16 q0, [r0] +; CHECK-LE-NEXT: vpsel q0, q0, q1 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: masked_v8i16_align4_zero: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vmov.i32 q1, #0x0 +; CHECK-BE-NEXT: vrev64.16 q2, q0 +; CHECK-BE-NEXT: vcmp.s16 gt, q2, zr +; CHECK-BE-NEXT: vrev32.16 q1, q1 +; CHECK-BE-NEXT: vpst +; CHECK-BE-NEXT: vldrht.u16 q0, [r0] +; CHECK-BE-NEXT: vpsel q1, q0, q1 +; CHECK-BE-NEXT: vrev64.16 q0, q1 +; CHECK-BE-NEXT: bx lr +entry: + %c = icmp sgt <8 x i16> %a, zeroinitializer + %l = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %dest, i32 2, <8 x i1> %c, <8 x i16> zeroinitializer) + ret <8 x i16> %l +} + +define arm_aapcs_vfpcc <8 x i16> @masked_v8i16_align4_undef(<8 x i16> *%dest, <8 x i16> %a) { +; CHECK-LE-LABEL: masked_v8i16_align4_undef: +; 
CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vcmp.s16 gt, q0, zr +; CHECK-LE-NEXT: vpst +; CHECK-LE-NEXT: vldrht.u16 q0, [r0] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: masked_v8i16_align4_undef: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vrev64.16 q1, q0 +; CHECK-BE-NEXT: vcmp.s16 gt, q1, zr +; CHECK-BE-NEXT: vpst +; CHECK-BE-NEXT: vldrht.u16 q1, [r0] +; CHECK-BE-NEXT: vrev64.16 q0, q1 +; CHECK-BE-NEXT: bx lr +entry: + %c = icmp sgt <8 x i16> %a, zeroinitializer + %l = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %dest, i32 2, <8 x i1> %c, <8 x i16> undef) + ret <8 x i16> %l +} + +define arm_aapcs_vfpcc <8 x i16> @masked_v8i16_align1_undef(<8 x i16> *%dest, <8 x i16> %a) { +; CHECK-LE-LABEL: masked_v8i16_align1_undef: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vcmp.s16 gt, q0, zr +; CHECK-LE-NEXT: vpst +; CHECK-LE-NEXT: vldrbt.u8 q0, [r0] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: masked_v8i16_align1_undef: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vrev64.16 q1, q0 +; CHECK-BE-NEXT: vcmp.s16 gt, q1, zr +; CHECK-BE-NEXT: vpst +; CHECK-BE-NEXT: vldrbt.u8 q0, [r0] +; CHECK-BE-NEXT: vrev16.8 q1, q0 +; CHECK-BE-NEXT: vrev64.16 q0, q1 +; CHECK-BE-NEXT: bx lr +entry: + %c = icmp sgt <8 x i16> %a, zeroinitializer + %l = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %dest, i32 1, <8 x i1> %c, <8 x i16> undef) + ret <8 x i16> %l +} + +define arm_aapcs_vfpcc <8 x i16> @masked_v8i16_align4_other(<8 x i16> *%dest, <8 x i16> %a) { +; CHECK-LE-LABEL: masked_v8i16_align4_other: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vcmp.s16 gt, q0, zr +; CHECK-LE-NEXT: vpst +; CHECK-LE-NEXT: vldrht.u16 q1, [r0] +; CHECK-LE-NEXT: vpsel q0, q1, q0 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: masked_v8i16_align4_other: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vrev64.16 q1, q0 +; CHECK-BE-NEXT: vcmp.s16 gt, q1, zr +; CHECK-BE-NEXT: vpst +; CHECK-BE-NEXT: vldrht.u16 q0, [r0] +; CHECK-BE-NEXT: vpsel q1, q0, q1 +; CHECK-BE-NEXT: vrev64.16 q0, q1 +; CHECK-BE-NEXT: bx lr +entry: + %c = icmp sgt <8 x i16> %a, zeroinitializer + %l = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %dest, i32 2, <8 x i1> %c, <8 x i16> %a) + ret <8 x i16> %l +} + +define i8* @masked_v8i16_preinc(i8* %x, i8* %y, <8 x i16> %a) { +; CHECK-LE-LABEL: masked_v8i16_preinc: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldr d1, [sp] +; CHECK-LE-NEXT: vmov d0, r2, r3 +; CHECK-LE-NEXT: vcmp.s16 gt, q0, zr +; CHECK-LE-NEXT: vpst +; CHECK-LE-NEXT: vldrht.u16 q0, [r0, #4] +; CHECK-LE-NEXT: adds r0, #4 +; CHECK-LE-NEXT: vstrw.32 q0, [r1] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: masked_v8i16_preinc: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldr d1, [sp] +; CHECK-BE-NEXT: vmov d0, r3, r2 +; CHECK-BE-NEXT: vrev64.16 q1, q0 +; CHECK-BE-NEXT: vcmp.s16 gt, q1, zr +; CHECK-BE-NEXT: vpst +; CHECK-BE-NEXT: vldrht.u16 q0, [r0, #4] +; CHECK-BE-NEXT: adds r0, #4 +; CHECK-BE-NEXT: vstrh.16 q0, [r1] +; CHECK-BE-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 4 + %0 = bitcast i8* %z to <8 x i16>* + %c = icmp sgt <8 x i16> %a, zeroinitializer + %1 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %0, i32 4, <8 x i1> %c, <8 x i16> undef) + %2 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %1, <8 x i16>* %2, align 4 + ret i8* %z +} + +define arm_aapcs_vfpcc i8* @masked_v8i16_postinc(i8* %x, i8* %y, <8 x i16> %a) { +; CHECK-LE-LABEL: masked_v8i16_postinc: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vcmp.s16 gt, q0, zr +; CHECK-LE-NEXT: vpst +; 
CHECK-LE-NEXT: vldrht.u16 q0, [r0] +; CHECK-LE-NEXT: adds r0, #4 +; CHECK-LE-NEXT: vstrw.32 q0, [r1] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: masked_v8i16_postinc: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vrev64.16 q1, q0 +; CHECK-BE-NEXT: vcmp.s16 gt, q1, zr +; CHECK-BE-NEXT: vpst +; CHECK-BE-NEXT: vldrht.u16 q0, [r0] +; CHECK-BE-NEXT: adds r0, #4 +; CHECK-BE-NEXT: vstrh.16 q0, [r1] +; CHECK-BE-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 4 + %0 = bitcast i8* %x to <8 x i16>* + %c = icmp sgt <8 x i16> %a, zeroinitializer + %1 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %0, i32 4, <8 x i1> %c, <8 x i16> undef) + %2 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %1, <8 x i16>* %2, align 4 + ret i8* %z +} + + +define arm_aapcs_vfpcc <16 x i8> @masked_v16i8_align4_zero(<16 x i8> *%dest, <16 x i8> %a) { +; CHECK-LE-LABEL: masked_v16i8_align4_zero: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vcmp.s8 gt, q0, zr +; CHECK-LE-NEXT: vmov.i32 q1, #0x0 +; CHECK-LE-NEXT: vpst +; CHECK-LE-NEXT: vldrbt.u8 q0, [r0] +; CHECK-LE-NEXT: vpsel q0, q0, q1 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: masked_v16i8_align4_zero: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vmov.i32 q1, #0x0 +; CHECK-BE-NEXT: vrev64.8 q2, q0 +; CHECK-BE-NEXT: vcmp.s8 gt, q2, zr +; CHECK-BE-NEXT: vrev32.8 q1, q1 +; CHECK-BE-NEXT: vpst +; CHECK-BE-NEXT: vldrbt.u8 q0, [r0] +; CHECK-BE-NEXT: vpsel q1, q0, q1 +; CHECK-BE-NEXT: vrev64.8 q0, q1 +; CHECK-BE-NEXT: bx lr +entry: + %c = icmp sgt <16 x i8> %a, zeroinitializer + %l = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %dest, i32 1, <16 x i1> %c, <16 x i8> zeroinitializer) + ret <16 x i8> %l +} + +define arm_aapcs_vfpcc <16 x i8> @masked_v16i8_align4_undef(<16 x i8> *%dest, <16 x i8> %a) { +; CHECK-LE-LABEL: masked_v16i8_align4_undef: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vcmp.s8 gt, q0, zr +; CHECK-LE-NEXT: vpst +; CHECK-LE-NEXT: vldrbt.u8 q0, [r0] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: masked_v16i8_align4_undef: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vrev64.8 q1, q0 +; CHECK-BE-NEXT: vcmp.s8 gt, q1, zr +; CHECK-BE-NEXT: vpst +; CHECK-BE-NEXT: vldrbt.u8 q1, [r0] +; CHECK-BE-NEXT: vrev64.8 q0, q1 +; CHECK-BE-NEXT: bx lr +entry: + %c = icmp sgt <16 x i8> %a, zeroinitializer + %l = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %dest, i32 1, <16 x i1> %c, <16 x i8> undef) + ret <16 x i8> %l +} + +define arm_aapcs_vfpcc <16 x i8> @masked_v16i8_align4_other(<16 x i8> *%dest, <16 x i8> %a) { +; CHECK-LE-LABEL: masked_v16i8_align4_other: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vcmp.s8 gt, q0, zr +; CHECK-LE-NEXT: vpst +; CHECK-LE-NEXT: vldrbt.u8 q1, [r0] +; CHECK-LE-NEXT: vpsel q0, q1, q0 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: masked_v16i8_align4_other: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vrev64.8 q1, q0 +; CHECK-BE-NEXT: vcmp.s8 gt, q1, zr +; CHECK-BE-NEXT: vpst +; CHECK-BE-NEXT: vldrbt.u8 q0, [r0] +; CHECK-BE-NEXT: vpsel q1, q0, q1 +; CHECK-BE-NEXT: vrev64.8 q0, q1 +; CHECK-BE-NEXT: bx lr +entry: + %c = icmp sgt <16 x i8> %a, zeroinitializer + %l = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %dest, i32 1, <16 x i1> %c, <16 x i8> %a) + ret <16 x i8> %l +} + +define arm_aapcs_vfpcc i8* @masked_v16i8_preinc(i8* %x, i8* %y, <16 x i8> %a) { +; CHECK-LE-LABEL: masked_v16i8_preinc: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vcmp.s8 gt, q0, zr +; CHECK-LE-NEXT: vpst +; CHECK-LE-NEXT: vldrbt.u8 q0, [r0, #4] +; CHECK-LE-NEXT: adds r0, #4 +; 
CHECK-LE-NEXT: vstrw.32 q0, [r1] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: masked_v16i8_preinc: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vrev64.8 q1, q0 +; CHECK-BE-NEXT: vcmp.s8 gt, q1, zr +; CHECK-BE-NEXT: vpst +; CHECK-BE-NEXT: vldrbt.u8 q0, [r0, #4] +; CHECK-BE-NEXT: adds r0, #4 +; CHECK-BE-NEXT: vstrb.8 q0, [r1] +; CHECK-BE-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 4 + %0 = bitcast i8* %z to <16 x i8>* + %c = icmp sgt <16 x i8> %a, zeroinitializer + %1 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %0, i32 4, <16 x i1> %c, <16 x i8> undef) + %2 = bitcast i8* %y to <16 x i8>* + store <16 x i8> %1, <16 x i8>* %2, align 4 + ret i8* %z +} + +define arm_aapcs_vfpcc i8* @masked_v16i8_postinc(i8* %x, i8* %y, <16 x i8> %a) { +; CHECK-LE-LABEL: masked_v16i8_postinc: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vcmp.s8 gt, q0, zr +; CHECK-LE-NEXT: vpst +; CHECK-LE-NEXT: vldrbt.u8 q0, [r0] +; CHECK-LE-NEXT: adds r0, #4 +; CHECK-LE-NEXT: vstrw.32 q0, [r1] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: masked_v16i8_postinc: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vrev64.8 q1, q0 +; CHECK-BE-NEXT: vcmp.s8 gt, q1, zr +; CHECK-BE-NEXT: vpst +; CHECK-BE-NEXT: vldrbt.u8 q0, [r0] +; CHECK-BE-NEXT: adds r0, #4 +; CHECK-BE-NEXT: vstrb.8 q0, [r1] +; CHECK-BE-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 4 + %0 = bitcast i8* %x to <16 x i8>* + %c = icmp sgt <16 x i8> %a, zeroinitializer + %1 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %0, i32 4, <16 x i1> %c, <16 x i8> undef) + %2 = bitcast i8* %y to <16 x i8>* + store <16 x i8> %1, <16 x i8>* %2, align 4 + ret i8* %z +} + + +define arm_aapcs_vfpcc <4 x float> @masked_v4f32_align4_zero(<4 x float> *%dest, <4 x i32> %a) { +; CHECK-LE-LABEL: masked_v4f32_align4_zero: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vcmp.s32 gt, q0, zr +; CHECK-LE-NEXT: vmov.i32 q1, #0x0 +; CHECK-LE-NEXT: vpst +; CHECK-LE-NEXT: vldrwt.u32 q0, [r0] +; CHECK-LE-NEXT: vpsel q0, q0, q1 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: masked_v4f32_align4_zero: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vrev64.32 q2, q0 +; CHECK-BE-NEXT: vmov.i32 q1, #0x0 +; CHECK-BE-NEXT: vcmp.s32 gt, q2, zr +; CHECK-BE-NEXT: vpst +; CHECK-BE-NEXT: vldrwt.u32 q0, [r0] +; CHECK-BE-NEXT: vpsel q1, q0, q1 +; CHECK-BE-NEXT: vrev64.32 q0, q1 +; CHECK-BE-NEXT: bx lr +entry: + %c = icmp sgt <4 x i32> %a, zeroinitializer + %l = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %dest, i32 4, <4 x i1> %c, <4 x float> zeroinitializer) + ret <4 x float> %l +} + +define arm_aapcs_vfpcc <4 x float> @masked_v4f32_align4_undef(<4 x float> *%dest, <4 x i32> %a) { +; CHECK-LE-LABEL: masked_v4f32_align4_undef: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vcmp.s32 gt, q0, zr +; CHECK-LE-NEXT: vpst +; CHECK-LE-NEXT: vldrwt.u32 q0, [r0] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: masked_v4f32_align4_undef: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vrev64.32 q1, q0 +; CHECK-BE-NEXT: vcmp.s32 gt, q1, zr +; CHECK-BE-NEXT: vpst +; CHECK-BE-NEXT: vldrwt.u32 q1, [r0] +; CHECK-BE-NEXT: vrev64.32 q0, q1 +; CHECK-BE-NEXT: bx lr +entry: + %c = icmp sgt <4 x i32> %a, zeroinitializer + %l = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %dest, i32 4, <4 x i1> %c, <4 x float> undef) + ret <4 x float> %l +} + +define arm_aapcs_vfpcc <4 x float> @masked_v4f32_align1_undef(<4 x float> *%dest, <4 x i32> %a) { +; CHECK-LE-LABEL: masked_v4f32_align1_undef: +; CHECK-LE: @ %bb.0: @ %entry +; 
CHECK-LE-NEXT: vcmp.s32 gt, q0, zr +; CHECK-LE-NEXT: vpst +; CHECK-LE-NEXT: vldrbt.u8 q0, [r0] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: masked_v4f32_align1_undef: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vrev64.32 q1, q0 +; CHECK-BE-NEXT: vcmp.s32 gt, q1, zr +; CHECK-BE-NEXT: vpst +; CHECK-BE-NEXT: vldrbt.u8 q0, [r0] +; CHECK-BE-NEXT: vrev32.8 q1, q0 +; CHECK-BE-NEXT: vrev64.32 q0, q1 +; CHECK-BE-NEXT: bx lr +entry: + %c = icmp sgt <4 x i32> %a, zeroinitializer + %l = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %dest, i32 1, <4 x i1> %c, <4 x float> undef) + ret <4 x float> %l +} + +define arm_aapcs_vfpcc <4 x float> @masked_v4f32_align4_other(<4 x float> *%dest, <4 x i32> %a, <4 x float> %b) { +; CHECK-LE-LABEL: masked_v4f32_align4_other: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vcmp.s32 gt, q0, zr +; CHECK-LE-NEXT: vpst +; CHECK-LE-NEXT: vldrwt.u32 q0, [r0] +; CHECK-LE-NEXT: vpsel q0, q0, q1 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: masked_v4f32_align4_other: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vrev64.32 q2, q1 +; CHECK-BE-NEXT: vrev64.32 q1, q0 +; CHECK-BE-NEXT: vcmp.s32 gt, q1, zr +; CHECK-BE-NEXT: vpst +; CHECK-BE-NEXT: vldrwt.u32 q0, [r0] +; CHECK-BE-NEXT: vpsel q1, q0, q2 +; CHECK-BE-NEXT: vrev64.32 q0, q1 +; CHECK-BE-NEXT: bx lr +entry: + %c = icmp sgt <4 x i32> %a, zeroinitializer + %l = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %dest, i32 4, <4 x i1> %c, <4 x float> %b) + ret <4 x float> %l +} + +define arm_aapcs_vfpcc i8* @masked_v4f32_preinc(i8* %x, i8* %y, <4 x i32> %a) { +; CHECK-LE-LABEL: masked_v4f32_preinc: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vcmp.s32 gt, q0, zr +; CHECK-LE-NEXT: vpst +; CHECK-LE-NEXT: vldrwt.u32 q0, [r0, #4] +; CHECK-LE-NEXT: adds r0, #4 +; CHECK-LE-NEXT: vstrw.32 q0, [r1] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: masked_v4f32_preinc: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vrev64.32 q1, q0 +; CHECK-BE-NEXT: vcmp.s32 gt, q1, zr +; CHECK-BE-NEXT: vpst +; CHECK-BE-NEXT: vldrwt.u32 q0, [r0, #4] +; CHECK-BE-NEXT: adds r0, #4 +; CHECK-BE-NEXT: vstrw.32 q0, [r1] +; CHECK-BE-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 4 + %0 = bitcast i8* %z to <4 x float>* + %c = icmp sgt <4 x i32> %a, zeroinitializer + %1 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %0, i32 4, <4 x i1> %c, <4 x float> undef) + %2 = bitcast i8* %y to <4 x float>* + store <4 x float> %1, <4 x float>* %2, align 4 + ret i8* %z +} + +define arm_aapcs_vfpcc i8* @masked_v4f32_postinc(i8* %x, i8* %y, <4 x i32> %a) { +; CHECK-LE-LABEL: masked_v4f32_postinc: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vcmp.s32 gt, q0, zr +; CHECK-LE-NEXT: vpst +; CHECK-LE-NEXT: vldrwt.u32 q0, [r0] +; CHECK-LE-NEXT: adds r0, #4 +; CHECK-LE-NEXT: vstrw.32 q0, [r1] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: masked_v4f32_postinc: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vrev64.32 q1, q0 +; CHECK-BE-NEXT: vcmp.s32 gt, q1, zr +; CHECK-BE-NEXT: vpst +; CHECK-BE-NEXT: vldrwt.u32 q0, [r0] +; CHECK-BE-NEXT: adds r0, #4 +; CHECK-BE-NEXT: vstrw.32 q0, [r1] +; CHECK-BE-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 4 + %0 = bitcast i8* %x to <4 x float>* + %c = icmp sgt <4 x i32> %a, zeroinitializer + %1 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %0, i32 4, <4 x i1> %c, <4 x float> undef) + %2 = bitcast i8* %y to <4 x float>* + store <4 x float> %1, <4 x float>* %2, align 4 + ret i8* %z +} + + +define arm_aapcs_vfpcc <8 x half> 
@masked_v8f16_align4_zero(<8 x half> *%dest, <8 x i16> %a) { +; CHECK-LE-LABEL: masked_v8f16_align4_zero: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vcmp.s16 gt, q0, zr +; CHECK-LE-NEXT: vmov.i32 q1, #0x0 +; CHECK-LE-NEXT: vpst +; CHECK-LE-NEXT: vldrht.u16 q0, [r0] +; CHECK-LE-NEXT: vpsel q0, q0, q1 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: masked_v8f16_align4_zero: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vmov.i32 q1, #0x0 +; CHECK-BE-NEXT: vrev64.16 q2, q0 +; CHECK-BE-NEXT: vcmp.s16 gt, q2, zr +; CHECK-BE-NEXT: vrev32.16 q1, q1 +; CHECK-BE-NEXT: vpst +; CHECK-BE-NEXT: vldrht.u16 q0, [r0] +; CHECK-BE-NEXT: vpsel q1, q0, q1 +; CHECK-BE-NEXT: vrev64.16 q0, q1 +; CHECK-BE-NEXT: bx lr +entry: + %c = icmp sgt <8 x i16> %a, zeroinitializer + %l = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* %dest, i32 2, <8 x i1> %c, <8 x half> zeroinitializer) + ret <8 x half> %l +} + +define arm_aapcs_vfpcc <8 x half> @masked_v8f16_align4_undef(<8 x half> *%dest, <8 x i16> %a) { +; CHECK-LE-LABEL: masked_v8f16_align4_undef: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vcmp.s16 gt, q0, zr +; CHECK-LE-NEXT: vpst +; CHECK-LE-NEXT: vldrht.u16 q0, [r0] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: masked_v8f16_align4_undef: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vrev64.16 q1, q0 +; CHECK-BE-NEXT: vcmp.s16 gt, q1, zr +; CHECK-BE-NEXT: vpst +; CHECK-BE-NEXT: vldrht.u16 q1, [r0] +; CHECK-BE-NEXT: vrev64.16 q0, q1 +; CHECK-BE-NEXT: bx lr +entry: + %c = icmp sgt <8 x i16> %a, zeroinitializer + %l = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* %dest, i32 2, <8 x i1> %c, <8 x half> undef) + ret <8 x half> %l +} + +define arm_aapcs_vfpcc <8 x half> @masked_v8f16_align1_undef(<8 x half> *%dest, <8 x i16> %a) { +; CHECK-LE-LABEL: masked_v8f16_align1_undef: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vcmp.s16 gt, q0, zr +; CHECK-LE-NEXT: vpst +; CHECK-LE-NEXT: vldrbt.u8 q0, [r0] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: masked_v8f16_align1_undef: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vrev64.16 q1, q0 +; CHECK-BE-NEXT: vcmp.s16 gt, q1, zr +; CHECK-BE-NEXT: vpst +; CHECK-BE-NEXT: vldrbt.u8 q0, [r0] +; CHECK-BE-NEXT: vrev16.8 q1, q0 +; CHECK-BE-NEXT: vrev64.16 q0, q1 +; CHECK-BE-NEXT: bx lr +entry: + %c = icmp sgt <8 x i16> %a, zeroinitializer + %l = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* %dest, i32 1, <8 x i1> %c, <8 x half> undef) + ret <8 x half> %l +} + +define arm_aapcs_vfpcc <8 x half> @masked_v8f16_align4_other(<8 x half> *%dest, <8 x i16> %a, <8 x half> %b) { +; CHECK-LE-LABEL: masked_v8f16_align4_other: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vcmp.s16 gt, q0, zr +; CHECK-LE-NEXT: vpst +; CHECK-LE-NEXT: vldrht.u16 q0, [r0] +; CHECK-LE-NEXT: vpsel q0, q0, q1 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: masked_v8f16_align4_other: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vrev64.16 q2, q1 +; CHECK-BE-NEXT: vrev64.16 q1, q0 +; CHECK-BE-NEXT: vcmp.s16 gt, q1, zr +; CHECK-BE-NEXT: vpst +; CHECK-BE-NEXT: vldrht.u16 q0, [r0] +; CHECK-BE-NEXT: vpsel q1, q0, q2 +; CHECK-BE-NEXT: vrev64.16 q0, q1 +; CHECK-BE-NEXT: bx lr +entry: + %c = icmp sgt <8 x i16> %a, zeroinitializer + %l = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* %dest, i32 2, <8 x i1> %c, <8 x half> %b) + ret <8 x half> %l +} + +define arm_aapcs_vfpcc i8* @masked_v8f16_preinc(i8* %x, i8* %y, <8 x i16> %a) { +; CHECK-LE-LABEL: masked_v8f16_preinc: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vcmp.s16 gt, q0, zr +; CHECK-LE-NEXT: vpst 
+; CHECK-LE-NEXT: vldrht.u16 q0, [r0, #4] +; CHECK-LE-NEXT: adds r0, #4 +; CHECK-LE-NEXT: vstrw.32 q0, [r1] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: masked_v8f16_preinc: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vrev64.16 q1, q0 +; CHECK-BE-NEXT: vcmp.s16 gt, q1, zr +; CHECK-BE-NEXT: vpst +; CHECK-BE-NEXT: vldrht.u16 q0, [r0, #4] +; CHECK-BE-NEXT: adds r0, #4 +; CHECK-BE-NEXT: vstrh.16 q0, [r1] +; CHECK-BE-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 4 + %0 = bitcast i8* %z to <8 x half>* + %c = icmp sgt <8 x i16> %a, zeroinitializer + %1 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* %0, i32 4, <8 x i1> %c, <8 x half> undef) + %2 = bitcast i8* %y to <8 x half>* + store <8 x half> %1, <8 x half>* %2, align 4 + ret i8* %z +} + +define arm_aapcs_vfpcc i8* @masked_v8f16_postinc(i8* %x, i8* %y, <8 x i16> %a) { +; CHECK-LE-LABEL: masked_v8f16_postinc: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vcmp.s16 gt, q0, zr +; CHECK-LE-NEXT: vpst +; CHECK-LE-NEXT: vldrht.u16 q0, [r0] +; CHECK-LE-NEXT: adds r0, #4 +; CHECK-LE-NEXT: vstrw.32 q0, [r1] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: masked_v8f16_postinc: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vrev64.16 q1, q0 +; CHECK-BE-NEXT: vcmp.s16 gt, q1, zr +; CHECK-BE-NEXT: vpst +; CHECK-BE-NEXT: vldrht.u16 q0, [r0] +; CHECK-BE-NEXT: adds r0, #4 +; CHECK-BE-NEXT: vstrh.16 q0, [r1] +; CHECK-BE-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 4 + %0 = bitcast i8* %x to <8 x half>* + %c = icmp sgt <8 x i16> %a, zeroinitializer + %1 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* %0, i32 4, <8 x i1> %c, <8 x half> undef) + %2 = bitcast i8* %y to <8 x half>* + store <8 x half> %1, <8 x half>* %2, align 4 + ret i8* %z +} + + +define arm_aapcs_vfpcc <2 x i64> @masked_v2i64_align4_zero(<2 x i64> *%dest, <2 x i64> %a) { +; CHECK-LE-LABEL: masked_v2i64_align4_zero: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: .pad #4 +; CHECK-LE-NEXT: sub sp, #4 +; CHECK-LE-NEXT: vmov r3, s0 +; CHECK-LE-NEXT: movs r2, #0 +; CHECK-LE-NEXT: vmov r1, s1 +; CHECK-LE-NEXT: vmov r12, s3 +; CHECK-LE-NEXT: rsbs r3, r3, #0 +; CHECK-LE-NEXT: vmov r3, s2 +; CHECK-LE-NEXT: sbcs.w r1, r2, r1 +; CHECK-LE-NEXT: mov.w r1, #0 +; CHECK-LE-NEXT: it lt +; CHECK-LE-NEXT: movlt r1, #1 +; CHECK-LE-NEXT: rsbs r3, r3, #0 +; CHECK-LE-NEXT: sbcs.w r3, r2, r12 +; CHECK-LE-NEXT: it lt +; CHECK-LE-NEXT: movlt r2, #1 +; CHECK-LE-NEXT: cmp r2, #0 +; CHECK-LE-NEXT: it ne +; CHECK-LE-NEXT: mvnne r2, #1 +; CHECK-LE-NEXT: bfi r2, r1, #0, #1 +; CHECK-LE-NEXT: and r1, r2, #3 +; CHECK-LE-NEXT: lsls r2, r2, #31 +; CHECK-LE-NEXT: beq .LBB29_2 +; CHECK-LE-NEXT: @ %bb.1: @ %cond.load +; CHECK-LE-NEXT: vldr d1, .LCPI29_0 +; CHECK-LE-NEXT: vldr d0, [r0] +; CHECK-LE-NEXT: b .LBB29_3 +; CHECK-LE-NEXT: .LBB29_2: +; CHECK-LE-NEXT: vmov.i32 q0, #0x0 +; CHECK-LE-NEXT: .LBB29_3: @ %else +; CHECK-LE-NEXT: lsls r1, r1, #30 +; CHECK-LE-NEXT: it mi +; CHECK-LE-NEXT: vldrmi d1, [r0, #8] +; CHECK-LE-NEXT: add sp, #4 +; CHECK-LE-NEXT: bx lr +; CHECK-LE-NEXT: .p2align 3 +; CHECK-LE-NEXT: @ %bb.4: +; CHECK-LE-NEXT: .LCPI29_0: +; CHECK-LE-NEXT: .long 0 @ double 0 +; CHECK-LE-NEXT: .long 0 +; +; CHECK-BE-LABEL: masked_v2i64_align4_zero: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: .pad #4 +; CHECK-BE-NEXT: sub sp, #4 +; CHECK-BE-NEXT: vrev64.32 q1, q0 +; CHECK-BE-NEXT: movs r2, #0 +; CHECK-BE-NEXT: vmov r3, s7 +; CHECK-BE-NEXT: vmov r1, s6 +; CHECK-BE-NEXT: vmov r12, s4 +; CHECK-BE-NEXT: rsbs r3, r3, #0 +; CHECK-BE-NEXT: vmov r3, s5 +; 
CHECK-BE-NEXT: sbcs.w r1, r2, r1 +; CHECK-BE-NEXT: mov.w r1, #0 +; CHECK-BE-NEXT: it lt +; CHECK-BE-NEXT: movlt r1, #1 +; CHECK-BE-NEXT: rsbs r3, r3, #0 +; CHECK-BE-NEXT: sbcs.w r3, r2, r12 +; CHECK-BE-NEXT: it lt +; CHECK-BE-NEXT: movlt r2, #1 +; CHECK-BE-NEXT: cmp r2, #0 +; CHECK-BE-NEXT: it ne +; CHECK-BE-NEXT: mvnne r2, #1 +; CHECK-BE-NEXT: bfi r2, r1, #0, #1 +; CHECK-BE-NEXT: and r1, r2, #3 +; CHECK-BE-NEXT: lsls r2, r2, #31 +; CHECK-BE-NEXT: beq .LBB29_2 +; CHECK-BE-NEXT: @ %bb.1: @ %cond.load +; CHECK-BE-NEXT: vldr d1, .LCPI29_0 +; CHECK-BE-NEXT: vldr d0, [r0] +; CHECK-BE-NEXT: b .LBB29_3 +; CHECK-BE-NEXT: .LBB29_2: +; CHECK-BE-NEXT: vmov.i32 q1, #0x0 +; CHECK-BE-NEXT: vrev64.32 q0, q1 +; CHECK-BE-NEXT: .LBB29_3: @ %else +; CHECK-BE-NEXT: lsls r1, r1, #30 +; CHECK-BE-NEXT: it mi +; CHECK-BE-NEXT: vldrmi d1, [r0, #8] +; CHECK-BE-NEXT: add sp, #4 +; CHECK-BE-NEXT: bx lr +; CHECK-BE-NEXT: .p2align 3 +; CHECK-BE-NEXT: @ %bb.4: +; CHECK-BE-NEXT: .LCPI29_0: +; CHECK-BE-NEXT: .long 0 @ double 0 +; CHECK-BE-NEXT: .long 0 +entry: + %c = icmp sgt <2 x i64> %a, zeroinitializer + %l = call <2 x i64> @llvm.masked.load.v2i64.p0v2i64(<2 x i64>* %dest, i32 8, <2 x i1> %c, <2 x i64> zeroinitializer) + ret <2 x i64> %l +} + +define arm_aapcs_vfpcc <2 x double> @masked_v2f64_align4_zero(<2 x double> *%dest, <2 x double> %a, <2 x i64> %b) { +; CHECK-LE-LABEL: masked_v2f64_align4_zero: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: .pad #4 +; CHECK-LE-NEXT: sub sp, #4 +; CHECK-LE-NEXT: vmov r3, s4 +; CHECK-LE-NEXT: movs r2, #0 +; CHECK-LE-NEXT: vmov r1, s5 +; CHECK-LE-NEXT: vmov r12, s7 +; CHECK-LE-NEXT: rsbs r3, r3, #0 +; CHECK-LE-NEXT: vmov r3, s6 +; CHECK-LE-NEXT: sbcs.w r1, r2, r1 +; CHECK-LE-NEXT: mov.w r1, #0 +; CHECK-LE-NEXT: it lt +; CHECK-LE-NEXT: movlt r1, #1 +; CHECK-LE-NEXT: rsbs r3, r3, #0 +; CHECK-LE-NEXT: sbcs.w r3, r2, r12 +; CHECK-LE-NEXT: it lt +; CHECK-LE-NEXT: movlt r2, #1 +; CHECK-LE-NEXT: cmp r2, #0 +; CHECK-LE-NEXT: it ne +; CHECK-LE-NEXT: mvnne r2, #1 +; CHECK-LE-NEXT: bfi r2, r1, #0, #1 +; CHECK-LE-NEXT: and r1, r2, #3 +; CHECK-LE-NEXT: lsls r2, r2, #31 +; CHECK-LE-NEXT: beq .LBB30_2 +; CHECK-LE-NEXT: @ %bb.1: @ %cond.load +; CHECK-LE-NEXT: vldr d1, .LCPI30_0 +; CHECK-LE-NEXT: vldr d0, [r0] +; CHECK-LE-NEXT: b .LBB30_3 +; CHECK-LE-NEXT: .LBB30_2: +; CHECK-LE-NEXT: vmov.i32 q0, #0x0 +; CHECK-LE-NEXT: .LBB30_3: @ %else +; CHECK-LE-NEXT: lsls r1, r1, #30 +; CHECK-LE-NEXT: it mi +; CHECK-LE-NEXT: vldrmi d1, [r0, #8] +; CHECK-LE-NEXT: add sp, #4 +; CHECK-LE-NEXT: bx lr +; CHECK-LE-NEXT: .p2align 3 +; CHECK-LE-NEXT: @ %bb.4: +; CHECK-LE-NEXT: .LCPI30_0: +; CHECK-LE-NEXT: .long 0 @ double 0 +; CHECK-LE-NEXT: .long 0 +; +; CHECK-BE-LABEL: masked_v2f64_align4_zero: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: .pad #4 +; CHECK-BE-NEXT: sub sp, #4 +; CHECK-BE-NEXT: vrev64.32 q0, q1 +; CHECK-BE-NEXT: movs r2, #0 +; CHECK-BE-NEXT: vmov r3, s3 +; CHECK-BE-NEXT: vmov r1, s2 +; CHECK-BE-NEXT: vmov r12, s0 +; CHECK-BE-NEXT: rsbs r3, r3, #0 +; CHECK-BE-NEXT: vmov r3, s1 +; CHECK-BE-NEXT: sbcs.w r1, r2, r1 +; CHECK-BE-NEXT: mov.w r1, #0 +; CHECK-BE-NEXT: it lt +; CHECK-BE-NEXT: movlt r1, #1 +; CHECK-BE-NEXT: rsbs r3, r3, #0 +; CHECK-BE-NEXT: sbcs.w r3, r2, r12 +; CHECK-BE-NEXT: it lt +; CHECK-BE-NEXT: movlt r2, #1 +; CHECK-BE-NEXT: cmp r2, #0 +; CHECK-BE-NEXT: it ne +; CHECK-BE-NEXT: mvnne r2, #1 +; CHECK-BE-NEXT: bfi r2, r1, #0, #1 +; CHECK-BE-NEXT: and r1, r2, #3 +; CHECK-BE-NEXT: lsls r2, r2, #31 +; CHECK-BE-NEXT: beq .LBB30_2 +; CHECK-BE-NEXT: @ %bb.1: @ %cond.load +; CHECK-BE-NEXT: vldr 
d1, .LCPI30_0 +; CHECK-BE-NEXT: vldr d0, [r0] +; CHECK-BE-NEXT: b .LBB30_3 +; CHECK-BE-NEXT: .LBB30_2: +; CHECK-BE-NEXT: vmov.i32 q1, #0x0 +; CHECK-BE-NEXT: vrev64.32 q0, q1 +; CHECK-BE-NEXT: .LBB30_3: @ %else +; CHECK-BE-NEXT: lsls r1, r1, #30 +; CHECK-BE-NEXT: it mi +; CHECK-BE-NEXT: vldrmi d1, [r0, #8] +; CHECK-BE-NEXT: add sp, #4 +; CHECK-BE-NEXT: bx lr +; CHECK-BE-NEXT: .p2align 3 +; CHECK-BE-NEXT: @ %bb.4: +; CHECK-BE-NEXT: .LCPI30_0: +; CHECK-BE-NEXT: .long 0 @ double 0 +; CHECK-BE-NEXT: .long 0 +entry: + %c = icmp sgt <2 x i64> %b, zeroinitializer + %l = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %dest, i32 8, <2 x i1> %c, <2 x double> zeroinitializer) + ret <2 x double> %l +} + +declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32, <4 x i1>, <4 x i32>) +declare <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>*, i32, <8 x i1>, <8 x i16>) +declare <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>*, i32, <16 x i1>, <16 x i8>) +declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32, <4 x i1>, <4 x float>) +declare <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>*, i32, <8 x i1>, <8 x half>) +declare <2 x i64> @llvm.masked.load.v2i64.p0v2i64(<2 x i64>*, i32, <2 x i1>, <2 x i64>) +declare <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>*, i32, <2 x i1>, <2 x double>) diff --git a/test/CodeGen/Thumb2/mve-masked-store.ll b/test/CodeGen/Thumb2/mve-masked-store.ll dissimilarity index 81% index 9d777dbedfd..c649a70840e 100644 --- a/test/CodeGen/Thumb2/mve-masked-store.ll +++ b/test/CodeGen/Thumb2/mve-masked-store.ll @@ -1,2862 +1,704 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE -; RUN: llc -mtriple=thumbebv8.1m.main-arm-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE - -define arm_aapcs_vfpcc void @masked_v4i32(<4 x i32> *%dest, <4 x i32> %a) { -; CHECK-LE-LABEL: masked_v4i32: -; CHECK-LE: @ %bb.0: @ %entry -; CHECK-LE-NEXT: .pad #4 -; CHECK-LE-NEXT: sub sp, #4 -; CHECK-LE-NEXT: vcmp.s32 gt, q0, zr -; CHECK-LE-NEXT: movs r1, #0 -; CHECK-LE-NEXT: vmrs r2, p0 -; CHECK-LE-NEXT: and r3, r2, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r1, r3, #0, #1 -; CHECK-LE-NEXT: ubfx r3, r2, #4, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r1, r3, #1, #1 -; CHECK-LE-NEXT: ubfx r3, r2, #8, #1 -; CHECK-LE-NEXT: ubfx r2, r2, #12, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r1, r3, #2, #1 -; CHECK-LE-NEXT: rsbs r2, r2, #0 -; CHECK-LE-NEXT: bfi r1, r2, #3, #1 -; CHECK-LE-NEXT: and r1, r1, #15 -; CHECK-LE-NEXT: lsls r2, r1, #31 -; CHECK-LE-NEXT: itt ne -; CHECK-LE-NEXT: vmovne r2, s0 -; CHECK-LE-NEXT: strne r2, [r0] -; CHECK-LE-NEXT: lsls r2, r1, #30 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: vmovmi r2, s1 -; CHECK-LE-NEXT: strmi r2, [r0, #4] -; CHECK-LE-NEXT: lsls r2, r1, #29 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: vmovmi r2, s2 -; CHECK-LE-NEXT: strmi r2, [r0, #8] -; CHECK-LE-NEXT: lsls r1, r1, #28 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: vmovmi r1, s3 -; CHECK-LE-NEXT: strmi r1, [r0, #12] -; CHECK-LE-NEXT: add sp, #4 -; CHECK-LE-NEXT: bx lr -; -; CHECK-BE-LABEL: masked_v4i32: -; CHECK-BE: @ %bb.0: @ %entry -; CHECK-BE-NEXT: .pad #4 -; CHECK-BE-NEXT: sub sp, #4 -; CHECK-BE-NEXT: vrev64.32 q1, q0 -; CHECK-BE-NEXT: movs r1, #0 -; 
CHECK-BE-NEXT: vcmp.s32 gt, q1, zr -; CHECK-BE-NEXT: vmrs r2, p0 -; CHECK-BE-NEXT: and r3, r2, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r1, r3, #0, #1 -; CHECK-BE-NEXT: ubfx r3, r2, #4, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r1, r3, #1, #1 -; CHECK-BE-NEXT: ubfx r3, r2, #8, #1 -; CHECK-BE-NEXT: ubfx r2, r2, #12, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r1, r3, #2, #1 -; CHECK-BE-NEXT: rsbs r2, r2, #0 -; CHECK-BE-NEXT: bfi r1, r2, #3, #1 -; CHECK-BE-NEXT: and r1, r1, #15 -; CHECK-BE-NEXT: lsls r2, r1, #31 -; CHECK-BE-NEXT: itt ne -; CHECK-BE-NEXT: vmovne r2, s4 -; CHECK-BE-NEXT: strne r2, [r0] -; CHECK-BE-NEXT: lsls r2, r1, #30 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: vmovmi r2, s5 -; CHECK-BE-NEXT: strmi r2, [r0, #4] -; CHECK-BE-NEXT: lsls r2, r1, #29 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: vmovmi r2, s6 -; CHECK-BE-NEXT: strmi r2, [r0, #8] -; CHECK-BE-NEXT: lsls r1, r1, #28 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: vmovmi r1, s7 -; CHECK-BE-NEXT: strmi r1, [r0, #12] -; CHECK-BE-NEXT: add sp, #4 -; CHECK-BE-NEXT: bx lr -entry: - %c = icmp sgt <4 x i32> %a, zeroinitializer - call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %a, <4 x i32>* %dest, i32 4, <4 x i1> %c) - ret void -} - -define arm_aapcs_vfpcc void @masked_v4i32_align1(<4 x i32> *%dest, <4 x i32> %a) { -; CHECK-LE-LABEL: masked_v4i32_align1: -; CHECK-LE: @ %bb.0: @ %entry -; CHECK-LE-NEXT: .pad #4 -; CHECK-LE-NEXT: sub sp, #4 -; CHECK-LE-NEXT: vcmp.s32 gt, q0, zr -; CHECK-LE-NEXT: movs r1, #0 -; CHECK-LE-NEXT: vmrs r2, p0 -; CHECK-LE-NEXT: and r3, r2, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r1, r3, #0, #1 -; CHECK-LE-NEXT: ubfx r3, r2, #4, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r1, r3, #1, #1 -; CHECK-LE-NEXT: ubfx r3, r2, #8, #1 -; CHECK-LE-NEXT: ubfx r2, r2, #12, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r1, r3, #2, #1 -; CHECK-LE-NEXT: rsbs r2, r2, #0 -; CHECK-LE-NEXT: bfi r1, r2, #3, #1 -; CHECK-LE-NEXT: and r1, r1, #15 -; CHECK-LE-NEXT: lsls r2, r1, #31 -; CHECK-LE-NEXT: itt ne -; CHECK-LE-NEXT: vmovne r2, s0 -; CHECK-LE-NEXT: strne r2, [r0] -; CHECK-LE-NEXT: lsls r2, r1, #30 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: vmovmi r2, s1 -; CHECK-LE-NEXT: strmi r2, [r0, #4] -; CHECK-LE-NEXT: lsls r2, r1, #29 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: vmovmi r2, s2 -; CHECK-LE-NEXT: strmi r2, [r0, #8] -; CHECK-LE-NEXT: lsls r1, r1, #28 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: vmovmi r1, s3 -; CHECK-LE-NEXT: strmi r1, [r0, #12] -; CHECK-LE-NEXT: add sp, #4 -; CHECK-LE-NEXT: bx lr -; -; CHECK-BE-LABEL: masked_v4i32_align1: -; CHECK-BE: @ %bb.0: @ %entry -; CHECK-BE-NEXT: .pad #4 -; CHECK-BE-NEXT: sub sp, #4 -; CHECK-BE-NEXT: vrev64.32 q1, q0 -; CHECK-BE-NEXT: movs r1, #0 -; CHECK-BE-NEXT: vcmp.s32 gt, q1, zr -; CHECK-BE-NEXT: vmrs r2, p0 -; CHECK-BE-NEXT: and r3, r2, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r1, r3, #0, #1 -; CHECK-BE-NEXT: ubfx r3, r2, #4, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r1, r3, #1, #1 -; CHECK-BE-NEXT: ubfx r3, r2, #8, #1 -; CHECK-BE-NEXT: ubfx r2, r2, #12, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r1, r3, #2, #1 -; CHECK-BE-NEXT: rsbs r2, r2, #0 -; CHECK-BE-NEXT: bfi r1, r2, #3, #1 -; CHECK-BE-NEXT: and r1, r1, #15 -; CHECK-BE-NEXT: lsls r2, r1, #31 -; CHECK-BE-NEXT: itt ne -; CHECK-BE-NEXT: vmovne r2, s4 -; CHECK-BE-NEXT: strne r2, [r0] -; CHECK-BE-NEXT: lsls r2, r1, #30 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: vmovmi r2, s5 -; 
CHECK-BE-NEXT: strmi r2, [r0, #4] -; CHECK-BE-NEXT: lsls r2, r1, #29 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: vmovmi r2, s6 -; CHECK-BE-NEXT: strmi r2, [r0, #8] -; CHECK-BE-NEXT: lsls r1, r1, #28 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: vmovmi r1, s7 -; CHECK-BE-NEXT: strmi r1, [r0, #12] -; CHECK-BE-NEXT: add sp, #4 -; CHECK-BE-NEXT: bx lr -entry: - %c = icmp sgt <4 x i32> %a, zeroinitializer - call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %a, <4 x i32>* %dest, i32 1, <4 x i1> %c) - ret void -} - -define i8* @masked_v4i32_pre(i8* %y, i8* %x, <4 x i32> %a) { -; CHECK-LE-LABEL: masked_v4i32_pre: -; CHECK-LE: @ %bb.0: @ %entry -; CHECK-LE-NEXT: .pad #8 -; CHECK-LE-NEXT: sub sp, #8 -; CHECK-LE-NEXT: vldr d1, [sp, #8] -; CHECK-LE-NEXT: adds r0, #4 -; CHECK-LE-NEXT: vmov d0, r2, r3 -; CHECK-LE-NEXT: movs r2, #0 -; CHECK-LE-NEXT: vcmp.s32 gt, q0, zr -; CHECK-LE-NEXT: vmrs r12, p0 -; CHECK-LE-NEXT: and r3, r12, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #0, #1 -; CHECK-LE-NEXT: ubfx r3, r12, #4, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #1, #1 -; CHECK-LE-NEXT: ubfx r3, r12, #8, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #2, #1 -; CHECK-LE-NEXT: ubfx r3, r12, #12, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: vldrw.u32 q0, [r1] -; CHECK-LE-NEXT: bfi r2, r3, #3, #1 -; CHECK-LE-NEXT: and r1, r2, #15 -; CHECK-LE-NEXT: lsls r2, r1, #31 -; CHECK-LE-NEXT: itt ne -; CHECK-LE-NEXT: vmovne r2, s0 -; CHECK-LE-NEXT: strne r2, [r0] -; CHECK-LE-NEXT: lsls r2, r1, #30 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: vmovmi r2, s1 -; CHECK-LE-NEXT: strmi r2, [r0, #4] -; CHECK-LE-NEXT: lsls r2, r1, #29 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: vmovmi r2, s2 -; CHECK-LE-NEXT: strmi r2, [r0, #8] -; CHECK-LE-NEXT: lsls r1, r1, #28 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: vmovmi r1, s3 -; CHECK-LE-NEXT: strmi r1, [r0, #12] -; CHECK-LE-NEXT: add sp, #8 -; CHECK-LE-NEXT: bx lr -; -; CHECK-BE-LABEL: masked_v4i32_pre: -; CHECK-BE: @ %bb.0: @ %entry -; CHECK-BE-NEXT: .pad #8 -; CHECK-BE-NEXT: sub sp, #8 -; CHECK-BE-NEXT: vldr d1, [sp, #8] -; CHECK-BE-NEXT: adds r0, #4 -; CHECK-BE-NEXT: vmov d0, r3, r2 -; CHECK-BE-NEXT: movs r2, #0 -; CHECK-BE-NEXT: vrev64.32 q1, q0 -; CHECK-BE-NEXT: vcmp.s32 gt, q1, zr -; CHECK-BE-NEXT: vmrs r12, p0 -; CHECK-BE-NEXT: and r3, r12, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #0, #1 -; CHECK-BE-NEXT: ubfx r3, r12, #4, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #1, #1 -; CHECK-BE-NEXT: ubfx r3, r12, #8, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #2, #1 -; CHECK-BE-NEXT: ubfx r3, r12, #12, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: vldrw.u32 q0, [r1] -; CHECK-BE-NEXT: bfi r2, r3, #3, #1 -; CHECK-BE-NEXT: and r1, r2, #15 -; CHECK-BE-NEXT: lsls r2, r1, #31 -; CHECK-BE-NEXT: itt ne -; CHECK-BE-NEXT: vmovne r2, s0 -; CHECK-BE-NEXT: strne r2, [r0] -; CHECK-BE-NEXT: lsls r2, r1, #30 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: vmovmi r2, s1 -; CHECK-BE-NEXT: strmi r2, [r0, #4] -; CHECK-BE-NEXT: lsls r2, r1, #29 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: vmovmi r2, s2 -; CHECK-BE-NEXT: strmi r2, [r0, #8] -; CHECK-BE-NEXT: lsls r1, r1, #28 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: vmovmi r1, s3 -; CHECK-BE-NEXT: strmi r1, [r0, #12] -; CHECK-BE-NEXT: add sp, #8 -; CHECK-BE-NEXT: bx lr -entry: - %z = getelementptr inbounds i8, i8* %y, i32 4 - %0 = bitcast i8* %x to <4 x i32>* - %1 = load <4 x i32>, <4 x i32>* %0, align 4 - %2 = 
bitcast i8* %z to <4 x i32>* - %c = icmp sgt <4 x i32> %a, zeroinitializer - call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %1, <4 x i32>* %2, i32 4, <4 x i1> %c) - ret i8* %z -} - -define i8* @masked_v4i32_post(i8* %y, i8* %x, <4 x i32> %a) { -; CHECK-LE-LABEL: masked_v4i32_post: -; CHECK-LE: @ %bb.0: @ %entry -; CHECK-LE-NEXT: .pad #8 -; CHECK-LE-NEXT: sub sp, #8 -; CHECK-LE-NEXT: vldr d1, [sp, #8] -; CHECK-LE-NEXT: vmov d0, r2, r3 -; CHECK-LE-NEXT: movs r2, #0 -; CHECK-LE-NEXT: vcmp.s32 gt, q0, zr -; CHECK-LE-NEXT: vmrs r12, p0 -; CHECK-LE-NEXT: and r3, r12, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #0, #1 -; CHECK-LE-NEXT: ubfx r3, r12, #4, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #1, #1 -; CHECK-LE-NEXT: ubfx r3, r12, #8, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #2, #1 -; CHECK-LE-NEXT: ubfx r3, r12, #12, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: vldrw.u32 q0, [r1] -; CHECK-LE-NEXT: bfi r2, r3, #3, #1 -; CHECK-LE-NEXT: and r2, r2, #15 -; CHECK-LE-NEXT: lsls r1, r2, #31 -; CHECK-LE-NEXT: itt ne -; CHECK-LE-NEXT: vmovne r1, s0 -; CHECK-LE-NEXT: strne r1, [r0] -; CHECK-LE-NEXT: lsls r1, r2, #30 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: vmovmi r1, s1 -; CHECK-LE-NEXT: strmi r1, [r0, #4] -; CHECK-LE-NEXT: adds r1, r0, #4 -; CHECK-LE-NEXT: lsls r3, r2, #29 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: vmovmi r3, s2 -; CHECK-LE-NEXT: strmi r3, [r0, #8] -; CHECK-LE-NEXT: lsls r2, r2, #28 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: vmovmi r2, s3 -; CHECK-LE-NEXT: strmi r2, [r0, #12] -; CHECK-LE-NEXT: mov r0, r1 -; CHECK-LE-NEXT: add sp, #8 -; CHECK-LE-NEXT: bx lr -; -; CHECK-BE-LABEL: masked_v4i32_post: -; CHECK-BE: @ %bb.0: @ %entry -; CHECK-BE-NEXT: .pad #8 -; CHECK-BE-NEXT: sub sp, #8 -; CHECK-BE-NEXT: vldr d1, [sp, #8] -; CHECK-BE-NEXT: vmov d0, r3, r2 -; CHECK-BE-NEXT: movs r2, #0 -; CHECK-BE-NEXT: vrev64.32 q1, q0 -; CHECK-BE-NEXT: vcmp.s32 gt, q1, zr -; CHECK-BE-NEXT: vmrs r12, p0 -; CHECK-BE-NEXT: and r3, r12, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #0, #1 -; CHECK-BE-NEXT: ubfx r3, r12, #4, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #1, #1 -; CHECK-BE-NEXT: ubfx r3, r12, #8, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #2, #1 -; CHECK-BE-NEXT: ubfx r3, r12, #12, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: vldrw.u32 q0, [r1] -; CHECK-BE-NEXT: bfi r2, r3, #3, #1 -; CHECK-BE-NEXT: and r2, r2, #15 -; CHECK-BE-NEXT: lsls r1, r2, #31 -; CHECK-BE-NEXT: itt ne -; CHECK-BE-NEXT: vmovne r1, s0 -; CHECK-BE-NEXT: strne r1, [r0] -; CHECK-BE-NEXT: lsls r1, r2, #30 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: vmovmi r1, s1 -; CHECK-BE-NEXT: strmi r1, [r0, #4] -; CHECK-BE-NEXT: adds r1, r0, #4 -; CHECK-BE-NEXT: lsls r3, r2, #29 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: vmovmi r3, s2 -; CHECK-BE-NEXT: strmi r3, [r0, #8] -; CHECK-BE-NEXT: lsls r2, r2, #28 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: vmovmi r2, s3 -; CHECK-BE-NEXT: strmi r2, [r0, #12] -; CHECK-BE-NEXT: mov r0, r1 -; CHECK-BE-NEXT: add sp, #8 -; CHECK-BE-NEXT: bx lr -entry: - %z = getelementptr inbounds i8, i8* %y, i32 4 - %0 = bitcast i8* %x to <4 x i32>* - %1 = load <4 x i32>, <4 x i32>* %0, align 4 - %2 = bitcast i8* %y to <4 x i32>* - %c = icmp sgt <4 x i32> %a, zeroinitializer - call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %1, <4 x i32>* %2, i32 4, <4 x i1> %c) - ret i8* %z -} - - -define arm_aapcs_vfpcc void @masked_v8i16(<8 x i16> *%dest, <8 x i16> %a) 
{ -; CHECK-LE-LABEL: masked_v8i16: -; CHECK-LE: @ %bb.0: @ %entry -; CHECK-LE-NEXT: .pad #8 -; CHECK-LE-NEXT: sub sp, #8 -; CHECK-LE-NEXT: vcmp.s16 gt, q0, zr -; CHECK-LE-NEXT: movs r2, #0 -; CHECK-LE-NEXT: vmrs r1, p0 -; CHECK-LE-NEXT: and r3, r1, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #0, #1 -; CHECK-LE-NEXT: ubfx r3, r1, #2, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #1, #1 -; CHECK-LE-NEXT: ubfx r3, r1, #4, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #2, #1 -; CHECK-LE-NEXT: ubfx r3, r1, #6, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #3, #1 -; CHECK-LE-NEXT: ubfx r3, r1, #8, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #4, #1 -; CHECK-LE-NEXT: ubfx r3, r1, #10, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #5, #1 -; CHECK-LE-NEXT: ubfx r3, r1, #12, #1 -; CHECK-LE-NEXT: ubfx r1, r1, #14, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #6, #1 -; CHECK-LE-NEXT: rsbs r1, r1, #0 -; CHECK-LE-NEXT: bfi r2, r1, #7, #1 -; CHECK-LE-NEXT: uxtb r1, r2 -; CHECK-LE-NEXT: lsls r2, r2, #31 -; CHECK-LE-NEXT: itt ne -; CHECK-LE-NEXT: vmovne.u16 r2, q0[0] -; CHECK-LE-NEXT: strhne r2, [r0] -; CHECK-LE-NEXT: lsls r2, r1, #30 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: vmovmi.u16 r2, q0[1] -; CHECK-LE-NEXT: strhmi r2, [r0, #2] -; CHECK-LE-NEXT: lsls r2, r1, #29 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: vmovmi.u16 r2, q0[2] -; CHECK-LE-NEXT: strhmi r2, [r0, #4] -; CHECK-LE-NEXT: lsls r2, r1, #28 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: vmovmi.u16 r2, q0[3] -; CHECK-LE-NEXT: strhmi r2, [r0, #6] -; CHECK-LE-NEXT: lsls r2, r1, #27 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: vmovmi.u16 r2, q0[4] -; CHECK-LE-NEXT: strhmi r2, [r0, #8] -; CHECK-LE-NEXT: lsls r2, r1, #26 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: vmovmi.u16 r2, q0[5] -; CHECK-LE-NEXT: strhmi r2, [r0, #10] -; CHECK-LE-NEXT: lsls r2, r1, #25 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: vmovmi.u16 r2, q0[6] -; CHECK-LE-NEXT: strhmi r2, [r0, #12] -; CHECK-LE-NEXT: lsls r1, r1, #24 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: vmovmi.u16 r1, q0[7] -; CHECK-LE-NEXT: strhmi r1, [r0, #14] -; CHECK-LE-NEXT: add sp, #8 -; CHECK-LE-NEXT: bx lr -; -; CHECK-BE-LABEL: masked_v8i16: -; CHECK-BE: @ %bb.0: @ %entry -; CHECK-BE-NEXT: .pad #8 -; CHECK-BE-NEXT: sub sp, #8 -; CHECK-BE-NEXT: vrev64.16 q1, q0 -; CHECK-BE-NEXT: movs r2, #0 -; CHECK-BE-NEXT: vcmp.s16 gt, q1, zr -; CHECK-BE-NEXT: vmrs r1, p0 -; CHECK-BE-NEXT: and r3, r1, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #0, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #2, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #1, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #4, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #2, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #6, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #3, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #8, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #4, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #10, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #5, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #12, #1 -; CHECK-BE-NEXT: ubfx r1, r1, #14, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #6, #1 -; CHECK-BE-NEXT: rsbs r1, r1, #0 -; CHECK-BE-NEXT: bfi r2, r1, #7, #1 -; CHECK-BE-NEXT: uxtb r1, r2 -; CHECK-BE-NEXT: lsls r2, r2, #31 -; CHECK-BE-NEXT: itt ne -; CHECK-BE-NEXT: vmovne.u16 r2, q1[0] -; CHECK-BE-NEXT: strhne r2, [r0] 
-; CHECK-BE-NEXT: lsls r2, r1, #30 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: vmovmi.u16 r2, q1[1] -; CHECK-BE-NEXT: strhmi r2, [r0, #2] -; CHECK-BE-NEXT: lsls r2, r1, #29 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: vmovmi.u16 r2, q1[2] -; CHECK-BE-NEXT: strhmi r2, [r0, #4] -; CHECK-BE-NEXT: lsls r2, r1, #28 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: vmovmi.u16 r2, q1[3] -; CHECK-BE-NEXT: strhmi r2, [r0, #6] -; CHECK-BE-NEXT: lsls r2, r1, #27 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: vmovmi.u16 r2, q1[4] -; CHECK-BE-NEXT: strhmi r2, [r0, #8] -; CHECK-BE-NEXT: lsls r2, r1, #26 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: vmovmi.u16 r2, q1[5] -; CHECK-BE-NEXT: strhmi r2, [r0, #10] -; CHECK-BE-NEXT: lsls r2, r1, #25 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: vmovmi.u16 r2, q1[6] -; CHECK-BE-NEXT: strhmi r2, [r0, #12] -; CHECK-BE-NEXT: lsls r1, r1, #24 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: vmovmi.u16 r1, q1[7] -; CHECK-BE-NEXT: strhmi r1, [r0, #14] -; CHECK-BE-NEXT: add sp, #8 -; CHECK-BE-NEXT: bx lr -entry: - %c = icmp sgt <8 x i16> %a, zeroinitializer - call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %a, <8 x i16>* %dest, i32 2, <8 x i1> %c) - ret void -} - -define arm_aapcs_vfpcc void @masked_v8i16_align1(<8 x i16> *%dest, <8 x i16> %a) { -; CHECK-LE-LABEL: masked_v8i16_align1: -; CHECK-LE: @ %bb.0: @ %entry -; CHECK-LE-NEXT: .pad #8 -; CHECK-LE-NEXT: sub sp, #8 -; CHECK-LE-NEXT: vcmp.s16 gt, q0, zr -; CHECK-LE-NEXT: movs r2, #0 -; CHECK-LE-NEXT: vmrs r1, p0 -; CHECK-LE-NEXT: and r3, r1, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #0, #1 -; CHECK-LE-NEXT: ubfx r3, r1, #2, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #1, #1 -; CHECK-LE-NEXT: ubfx r3, r1, #4, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #2, #1 -; CHECK-LE-NEXT: ubfx r3, r1, #6, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #3, #1 -; CHECK-LE-NEXT: ubfx r3, r1, #8, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #4, #1 -; CHECK-LE-NEXT: ubfx r3, r1, #10, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #5, #1 -; CHECK-LE-NEXT: ubfx r3, r1, #12, #1 -; CHECK-LE-NEXT: ubfx r1, r1, #14, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #6, #1 -; CHECK-LE-NEXT: rsbs r1, r1, #0 -; CHECK-LE-NEXT: bfi r2, r1, #7, #1 -; CHECK-LE-NEXT: uxtb r1, r2 -; CHECK-LE-NEXT: lsls r2, r2, #31 -; CHECK-LE-NEXT: itt ne -; CHECK-LE-NEXT: vmovne.u16 r2, q0[0] -; CHECK-LE-NEXT: strhne r2, [r0] -; CHECK-LE-NEXT: lsls r2, r1, #30 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: vmovmi.u16 r2, q0[1] -; CHECK-LE-NEXT: strhmi r2, [r0, #2] -; CHECK-LE-NEXT: lsls r2, r1, #29 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: vmovmi.u16 r2, q0[2] -; CHECK-LE-NEXT: strhmi r2, [r0, #4] -; CHECK-LE-NEXT: lsls r2, r1, #28 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: vmovmi.u16 r2, q0[3] -; CHECK-LE-NEXT: strhmi r2, [r0, #6] -; CHECK-LE-NEXT: lsls r2, r1, #27 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: vmovmi.u16 r2, q0[4] -; CHECK-LE-NEXT: strhmi r2, [r0, #8] -; CHECK-LE-NEXT: lsls r2, r1, #26 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: vmovmi.u16 r2, q0[5] -; CHECK-LE-NEXT: strhmi r2, [r0, #10] -; CHECK-LE-NEXT: lsls r2, r1, #25 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: vmovmi.u16 r2, q0[6] -; CHECK-LE-NEXT: strhmi r2, [r0, #12] -; CHECK-LE-NEXT: lsls r1, r1, #24 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: vmovmi.u16 r1, q0[7] -; CHECK-LE-NEXT: strhmi r1, [r0, #14] -; CHECK-LE-NEXT: add sp, #8 -; CHECK-LE-NEXT: bx lr -; -; 
CHECK-BE-LABEL: masked_v8i16_align1: -; CHECK-BE: @ %bb.0: @ %entry -; CHECK-BE-NEXT: .pad #8 -; CHECK-BE-NEXT: sub sp, #8 -; CHECK-BE-NEXT: vrev64.16 q1, q0 -; CHECK-BE-NEXT: movs r2, #0 -; CHECK-BE-NEXT: vcmp.s16 gt, q1, zr -; CHECK-BE-NEXT: vmrs r1, p0 -; CHECK-BE-NEXT: and r3, r1, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #0, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #2, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #1, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #4, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #2, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #6, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #3, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #8, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #4, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #10, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #5, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #12, #1 -; CHECK-BE-NEXT: ubfx r1, r1, #14, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #6, #1 -; CHECK-BE-NEXT: rsbs r1, r1, #0 -; CHECK-BE-NEXT: bfi r2, r1, #7, #1 -; CHECK-BE-NEXT: uxtb r1, r2 -; CHECK-BE-NEXT: lsls r2, r2, #31 -; CHECK-BE-NEXT: itt ne -; CHECK-BE-NEXT: vmovne.u16 r2, q1[0] -; CHECK-BE-NEXT: strhne r2, [r0] -; CHECK-BE-NEXT: lsls r2, r1, #30 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: vmovmi.u16 r2, q1[1] -; CHECK-BE-NEXT: strhmi r2, [r0, #2] -; CHECK-BE-NEXT: lsls r2, r1, #29 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: vmovmi.u16 r2, q1[2] -; CHECK-BE-NEXT: strhmi r2, [r0, #4] -; CHECK-BE-NEXT: lsls r2, r1, #28 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: vmovmi.u16 r2, q1[3] -; CHECK-BE-NEXT: strhmi r2, [r0, #6] -; CHECK-BE-NEXT: lsls r2, r1, #27 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: vmovmi.u16 r2, q1[4] -; CHECK-BE-NEXT: strhmi r2, [r0, #8] -; CHECK-BE-NEXT: lsls r2, r1, #26 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: vmovmi.u16 r2, q1[5] -; CHECK-BE-NEXT: strhmi r2, [r0, #10] -; CHECK-BE-NEXT: lsls r2, r1, #25 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: vmovmi.u16 r2, q1[6] -; CHECK-BE-NEXT: strhmi r2, [r0, #12] -; CHECK-BE-NEXT: lsls r1, r1, #24 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: vmovmi.u16 r1, q1[7] -; CHECK-BE-NEXT: strhmi r1, [r0, #14] -; CHECK-BE-NEXT: add sp, #8 -; CHECK-BE-NEXT: bx lr -entry: - %c = icmp sgt <8 x i16> %a, zeroinitializer - call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %a, <8 x i16>* %dest, i32 1, <8 x i1> %c) - ret void -} - -define i8* @masked_v8i16_pre(i8* %y, i8* %x, <8 x i16> %a) { -; CHECK-LE-LABEL: masked_v8i16_pre: -; CHECK-LE: @ %bb.0: @ %entry -; CHECK-LE-NEXT: .pad #8 -; CHECK-LE-NEXT: sub sp, #8 -; CHECK-LE-NEXT: vldr d1, [sp, #8] -; CHECK-LE-NEXT: adds r0, #4 -; CHECK-LE-NEXT: vmov d0, r2, r3 -; CHECK-LE-NEXT: movs r2, #0 -; CHECK-LE-NEXT: vcmp.s16 gt, q0, zr -; CHECK-LE-NEXT: vmrs r12, p0 -; CHECK-LE-NEXT: and r3, r12, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #0, #1 -; CHECK-LE-NEXT: ubfx r3, r12, #2, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #1, #1 -; CHECK-LE-NEXT: ubfx r3, r12, #4, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #2, #1 -; CHECK-LE-NEXT: ubfx r3, r12, #6, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #3, #1 -; CHECK-LE-NEXT: ubfx r3, r12, #8, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #4, #1 -; CHECK-LE-NEXT: ubfx r3, r12, #10, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #5, #1 -; CHECK-LE-NEXT: ubfx r3, r12, #12, #1 -; 
CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #6, #1 -; CHECK-LE-NEXT: ubfx r3, r12, #14, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: vldrw.u32 q0, [r1] -; CHECK-LE-NEXT: bfi r2, r3, #7, #1 -; CHECK-LE-NEXT: uxtb r1, r2 -; CHECK-LE-NEXT: lsls r2, r2, #31 -; CHECK-LE-NEXT: itt ne -; CHECK-LE-NEXT: vmovne.u16 r2, q0[0] -; CHECK-LE-NEXT: strhne r2, [r0] -; CHECK-LE-NEXT: lsls r2, r1, #30 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: vmovmi.u16 r2, q0[1] -; CHECK-LE-NEXT: strhmi r2, [r0, #2] -; CHECK-LE-NEXT: lsls r2, r1, #29 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: vmovmi.u16 r2, q0[2] -; CHECK-LE-NEXT: strhmi r2, [r0, #4] -; CHECK-LE-NEXT: lsls r2, r1, #28 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: vmovmi.u16 r2, q0[3] -; CHECK-LE-NEXT: strhmi r2, [r0, #6] -; CHECK-LE-NEXT: lsls r2, r1, #27 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: vmovmi.u16 r2, q0[4] -; CHECK-LE-NEXT: strhmi r2, [r0, #8] -; CHECK-LE-NEXT: lsls r2, r1, #26 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: vmovmi.u16 r2, q0[5] -; CHECK-LE-NEXT: strhmi r2, [r0, #10] -; CHECK-LE-NEXT: lsls r2, r1, #25 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: vmovmi.u16 r2, q0[6] -; CHECK-LE-NEXT: strhmi r2, [r0, #12] -; CHECK-LE-NEXT: lsls r1, r1, #24 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: vmovmi.u16 r1, q0[7] -; CHECK-LE-NEXT: strhmi r1, [r0, #14] -; CHECK-LE-NEXT: add sp, #8 -; CHECK-LE-NEXT: bx lr -; -; CHECK-BE-LABEL: masked_v8i16_pre: -; CHECK-BE: @ %bb.0: @ %entry -; CHECK-BE-NEXT: .pad #8 -; CHECK-BE-NEXT: sub sp, #8 -; CHECK-BE-NEXT: vldr d1, [sp, #8] -; CHECK-BE-NEXT: adds r0, #4 -; CHECK-BE-NEXT: vmov d0, r3, r2 -; CHECK-BE-NEXT: movs r2, #0 -; CHECK-BE-NEXT: vrev64.16 q1, q0 -; CHECK-BE-NEXT: vcmp.s16 gt, q1, zr -; CHECK-BE-NEXT: vmrs r12, p0 -; CHECK-BE-NEXT: and r3, r12, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #0, #1 -; CHECK-BE-NEXT: ubfx r3, r12, #2, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #1, #1 -; CHECK-BE-NEXT: ubfx r3, r12, #4, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #2, #1 -; CHECK-BE-NEXT: ubfx r3, r12, #6, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #3, #1 -; CHECK-BE-NEXT: ubfx r3, r12, #8, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #4, #1 -; CHECK-BE-NEXT: ubfx r3, r12, #10, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #5, #1 -; CHECK-BE-NEXT: ubfx r3, r12, #12, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #6, #1 -; CHECK-BE-NEXT: ubfx r3, r12, #14, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: vldrh.u16 q0, [r1] -; CHECK-BE-NEXT: bfi r2, r3, #7, #1 -; CHECK-BE-NEXT: uxtb r1, r2 -; CHECK-BE-NEXT: lsls r2, r2, #31 -; CHECK-BE-NEXT: itt ne -; CHECK-BE-NEXT: vmovne.u16 r2, q0[0] -; CHECK-BE-NEXT: strhne r2, [r0] -; CHECK-BE-NEXT: lsls r2, r1, #30 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: vmovmi.u16 r2, q0[1] -; CHECK-BE-NEXT: strhmi r2, [r0, #2] -; CHECK-BE-NEXT: lsls r2, r1, #29 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: vmovmi.u16 r2, q0[2] -; CHECK-BE-NEXT: strhmi r2, [r0, #4] -; CHECK-BE-NEXT: lsls r2, r1, #28 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: vmovmi.u16 r2, q0[3] -; CHECK-BE-NEXT: strhmi r2, [r0, #6] -; CHECK-BE-NEXT: lsls r2, r1, #27 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: vmovmi.u16 r2, q0[4] -; CHECK-BE-NEXT: strhmi r2, [r0, #8] -; CHECK-BE-NEXT: lsls r2, r1, #26 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: vmovmi.u16 r2, q0[5] -; CHECK-BE-NEXT: strhmi r2, [r0, #10] -; CHECK-BE-NEXT: lsls r2, r1, #25 
-; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: vmovmi.u16 r2, q0[6] -; CHECK-BE-NEXT: strhmi r2, [r0, #12] -; CHECK-BE-NEXT: lsls r1, r1, #24 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: vmovmi.u16 r1, q0[7] -; CHECK-BE-NEXT: strhmi r1, [r0, #14] -; CHECK-BE-NEXT: add sp, #8 -; CHECK-BE-NEXT: bx lr -entry: - %z = getelementptr inbounds i8, i8* %y, i32 4 - %0 = bitcast i8* %x to <8 x i16>* - %1 = load <8 x i16>, <8 x i16>* %0, align 4 - %2 = bitcast i8* %z to <8 x i16>* - %c = icmp sgt <8 x i16> %a, zeroinitializer - call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %1, <8 x i16>* %2, i32 2, <8 x i1> %c) - ret i8* %z -} - -define i8* @masked_v8i16_post(i8* %y, i8* %x, <8 x i16> %a) { -; CHECK-LE-LABEL: masked_v8i16_post: -; CHECK-LE: @ %bb.0: @ %entry -; CHECK-LE-NEXT: .pad #8 -; CHECK-LE-NEXT: sub sp, #8 -; CHECK-LE-NEXT: vldr d1, [sp, #8] -; CHECK-LE-NEXT: vmov d0, r2, r3 -; CHECK-LE-NEXT: movs r3, #0 -; CHECK-LE-NEXT: vcmp.s16 gt, q0, zr -; CHECK-LE-NEXT: vmrs r12, p0 -; CHECK-LE-NEXT: and r2, r12, #1 -; CHECK-LE-NEXT: rsbs r2, r2, #0 -; CHECK-LE-NEXT: bfi r3, r2, #0, #1 -; CHECK-LE-NEXT: ubfx r2, r12, #2, #1 -; CHECK-LE-NEXT: rsbs r2, r2, #0 -; CHECK-LE-NEXT: bfi r3, r2, #1, #1 -; CHECK-LE-NEXT: ubfx r2, r12, #4, #1 -; CHECK-LE-NEXT: rsbs r2, r2, #0 -; CHECK-LE-NEXT: bfi r3, r2, #2, #1 -; CHECK-LE-NEXT: ubfx r2, r12, #6, #1 -; CHECK-LE-NEXT: rsbs r2, r2, #0 -; CHECK-LE-NEXT: bfi r3, r2, #3, #1 -; CHECK-LE-NEXT: ubfx r2, r12, #8, #1 -; CHECK-LE-NEXT: rsbs r2, r2, #0 -; CHECK-LE-NEXT: bfi r3, r2, #4, #1 -; CHECK-LE-NEXT: ubfx r2, r12, #10, #1 -; CHECK-LE-NEXT: rsbs r2, r2, #0 -; CHECK-LE-NEXT: bfi r3, r2, #5, #1 -; CHECK-LE-NEXT: ubfx r2, r12, #12, #1 -; CHECK-LE-NEXT: rsbs r2, r2, #0 -; CHECK-LE-NEXT: bfi r3, r2, #6, #1 -; CHECK-LE-NEXT: ubfx r2, r12, #14, #1 -; CHECK-LE-NEXT: rsbs r2, r2, #0 -; CHECK-LE-NEXT: vldrw.u32 q0, [r1] -; CHECK-LE-NEXT: bfi r3, r2, #7, #1 -; CHECK-LE-NEXT: lsls r1, r3, #31 -; CHECK-LE-NEXT: uxtb r2, r3 -; CHECK-LE-NEXT: itt ne -; CHECK-LE-NEXT: vmovne.u16 r1, q0[0] -; CHECK-LE-NEXT: strhne r1, [r0] -; CHECK-LE-NEXT: lsls r1, r2, #30 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: vmovmi.u16 r1, q0[1] -; CHECK-LE-NEXT: strhmi r1, [r0, #2] -; CHECK-LE-NEXT: lsls r1, r2, #29 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: vmovmi.u16 r1, q0[2] -; CHECK-LE-NEXT: strhmi r1, [r0, #4] -; CHECK-LE-NEXT: lsls r1, r2, #28 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: vmovmi.u16 r1, q0[3] -; CHECK-LE-NEXT: strhmi r1, [r0, #6] -; CHECK-LE-NEXT: lsls r1, r2, #27 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: vmovmi.u16 r1, q0[4] -; CHECK-LE-NEXT: strhmi r1, [r0, #8] -; CHECK-LE-NEXT: lsls r1, r2, #26 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: vmovmi.u16 r1, q0[5] -; CHECK-LE-NEXT: strhmi r1, [r0, #10] -; CHECK-LE-NEXT: lsls r3, r2, #25 -; CHECK-LE-NEXT: add.w r1, r0, #4 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: vmovmi.u16 r3, q0[6] -; CHECK-LE-NEXT: strhmi r3, [r0, #12] -; CHECK-LE-NEXT: lsls r2, r2, #24 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: vmovmi.u16 r2, q0[7] -; CHECK-LE-NEXT: strhmi r2, [r0, #14] -; CHECK-LE-NEXT: mov r0, r1 -; CHECK-LE-NEXT: add sp, #8 -; CHECK-LE-NEXT: bx lr -; -; CHECK-BE-LABEL: masked_v8i16_post: -; CHECK-BE: @ %bb.0: @ %entry -; CHECK-BE-NEXT: .pad #8 -; CHECK-BE-NEXT: sub sp, #8 -; CHECK-BE-NEXT: vldr d1, [sp, #8] -; CHECK-BE-NEXT: vmov d0, r3, r2 -; CHECK-BE-NEXT: movs r3, #0 -; CHECK-BE-NEXT: vrev64.16 q1, q0 -; CHECK-BE-NEXT: vcmp.s16 gt, q1, zr -; CHECK-BE-NEXT: vmrs r12, p0 -; CHECK-BE-NEXT: and r2, r12, #1 -; CHECK-BE-NEXT: rsbs r2, r2, #0 -; 
CHECK-BE-NEXT: bfi r3, r2, #0, #1 -; CHECK-BE-NEXT: ubfx r2, r12, #2, #1 -; CHECK-BE-NEXT: rsbs r2, r2, #0 -; CHECK-BE-NEXT: bfi r3, r2, #1, #1 -; CHECK-BE-NEXT: ubfx r2, r12, #4, #1 -; CHECK-BE-NEXT: rsbs r2, r2, #0 -; CHECK-BE-NEXT: bfi r3, r2, #2, #1 -; CHECK-BE-NEXT: ubfx r2, r12, #6, #1 -; CHECK-BE-NEXT: rsbs r2, r2, #0 -; CHECK-BE-NEXT: bfi r3, r2, #3, #1 -; CHECK-BE-NEXT: ubfx r2, r12, #8, #1 -; CHECK-BE-NEXT: rsbs r2, r2, #0 -; CHECK-BE-NEXT: bfi r3, r2, #4, #1 -; CHECK-BE-NEXT: ubfx r2, r12, #10, #1 -; CHECK-BE-NEXT: rsbs r2, r2, #0 -; CHECK-BE-NEXT: bfi r3, r2, #5, #1 -; CHECK-BE-NEXT: ubfx r2, r12, #12, #1 -; CHECK-BE-NEXT: rsbs r2, r2, #0 -; CHECK-BE-NEXT: bfi r3, r2, #6, #1 -; CHECK-BE-NEXT: ubfx r2, r12, #14, #1 -; CHECK-BE-NEXT: rsbs r2, r2, #0 -; CHECK-BE-NEXT: vldrh.u16 q0, [r1] -; CHECK-BE-NEXT: bfi r3, r2, #7, #1 -; CHECK-BE-NEXT: lsls r1, r3, #31 -; CHECK-BE-NEXT: uxtb r2, r3 -; CHECK-BE-NEXT: itt ne -; CHECK-BE-NEXT: vmovne.u16 r1, q0[0] -; CHECK-BE-NEXT: strhne r1, [r0] -; CHECK-BE-NEXT: lsls r1, r2, #30 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: vmovmi.u16 r1, q0[1] -; CHECK-BE-NEXT: strhmi r1, [r0, #2] -; CHECK-BE-NEXT: lsls r1, r2, #29 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: vmovmi.u16 r1, q0[2] -; CHECK-BE-NEXT: strhmi r1, [r0, #4] -; CHECK-BE-NEXT: lsls r1, r2, #28 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: vmovmi.u16 r1, q0[3] -; CHECK-BE-NEXT: strhmi r1, [r0, #6] -; CHECK-BE-NEXT: lsls r1, r2, #27 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: vmovmi.u16 r1, q0[4] -; CHECK-BE-NEXT: strhmi r1, [r0, #8] -; CHECK-BE-NEXT: lsls r1, r2, #26 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: vmovmi.u16 r1, q0[5] -; CHECK-BE-NEXT: strhmi r1, [r0, #10] -; CHECK-BE-NEXT: lsls r3, r2, #25 -; CHECK-BE-NEXT: add.w r1, r0, #4 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: vmovmi.u16 r3, q0[6] -; CHECK-BE-NEXT: strhmi r3, [r0, #12] -; CHECK-BE-NEXT: lsls r2, r2, #24 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: vmovmi.u16 r2, q0[7] -; CHECK-BE-NEXT: strhmi r2, [r0, #14] -; CHECK-BE-NEXT: mov r0, r1 -; CHECK-BE-NEXT: add sp, #8 -; CHECK-BE-NEXT: bx lr -entry: - %z = getelementptr inbounds i8, i8* %y, i32 4 - %0 = bitcast i8* %x to <8 x i16>* - %1 = load <8 x i16>, <8 x i16>* %0, align 4 - %2 = bitcast i8* %y to <8 x i16>* - %c = icmp sgt <8 x i16> %a, zeroinitializer - call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %1, <8 x i16>* %2, i32 2, <8 x i1> %c) - ret i8* %z -} - - -define arm_aapcs_vfpcc void @masked_v16i8(<16 x i8> *%dest, <16 x i8> %a) { -; CHECK-LE-LABEL: masked_v16i8: -; CHECK-LE: @ %bb.0: @ %entry -; CHECK-LE-NEXT: .save {r4, r6, r7, lr} -; CHECK-LE-NEXT: push {r4, r6, r7, lr} -; CHECK-LE-NEXT: .setfp r7, sp, #8 -; CHECK-LE-NEXT: add r7, sp, #8 -; CHECK-LE-NEXT: .pad #16 -; CHECK-LE-NEXT: sub sp, #16 -; CHECK-LE-NEXT: mov r4, sp -; CHECK-LE-NEXT: bfc r4, #0, #4 -; CHECK-LE-NEXT: mov sp, r4 -; CHECK-LE-NEXT: vcmp.s8 gt, q0, zr -; CHECK-LE-NEXT: sub.w r4, r7, #8 -; CHECK-LE-NEXT: vmrs r2, p0 -; CHECK-LE-NEXT: uxth r1, r2 -; CHECK-LE-NEXT: lsls r2, r2, #31 -; CHECK-LE-NEXT: itt ne -; CHECK-LE-NEXT: vmovne.u8 r2, q0[0] -; CHECK-LE-NEXT: strbne r2, [r0] -; CHECK-LE-NEXT: lsls r2, r1, #30 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: vmovmi.u8 r2, q0[1] -; CHECK-LE-NEXT: strbmi r2, [r0, #1] -; CHECK-LE-NEXT: lsls r2, r1, #29 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: vmovmi.u8 r2, q0[2] -; CHECK-LE-NEXT: strbmi r2, [r0, #2] -; CHECK-LE-NEXT: lsls r2, r1, #28 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: vmovmi.u8 r2, q0[3] -; CHECK-LE-NEXT: strbmi r2, [r0, #3] -; CHECK-LE-NEXT: lsls 
r2, r1, #27 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: vmovmi.u8 r2, q0[4] -; CHECK-LE-NEXT: strbmi r2, [r0, #4] -; CHECK-LE-NEXT: lsls r2, r1, #26 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: vmovmi.u8 r2, q0[5] -; CHECK-LE-NEXT: strbmi r2, [r0, #5] -; CHECK-LE-NEXT: lsls r2, r1, #25 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: vmovmi.u8 r2, q0[6] -; CHECK-LE-NEXT: strbmi r2, [r0, #6] -; CHECK-LE-NEXT: lsls r2, r1, #24 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: vmovmi.u8 r2, q0[7] -; CHECK-LE-NEXT: strbmi r2, [r0, #7] -; CHECK-LE-NEXT: lsls r2, r1, #23 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: vmovmi.u8 r2, q0[8] -; CHECK-LE-NEXT: strbmi r2, [r0, #8] -; CHECK-LE-NEXT: lsls r2, r1, #22 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: vmovmi.u8 r2, q0[9] -; CHECK-LE-NEXT: strbmi r2, [r0, #9] -; CHECK-LE-NEXT: lsls r2, r1, #21 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: vmovmi.u8 r2, q0[10] -; CHECK-LE-NEXT: strbmi r2, [r0, #10] -; CHECK-LE-NEXT: lsls r2, r1, #20 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: vmovmi.u8 r2, q0[11] -; CHECK-LE-NEXT: strbmi r2, [r0, #11] -; CHECK-LE-NEXT: lsls r2, r1, #19 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: vmovmi.u8 r2, q0[12] -; CHECK-LE-NEXT: strbmi r2, [r0, #12] -; CHECK-LE-NEXT: lsls r2, r1, #18 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: vmovmi.u8 r2, q0[13] -; CHECK-LE-NEXT: strbmi r2, [r0, #13] -; CHECK-LE-NEXT: lsls r2, r1, #17 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: vmovmi.u8 r2, q0[14] -; CHECK-LE-NEXT: strbmi r2, [r0, #14] -; CHECK-LE-NEXT: lsls r1, r1, #16 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: vmovmi.u8 r1, q0[15] -; CHECK-LE-NEXT: strbmi r1, [r0, #15] -; CHECK-LE-NEXT: mov sp, r4 -; CHECK-LE-NEXT: pop {r4, r6, r7, pc} -; -; CHECK-BE-LABEL: masked_v16i8: -; CHECK-BE: @ %bb.0: @ %entry -; CHECK-BE-NEXT: .save {r4, r6, r7, lr} -; CHECK-BE-NEXT: push {r4, r6, r7, lr} -; CHECK-BE-NEXT: .setfp r7, sp, #8 -; CHECK-BE-NEXT: add r7, sp, #8 -; CHECK-BE-NEXT: .pad #16 -; CHECK-BE-NEXT: sub sp, #16 -; CHECK-BE-NEXT: mov r4, sp -; CHECK-BE-NEXT: bfc r4, #0, #4 -; CHECK-BE-NEXT: mov sp, r4 -; CHECK-BE-NEXT: vrev64.8 q1, q0 -; CHECK-BE-NEXT: sub.w r4, r7, #8 -; CHECK-BE-NEXT: vcmp.s8 gt, q1, zr -; CHECK-BE-NEXT: vmrs r2, p0 -; CHECK-BE-NEXT: uxth r1, r2 -; CHECK-BE-NEXT: lsls r2, r2, #31 -; CHECK-BE-NEXT: itt ne -; CHECK-BE-NEXT: vmovne.u8 r2, q1[0] -; CHECK-BE-NEXT: strbne r2, [r0] -; CHECK-BE-NEXT: lsls r2, r1, #30 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: vmovmi.u8 r2, q1[1] -; CHECK-BE-NEXT: strbmi r2, [r0, #1] -; CHECK-BE-NEXT: lsls r2, r1, #29 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: vmovmi.u8 r2, q1[2] -; CHECK-BE-NEXT: strbmi r2, [r0, #2] -; CHECK-BE-NEXT: lsls r2, r1, #28 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: vmovmi.u8 r2, q1[3] -; CHECK-BE-NEXT: strbmi r2, [r0, #3] -; CHECK-BE-NEXT: lsls r2, r1, #27 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: vmovmi.u8 r2, q1[4] -; CHECK-BE-NEXT: strbmi r2, [r0, #4] -; CHECK-BE-NEXT: lsls r2, r1, #26 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: vmovmi.u8 r2, q1[5] -; CHECK-BE-NEXT: strbmi r2, [r0, #5] -; CHECK-BE-NEXT: lsls r2, r1, #25 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: vmovmi.u8 r2, q1[6] -; CHECK-BE-NEXT: strbmi r2, [r0, #6] -; CHECK-BE-NEXT: lsls r2, r1, #24 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: vmovmi.u8 r2, q1[7] -; CHECK-BE-NEXT: strbmi r2, [r0, #7] -; CHECK-BE-NEXT: lsls r2, r1, #23 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: vmovmi.u8 r2, q1[8] -; CHECK-BE-NEXT: strbmi r2, [r0, #8] -; CHECK-BE-NEXT: lsls r2, r1, #22 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: vmovmi.u8 r2, q1[9] -; 
CHECK-BE-NEXT: strbmi r2, [r0, #9] -; CHECK-BE-NEXT: lsls r2, r1, #21 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: vmovmi.u8 r2, q1[10] -; CHECK-BE-NEXT: strbmi r2, [r0, #10] -; CHECK-BE-NEXT: lsls r2, r1, #20 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: vmovmi.u8 r2, q1[11] -; CHECK-BE-NEXT: strbmi r2, [r0, #11] -; CHECK-BE-NEXT: lsls r2, r1, #19 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: vmovmi.u8 r2, q1[12] -; CHECK-BE-NEXT: strbmi r2, [r0, #12] -; CHECK-BE-NEXT: lsls r2, r1, #18 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: vmovmi.u8 r2, q1[13] -; CHECK-BE-NEXT: strbmi r2, [r0, #13] -; CHECK-BE-NEXT: lsls r2, r1, #17 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: vmovmi.u8 r2, q1[14] -; CHECK-BE-NEXT: strbmi r2, [r0, #14] -; CHECK-BE-NEXT: lsls r1, r1, #16 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: vmovmi.u8 r1, q1[15] -; CHECK-BE-NEXT: strbmi r1, [r0, #15] -; CHECK-BE-NEXT: mov sp, r4 -; CHECK-BE-NEXT: pop {r4, r6, r7, pc} -entry: - %c = icmp sgt <16 x i8> %a, zeroinitializer - call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %a, <16 x i8>* %dest, i32 1, <16 x i1> %c) - ret void -} - -define i8* @masked_v16i8_pre(i8* %y, i8* %x, <16 x i8> %a) { -; CHECK-LE-LABEL: masked_v16i8_pre: -; CHECK-LE: @ %bb.0: @ %entry -; CHECK-LE-NEXT: .save {r4, r6, r7, lr} -; CHECK-LE-NEXT: push {r4, r6, r7, lr} -; CHECK-LE-NEXT: .setfp r7, sp, #8 -; CHECK-LE-NEXT: add r7, sp, #8 -; CHECK-LE-NEXT: .pad #16 -; CHECK-LE-NEXT: sub sp, #16 -; CHECK-LE-NEXT: mov r4, sp -; CHECK-LE-NEXT: bfc r4, #0, #4 -; CHECK-LE-NEXT: mov sp, r4 -; CHECK-LE-NEXT: vldr d1, [r7, #8] -; CHECK-LE-NEXT: adds r0, #4 -; CHECK-LE-NEXT: vmov d0, r2, r3 -; CHECK-LE-NEXT: sub.w r4, r7, #8 -; CHECK-LE-NEXT: vcmp.s8 gt, q0, zr -; CHECK-LE-NEXT: vldrw.u32 q0, [r1] -; CHECK-LE-NEXT: vmrs r2, p0 -; CHECK-LE-NEXT: uxth r1, r2 -; CHECK-LE-NEXT: lsls r2, r2, #31 -; CHECK-LE-NEXT: itt ne -; CHECK-LE-NEXT: vmovne.u8 r2, q0[0] -; CHECK-LE-NEXT: strbne r2, [r0] -; CHECK-LE-NEXT: lsls r2, r1, #30 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: vmovmi.u8 r2, q0[1] -; CHECK-LE-NEXT: strbmi r2, [r0, #1] -; CHECK-LE-NEXT: lsls r2, r1, #29 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: vmovmi.u8 r2, q0[2] -; CHECK-LE-NEXT: strbmi r2, [r0, #2] -; CHECK-LE-NEXT: lsls r2, r1, #28 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: vmovmi.u8 r2, q0[3] -; CHECK-LE-NEXT: strbmi r2, [r0, #3] -; CHECK-LE-NEXT: lsls r2, r1, #27 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: vmovmi.u8 r2, q0[4] -; CHECK-LE-NEXT: strbmi r2, [r0, #4] -; CHECK-LE-NEXT: lsls r2, r1, #26 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: vmovmi.u8 r2, q0[5] -; CHECK-LE-NEXT: strbmi r2, [r0, #5] -; CHECK-LE-NEXT: lsls r2, r1, #25 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: vmovmi.u8 r2, q0[6] -; CHECK-LE-NEXT: strbmi r2, [r0, #6] -; CHECK-LE-NEXT: lsls r2, r1, #24 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: vmovmi.u8 r2, q0[7] -; CHECK-LE-NEXT: strbmi r2, [r0, #7] -; CHECK-LE-NEXT: lsls r2, r1, #23 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: vmovmi.u8 r2, q0[8] -; CHECK-LE-NEXT: strbmi r2, [r0, #8] -; CHECK-LE-NEXT: lsls r2, r1, #22 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: vmovmi.u8 r2, q0[9] -; CHECK-LE-NEXT: strbmi r2, [r0, #9] -; CHECK-LE-NEXT: lsls r2, r1, #21 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: vmovmi.u8 r2, q0[10] -; CHECK-LE-NEXT: strbmi r2, [r0, #10] -; CHECK-LE-NEXT: lsls r2, r1, #20 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: vmovmi.u8 r2, q0[11] -; CHECK-LE-NEXT: strbmi r2, [r0, #11] -; CHECK-LE-NEXT: lsls r2, r1, #19 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: vmovmi.u8 r2, q0[12] -; CHECK-LE-NEXT: strbmi 
r2, [r0, #12] -; CHECK-LE-NEXT: lsls r2, r1, #18 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: vmovmi.u8 r2, q0[13] -; CHECK-LE-NEXT: strbmi r2, [r0, #13] -; CHECK-LE-NEXT: lsls r2, r1, #17 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: vmovmi.u8 r2, q0[14] -; CHECK-LE-NEXT: strbmi r2, [r0, #14] -; CHECK-LE-NEXT: lsls r1, r1, #16 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: vmovmi.u8 r1, q0[15] -; CHECK-LE-NEXT: strbmi r1, [r0, #15] -; CHECK-LE-NEXT: mov sp, r4 -; CHECK-LE-NEXT: pop {r4, r6, r7, pc} -; -; CHECK-BE-LABEL: masked_v16i8_pre: -; CHECK-BE: @ %bb.0: @ %entry -; CHECK-BE-NEXT: .save {r4, r6, r7, lr} -; CHECK-BE-NEXT: push {r4, r6, r7, lr} -; CHECK-BE-NEXT: .setfp r7, sp, #8 -; CHECK-BE-NEXT: add r7, sp, #8 -; CHECK-BE-NEXT: .pad #16 -; CHECK-BE-NEXT: sub sp, #16 -; CHECK-BE-NEXT: mov r4, sp -; CHECK-BE-NEXT: bfc r4, #0, #4 -; CHECK-BE-NEXT: mov sp, r4 -; CHECK-BE-NEXT: vldr d1, [r7, #8] -; CHECK-BE-NEXT: adds r0, #4 -; CHECK-BE-NEXT: vmov d0, r3, r2 -; CHECK-BE-NEXT: sub.w r4, r7, #8 -; CHECK-BE-NEXT: vrev64.8 q1, q0 -; CHECK-BE-NEXT: vldrb.u8 q0, [r1] -; CHECK-BE-NEXT: vcmp.s8 gt, q1, zr -; CHECK-BE-NEXT: vmrs r2, p0 -; CHECK-BE-NEXT: uxth r1, r2 -; CHECK-BE-NEXT: lsls r2, r2, #31 -; CHECK-BE-NEXT: itt ne -; CHECK-BE-NEXT: vmovne.u8 r2, q0[0] -; CHECK-BE-NEXT: strbne r2, [r0] -; CHECK-BE-NEXT: lsls r2, r1, #30 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: vmovmi.u8 r2, q0[1] -; CHECK-BE-NEXT: strbmi r2, [r0, #1] -; CHECK-BE-NEXT: lsls r2, r1, #29 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: vmovmi.u8 r2, q0[2] -; CHECK-BE-NEXT: strbmi r2, [r0, #2] -; CHECK-BE-NEXT: lsls r2, r1, #28 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: vmovmi.u8 r2, q0[3] -; CHECK-BE-NEXT: strbmi r2, [r0, #3] -; CHECK-BE-NEXT: lsls r2, r1, #27 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: vmovmi.u8 r2, q0[4] -; CHECK-BE-NEXT: strbmi r2, [r0, #4] -; CHECK-BE-NEXT: lsls r2, r1, #26 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: vmovmi.u8 r2, q0[5] -; CHECK-BE-NEXT: strbmi r2, [r0, #5] -; CHECK-BE-NEXT: lsls r2, r1, #25 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: vmovmi.u8 r2, q0[6] -; CHECK-BE-NEXT: strbmi r2, [r0, #6] -; CHECK-BE-NEXT: lsls r2, r1, #24 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: vmovmi.u8 r2, q0[7] -; CHECK-BE-NEXT: strbmi r2, [r0, #7] -; CHECK-BE-NEXT: lsls r2, r1, #23 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: vmovmi.u8 r2, q0[8] -; CHECK-BE-NEXT: strbmi r2, [r0, #8] -; CHECK-BE-NEXT: lsls r2, r1, #22 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: vmovmi.u8 r2, q0[9] -; CHECK-BE-NEXT: strbmi r2, [r0, #9] -; CHECK-BE-NEXT: lsls r2, r1, #21 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: vmovmi.u8 r2, q0[10] -; CHECK-BE-NEXT: strbmi r2, [r0, #10] -; CHECK-BE-NEXT: lsls r2, r1, #20 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: vmovmi.u8 r2, q0[11] -; CHECK-BE-NEXT: strbmi r2, [r0, #11] -; CHECK-BE-NEXT: lsls r2, r1, #19 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: vmovmi.u8 r2, q0[12] -; CHECK-BE-NEXT: strbmi r2, [r0, #12] -; CHECK-BE-NEXT: lsls r2, r1, #18 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: vmovmi.u8 r2, q0[13] -; CHECK-BE-NEXT: strbmi r2, [r0, #13] -; CHECK-BE-NEXT: lsls r2, r1, #17 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: vmovmi.u8 r2, q0[14] -; CHECK-BE-NEXT: strbmi r2, [r0, #14] -; CHECK-BE-NEXT: lsls r1, r1, #16 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: vmovmi.u8 r1, q0[15] -; CHECK-BE-NEXT: strbmi r1, [r0, #15] -; CHECK-BE-NEXT: mov sp, r4 -; CHECK-BE-NEXT: pop {r4, r6, r7, pc} -entry: - %z = getelementptr inbounds i8, i8* %y, i32 4 - %0 = bitcast i8* %x to <16 x i8>* - %1 = load <16 x i8>, <16 x i8>* %0, align 
4 - %2 = bitcast i8* %z to <16 x i8>* - %c = icmp sgt <16 x i8> %a, zeroinitializer - call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %1, <16 x i8>* %2, i32 1, <16 x i1> %c) - ret i8* %z -} - -define i8* @masked_v16i8_post(i8* %y, i8* %x, <16 x i8> %a) { -; CHECK-LE-LABEL: masked_v16i8_post: -; CHECK-LE: @ %bb.0: @ %entry -; CHECK-LE-NEXT: .save {r4, r6, r7, lr} -; CHECK-LE-NEXT: push {r4, r6, r7, lr} -; CHECK-LE-NEXT: .setfp r7, sp, #8 -; CHECK-LE-NEXT: add r7, sp, #8 -; CHECK-LE-NEXT: .pad #16 -; CHECK-LE-NEXT: sub sp, #16 -; CHECK-LE-NEXT: mov r4, sp -; CHECK-LE-NEXT: bfc r4, #0, #4 -; CHECK-LE-NEXT: mov sp, r4 -; CHECK-LE-NEXT: vldr d1, [r7, #8] -; CHECK-LE-NEXT: sub.w r4, r7, #8 -; CHECK-LE-NEXT: vmov d0, r2, r3 -; CHECK-LE-NEXT: vcmp.s8 gt, q0, zr -; CHECK-LE-NEXT: vldrw.u32 q0, [r1] -; CHECK-LE-NEXT: vmrs r1, p0 -; CHECK-LE-NEXT: uxth r2, r1 -; CHECK-LE-NEXT: lsls r1, r1, #31 -; CHECK-LE-NEXT: itt ne -; CHECK-LE-NEXT: vmovne.u8 r1, q0[0] -; CHECK-LE-NEXT: strbne r1, [r0] -; CHECK-LE-NEXT: lsls r1, r2, #30 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: vmovmi.u8 r1, q0[1] -; CHECK-LE-NEXT: strbmi r1, [r0, #1] -; CHECK-LE-NEXT: lsls r1, r2, #29 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: vmovmi.u8 r1, q0[2] -; CHECK-LE-NEXT: strbmi r1, [r0, #2] -; CHECK-LE-NEXT: lsls r1, r2, #28 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: vmovmi.u8 r1, q0[3] -; CHECK-LE-NEXT: strbmi r1, [r0, #3] -; CHECK-LE-NEXT: lsls r1, r2, #27 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: vmovmi.u8 r1, q0[4] -; CHECK-LE-NEXT: strbmi r1, [r0, #4] -; CHECK-LE-NEXT: lsls r1, r2, #26 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: vmovmi.u8 r1, q0[5] -; CHECK-LE-NEXT: strbmi r1, [r0, #5] -; CHECK-LE-NEXT: lsls r1, r2, #25 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: vmovmi.u8 r1, q0[6] -; CHECK-LE-NEXT: strbmi r1, [r0, #6] -; CHECK-LE-NEXT: lsls r1, r2, #24 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: vmovmi.u8 r1, q0[7] -; CHECK-LE-NEXT: strbmi r1, [r0, #7] -; CHECK-LE-NEXT: lsls r1, r2, #23 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: vmovmi.u8 r1, q0[8] -; CHECK-LE-NEXT: strbmi r1, [r0, #8] -; CHECK-LE-NEXT: lsls r1, r2, #22 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: vmovmi.u8 r1, q0[9] -; CHECK-LE-NEXT: strbmi r1, [r0, #9] -; CHECK-LE-NEXT: lsls r1, r2, #21 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: vmovmi.u8 r1, q0[10] -; CHECK-LE-NEXT: strbmi r1, [r0, #10] -; CHECK-LE-NEXT: lsls r1, r2, #20 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: vmovmi.u8 r1, q0[11] -; CHECK-LE-NEXT: strbmi r1, [r0, #11] -; CHECK-LE-NEXT: lsls r1, r2, #19 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: vmovmi.u8 r1, q0[12] -; CHECK-LE-NEXT: strbmi r1, [r0, #12] -; CHECK-LE-NEXT: lsls r1, r2, #18 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: vmovmi.u8 r1, q0[13] -; CHECK-LE-NEXT: strbmi r1, [r0, #13] -; CHECK-LE-NEXT: lsls r3, r2, #17 -; CHECK-LE-NEXT: add.w r1, r0, #4 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: vmovmi.u8 r3, q0[14] -; CHECK-LE-NEXT: strbmi r3, [r0, #14] -; CHECK-LE-NEXT: lsls r2, r2, #16 -; CHECK-LE-NEXT: itt mi -; CHECK-LE-NEXT: vmovmi.u8 r2, q0[15] -; CHECK-LE-NEXT: strbmi r2, [r0, #15] -; CHECK-LE-NEXT: mov r0, r1 -; CHECK-LE-NEXT: mov sp, r4 -; CHECK-LE-NEXT: pop {r4, r6, r7, pc} -; -; CHECK-BE-LABEL: masked_v16i8_post: -; CHECK-BE: @ %bb.0: @ %entry -; CHECK-BE-NEXT: .save {r4, r6, r7, lr} -; CHECK-BE-NEXT: push {r4, r6, r7, lr} -; CHECK-BE-NEXT: .setfp r7, sp, #8 -; CHECK-BE-NEXT: add r7, sp, #8 -; CHECK-BE-NEXT: .pad #16 -; CHECK-BE-NEXT: sub sp, #16 -; CHECK-BE-NEXT: mov r4, sp -; CHECK-BE-NEXT: bfc r4, #0, #4 -; CHECK-BE-NEXT: mov sp, r4 -; 
CHECK-BE-NEXT: vldr d1, [r7, #8] -; CHECK-BE-NEXT: sub.w r4, r7, #8 -; CHECK-BE-NEXT: vmov d0, r3, r2 -; CHECK-BE-NEXT: vrev64.8 q1, q0 -; CHECK-BE-NEXT: vldrb.u8 q0, [r1] -; CHECK-BE-NEXT: vcmp.s8 gt, q1, zr -; CHECK-BE-NEXT: vmrs r1, p0 -; CHECK-BE-NEXT: uxth r2, r1 -; CHECK-BE-NEXT: lsls r1, r1, #31 -; CHECK-BE-NEXT: itt ne -; CHECK-BE-NEXT: vmovne.u8 r1, q0[0] -; CHECK-BE-NEXT: strbne r1, [r0] -; CHECK-BE-NEXT: lsls r1, r2, #30 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: vmovmi.u8 r1, q0[1] -; CHECK-BE-NEXT: strbmi r1, [r0, #1] -; CHECK-BE-NEXT: lsls r1, r2, #29 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: vmovmi.u8 r1, q0[2] -; CHECK-BE-NEXT: strbmi r1, [r0, #2] -; CHECK-BE-NEXT: lsls r1, r2, #28 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: vmovmi.u8 r1, q0[3] -; CHECK-BE-NEXT: strbmi r1, [r0, #3] -; CHECK-BE-NEXT: lsls r1, r2, #27 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: vmovmi.u8 r1, q0[4] -; CHECK-BE-NEXT: strbmi r1, [r0, #4] -; CHECK-BE-NEXT: lsls r1, r2, #26 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: vmovmi.u8 r1, q0[5] -; CHECK-BE-NEXT: strbmi r1, [r0, #5] -; CHECK-BE-NEXT: lsls r1, r2, #25 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: vmovmi.u8 r1, q0[6] -; CHECK-BE-NEXT: strbmi r1, [r0, #6] -; CHECK-BE-NEXT: lsls r1, r2, #24 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: vmovmi.u8 r1, q0[7] -; CHECK-BE-NEXT: strbmi r1, [r0, #7] -; CHECK-BE-NEXT: lsls r1, r2, #23 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: vmovmi.u8 r1, q0[8] -; CHECK-BE-NEXT: strbmi r1, [r0, #8] -; CHECK-BE-NEXT: lsls r1, r2, #22 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: vmovmi.u8 r1, q0[9] -; CHECK-BE-NEXT: strbmi r1, [r0, #9] -; CHECK-BE-NEXT: lsls r1, r2, #21 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: vmovmi.u8 r1, q0[10] -; CHECK-BE-NEXT: strbmi r1, [r0, #10] -; CHECK-BE-NEXT: lsls r1, r2, #20 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: vmovmi.u8 r1, q0[11] -; CHECK-BE-NEXT: strbmi r1, [r0, #11] -; CHECK-BE-NEXT: lsls r1, r2, #19 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: vmovmi.u8 r1, q0[12] -; CHECK-BE-NEXT: strbmi r1, [r0, #12] -; CHECK-BE-NEXT: lsls r1, r2, #18 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: vmovmi.u8 r1, q0[13] -; CHECK-BE-NEXT: strbmi r1, [r0, #13] -; CHECK-BE-NEXT: lsls r3, r2, #17 -; CHECK-BE-NEXT: add.w r1, r0, #4 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: vmovmi.u8 r3, q0[14] -; CHECK-BE-NEXT: strbmi r3, [r0, #14] -; CHECK-BE-NEXT: lsls r2, r2, #16 -; CHECK-BE-NEXT: itt mi -; CHECK-BE-NEXT: vmovmi.u8 r2, q0[15] -; CHECK-BE-NEXT: strbmi r2, [r0, #15] -; CHECK-BE-NEXT: mov r0, r1 -; CHECK-BE-NEXT: mov sp, r4 -; CHECK-BE-NEXT: pop {r4, r6, r7, pc} -entry: - %z = getelementptr inbounds i8, i8* %y, i32 4 - %0 = bitcast i8* %x to <16 x i8>* - %1 = load <16 x i8>, <16 x i8>* %0, align 4 - %2 = bitcast i8* %y to <16 x i8>* - %c = icmp sgt <16 x i8> %a, zeroinitializer - call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %1, <16 x i8>* %2, i32 1, <16 x i1> %c) - ret i8* %z -} - - -define arm_aapcs_vfpcc void @masked_v4f32(<4 x float> *%dest, <4 x float> %a, <4 x i32> %b) { -; CHECK-LE-LABEL: masked_v4f32: -; CHECK-LE: @ %bb.0: @ %entry -; CHECK-LE-NEXT: .pad #4 -; CHECK-LE-NEXT: sub sp, #4 -; CHECK-LE-NEXT: vcmp.i32 ne, q1, zr -; CHECK-LE-NEXT: movs r1, #0 -; CHECK-LE-NEXT: vmrs r2, p0 -; CHECK-LE-NEXT: and r3, r2, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r1, r3, #0, #1 -; CHECK-LE-NEXT: ubfx r3, r2, #4, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r1, r3, #1, #1 -; CHECK-LE-NEXT: ubfx r3, r2, #8, #1 -; CHECK-LE-NEXT: ubfx r2, r2, #12, #1 -; CHECK-LE-NEXT: rsbs r3, r3, 
#0 -; CHECK-LE-NEXT: bfi r1, r3, #2, #1 -; CHECK-LE-NEXT: rsbs r2, r2, #0 -; CHECK-LE-NEXT: bfi r1, r2, #3, #1 -; CHECK-LE-NEXT: and r1, r1, #15 -; CHECK-LE-NEXT: lsls r2, r1, #31 -; CHECK-LE-NEXT: it ne -; CHECK-LE-NEXT: vstrne s0, [r0] -; CHECK-LE-NEXT: lsls r2, r1, #30 -; CHECK-LE-NEXT: it mi -; CHECK-LE-NEXT: vstrmi s1, [r0, #4] -; CHECK-LE-NEXT: lsls r2, r1, #29 -; CHECK-LE-NEXT: it mi -; CHECK-LE-NEXT: vstrmi s2, [r0, #8] -; CHECK-LE-NEXT: lsls r1, r1, #28 -; CHECK-LE-NEXT: it mi -; CHECK-LE-NEXT: vstrmi s3, [r0, #12] -; CHECK-LE-NEXT: add sp, #4 -; CHECK-LE-NEXT: bx lr -; -; CHECK-BE-LABEL: masked_v4f32: -; CHECK-BE: @ %bb.0: @ %entry -; CHECK-BE-NEXT: .pad #4 -; CHECK-BE-NEXT: sub sp, #4 -; CHECK-BE-NEXT: vrev64.32 q2, q1 -; CHECK-BE-NEXT: movs r1, #0 -; CHECK-BE-NEXT: vcmp.i32 ne, q2, zr -; CHECK-BE-NEXT: vrev64.32 q1, q0 -; CHECK-BE-NEXT: vmrs r2, p0 -; CHECK-BE-NEXT: and r3, r2, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r1, r3, #0, #1 -; CHECK-BE-NEXT: ubfx r3, r2, #4, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r1, r3, #1, #1 -; CHECK-BE-NEXT: ubfx r3, r2, #8, #1 -; CHECK-BE-NEXT: ubfx r2, r2, #12, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r1, r3, #2, #1 -; CHECK-BE-NEXT: rsbs r2, r2, #0 -; CHECK-BE-NEXT: bfi r1, r2, #3, #1 -; CHECK-BE-NEXT: and r1, r1, #15 -; CHECK-BE-NEXT: lsls r2, r1, #31 -; CHECK-BE-NEXT: it ne -; CHECK-BE-NEXT: vstrne s4, [r0] -; CHECK-BE-NEXT: lsls r2, r1, #30 -; CHECK-BE-NEXT: it mi -; CHECK-BE-NEXT: vstrmi s5, [r0, #4] -; CHECK-BE-NEXT: lsls r2, r1, #29 -; CHECK-BE-NEXT: it mi -; CHECK-BE-NEXT: vstrmi s6, [r0, #8] -; CHECK-BE-NEXT: lsls r1, r1, #28 -; CHECK-BE-NEXT: it mi -; CHECK-BE-NEXT: vstrmi s7, [r0, #12] -; CHECK-BE-NEXT: add sp, #4 -; CHECK-BE-NEXT: bx lr -entry: - %c = icmp ugt <4 x i32> %b, zeroinitializer - call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %a, <4 x float>* %dest, i32 4, <4 x i1> %c) - ret void -} - -define arm_aapcs_vfpcc void @masked_v4f32_align1(<4 x float> *%dest, <4 x float> %a, <4 x i32> %b) { -; CHECK-LE-LABEL: masked_v4f32_align1: -; CHECK-LE: @ %bb.0: @ %entry -; CHECK-LE-NEXT: .pad #20 -; CHECK-LE-NEXT: sub sp, #20 -; CHECK-LE-NEXT: vcmp.i32 ne, q1, zr -; CHECK-LE-NEXT: movs r1, #0 -; CHECK-LE-NEXT: vmrs r2, p0 -; CHECK-LE-NEXT: and r3, r2, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r1, r3, #0, #1 -; CHECK-LE-NEXT: ubfx r3, r2, #4, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r1, r3, #1, #1 -; CHECK-LE-NEXT: ubfx r3, r2, #8, #1 -; CHECK-LE-NEXT: ubfx r2, r2, #12, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r1, r3, #2, #1 -; CHECK-LE-NEXT: rsbs r2, r2, #0 -; CHECK-LE-NEXT: bfi r1, r2, #3, #1 -; CHECK-LE-NEXT: and r1, r1, #15 -; CHECK-LE-NEXT: lsls r2, r1, #31 -; CHECK-LE-NEXT: ittt ne -; CHECK-LE-NEXT: vstrne s0, [sp, #12] -; CHECK-LE-NEXT: ldrne r2, [sp, #12] -; CHECK-LE-NEXT: strne r2, [r0] -; CHECK-LE-NEXT: lsls r2, r1, #30 -; CHECK-LE-NEXT: ittt mi -; CHECK-LE-NEXT: vstrmi s1, [sp, #8] -; CHECK-LE-NEXT: ldrmi r2, [sp, #8] -; CHECK-LE-NEXT: strmi r2, [r0, #4] -; CHECK-LE-NEXT: lsls r2, r1, #29 -; CHECK-LE-NEXT: ittt mi -; CHECK-LE-NEXT: vstrmi s2, [sp, #4] -; CHECK-LE-NEXT: ldrmi r2, [sp, #4] -; CHECK-LE-NEXT: strmi r2, [r0, #8] -; CHECK-LE-NEXT: lsls r1, r1, #28 -; CHECK-LE-NEXT: ittt mi -; CHECK-LE-NEXT: vstrmi s3, [sp] -; CHECK-LE-NEXT: ldrmi r1, [sp] -; CHECK-LE-NEXT: strmi r1, [r0, #12] -; CHECK-LE-NEXT: add sp, #20 -; CHECK-LE-NEXT: bx lr -; -; CHECK-BE-LABEL: masked_v4f32_align1: -; CHECK-BE: @ %bb.0: @ %entry 
-; CHECK-BE-NEXT: .pad #20 -; CHECK-BE-NEXT: sub sp, #20 -; CHECK-BE-NEXT: vrev64.32 q2, q1 -; CHECK-BE-NEXT: movs r1, #0 -; CHECK-BE-NEXT: vcmp.i32 ne, q2, zr -; CHECK-BE-NEXT: vrev64.32 q1, q0 -; CHECK-BE-NEXT: vmrs r2, p0 -; CHECK-BE-NEXT: and r3, r2, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r1, r3, #0, #1 -; CHECK-BE-NEXT: ubfx r3, r2, #4, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r1, r3, #1, #1 -; CHECK-BE-NEXT: ubfx r3, r2, #8, #1 -; CHECK-BE-NEXT: ubfx r2, r2, #12, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r1, r3, #2, #1 -; CHECK-BE-NEXT: rsbs r2, r2, #0 -; CHECK-BE-NEXT: bfi r1, r2, #3, #1 -; CHECK-BE-NEXT: and r1, r1, #15 -; CHECK-BE-NEXT: lsls r2, r1, #31 -; CHECK-BE-NEXT: ittt ne -; CHECK-BE-NEXT: vstrne s4, [sp, #12] -; CHECK-BE-NEXT: ldrne r2, [sp, #12] -; CHECK-BE-NEXT: strne r2, [r0] -; CHECK-BE-NEXT: lsls r2, r1, #30 -; CHECK-BE-NEXT: ittt mi -; CHECK-BE-NEXT: vstrmi s5, [sp, #8] -; CHECK-BE-NEXT: ldrmi r2, [sp, #8] -; CHECK-BE-NEXT: strmi r2, [r0, #4] -; CHECK-BE-NEXT: lsls r2, r1, #29 -; CHECK-BE-NEXT: ittt mi -; CHECK-BE-NEXT: vstrmi s6, [sp, #4] -; CHECK-BE-NEXT: ldrmi r2, [sp, #4] -; CHECK-BE-NEXT: strmi r2, [r0, #8] -; CHECK-BE-NEXT: lsls r1, r1, #28 -; CHECK-BE-NEXT: ittt mi -; CHECK-BE-NEXT: vstrmi s7, [sp] -; CHECK-BE-NEXT: ldrmi r1, [sp] -; CHECK-BE-NEXT: strmi r1, [r0, #12] -; CHECK-BE-NEXT: add sp, #20 -; CHECK-BE-NEXT: bx lr -entry: - %c = icmp ugt <4 x i32> %b, zeroinitializer - call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %a, <4 x float>* %dest, i32 1, <4 x i1> %c) - ret void -} - -define i8* @masked_v4f32_pre(i8* %y, i8* %x, <4 x i32> %a) { -; CHECK-LE-LABEL: masked_v4f32_pre: -; CHECK-LE: @ %bb.0: @ %entry -; CHECK-LE-NEXT: .pad #8 -; CHECK-LE-NEXT: sub sp, #8 -; CHECK-LE-NEXT: vldr d1, [sp, #8] -; CHECK-LE-NEXT: adds r0, #4 -; CHECK-LE-NEXT: vmov d0, r2, r3 -; CHECK-LE-NEXT: movs r2, #0 -; CHECK-LE-NEXT: vcmp.s32 gt, q0, zr -; CHECK-LE-NEXT: vmrs r12, p0 -; CHECK-LE-NEXT: and r3, r12, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #0, #1 -; CHECK-LE-NEXT: ubfx r3, r12, #4, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #1, #1 -; CHECK-LE-NEXT: ubfx r3, r12, #8, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #2, #1 -; CHECK-LE-NEXT: ubfx r3, r12, #12, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: vldrw.u32 q0, [r1] -; CHECK-LE-NEXT: bfi r2, r3, #3, #1 -; CHECK-LE-NEXT: and r1, r2, #15 -; CHECK-LE-NEXT: lsls r2, r1, #31 -; CHECK-LE-NEXT: it ne -; CHECK-LE-NEXT: vstrne s0, [r0] -; CHECK-LE-NEXT: lsls r2, r1, #30 -; CHECK-LE-NEXT: it mi -; CHECK-LE-NEXT: vstrmi s1, [r0, #4] -; CHECK-LE-NEXT: lsls r2, r1, #29 -; CHECK-LE-NEXT: it mi -; CHECK-LE-NEXT: vstrmi s2, [r0, #8] -; CHECK-LE-NEXT: lsls r1, r1, #28 -; CHECK-LE-NEXT: it mi -; CHECK-LE-NEXT: vstrmi s3, [r0, #12] -; CHECK-LE-NEXT: add sp, #8 -; CHECK-LE-NEXT: bx lr -; -; CHECK-BE-LABEL: masked_v4f32_pre: -; CHECK-BE: @ %bb.0: @ %entry -; CHECK-BE-NEXT: .pad #8 -; CHECK-BE-NEXT: sub sp, #8 -; CHECK-BE-NEXT: vldr d1, [sp, #8] -; CHECK-BE-NEXT: adds r0, #4 -; CHECK-BE-NEXT: vmov d0, r3, r2 -; CHECK-BE-NEXT: movs r2, #0 -; CHECK-BE-NEXT: vrev64.32 q1, q0 -; CHECK-BE-NEXT: vcmp.s32 gt, q1, zr -; CHECK-BE-NEXT: vmrs r12, p0 -; CHECK-BE-NEXT: and r3, r12, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #0, #1 -; CHECK-BE-NEXT: ubfx r3, r12, #4, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #1, #1 -; CHECK-BE-NEXT: ubfx r3, r12, #8, #1 -; 
CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #2, #1 -; CHECK-BE-NEXT: ubfx r3, r12, #12, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: vldrw.u32 q0, [r1] -; CHECK-BE-NEXT: bfi r2, r3, #3, #1 -; CHECK-BE-NEXT: and r1, r2, #15 -; CHECK-BE-NEXT: lsls r2, r1, #31 -; CHECK-BE-NEXT: it ne -; CHECK-BE-NEXT: vstrne s0, [r0] -; CHECK-BE-NEXT: lsls r2, r1, #30 -; CHECK-BE-NEXT: it mi -; CHECK-BE-NEXT: vstrmi s1, [r0, #4] -; CHECK-BE-NEXT: lsls r2, r1, #29 -; CHECK-BE-NEXT: it mi -; CHECK-BE-NEXT: vstrmi s2, [r0, #8] -; CHECK-BE-NEXT: lsls r1, r1, #28 -; CHECK-BE-NEXT: it mi -; CHECK-BE-NEXT: vstrmi s3, [r0, #12] -; CHECK-BE-NEXT: add sp, #8 -; CHECK-BE-NEXT: bx lr -entry: - %z = getelementptr inbounds i8, i8* %y, i32 4 - %0 = bitcast i8* %x to <4 x float>* - %1 = load <4 x float>, <4 x float>* %0, align 4 - %2 = bitcast i8* %z to <4 x float>* - %c = icmp sgt <4 x i32> %a, zeroinitializer - call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %1, <4 x float>* %2, i32 4, <4 x i1> %c) - ret i8* %z -} - -define i8* @masked_v4f32_post(i8* %y, i8* %x, <4 x i32> %a) { -; CHECK-LE-LABEL: masked_v4f32_post: -; CHECK-LE: @ %bb.0: @ %entry -; CHECK-LE-NEXT: .pad #8 -; CHECK-LE-NEXT: sub sp, #8 -; CHECK-LE-NEXT: vldr d1, [sp, #8] -; CHECK-LE-NEXT: vmov d0, r2, r3 -; CHECK-LE-NEXT: movs r2, #0 -; CHECK-LE-NEXT: vcmp.s32 gt, q0, zr -; CHECK-LE-NEXT: vmrs r12, p0 -; CHECK-LE-NEXT: and r3, r12, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #0, #1 -; CHECK-LE-NEXT: ubfx r3, r12, #4, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #1, #1 -; CHECK-LE-NEXT: ubfx r3, r12, #8, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #2, #1 -; CHECK-LE-NEXT: ubfx r3, r12, #12, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: vldrw.u32 q0, [r1] -; CHECK-LE-NEXT: bfi r2, r3, #3, #1 -; CHECK-LE-NEXT: and r2, r2, #15 -; CHECK-LE-NEXT: lsls r1, r2, #31 -; CHECK-LE-NEXT: it ne -; CHECK-LE-NEXT: vstrne s0, [r0] -; CHECK-LE-NEXT: lsls r1, r2, #30 -; CHECK-LE-NEXT: it mi -; CHECK-LE-NEXT: vstrmi s1, [r0, #4] -; CHECK-LE-NEXT: adds r1, r0, #4 -; CHECK-LE-NEXT: lsls r3, r2, #29 -; CHECK-LE-NEXT: it mi -; CHECK-LE-NEXT: vstrmi s2, [r0, #8] -; CHECK-LE-NEXT: lsls r2, r2, #28 -; CHECK-LE-NEXT: it mi -; CHECK-LE-NEXT: vstrmi s3, [r0, #12] -; CHECK-LE-NEXT: mov r0, r1 -; CHECK-LE-NEXT: add sp, #8 -; CHECK-LE-NEXT: bx lr -; -; CHECK-BE-LABEL: masked_v4f32_post: -; CHECK-BE: @ %bb.0: @ %entry -; CHECK-BE-NEXT: .pad #8 -; CHECK-BE-NEXT: sub sp, #8 -; CHECK-BE-NEXT: vldr d1, [sp, #8] -; CHECK-BE-NEXT: vmov d0, r3, r2 -; CHECK-BE-NEXT: movs r2, #0 -; CHECK-BE-NEXT: vrev64.32 q1, q0 -; CHECK-BE-NEXT: vcmp.s32 gt, q1, zr -; CHECK-BE-NEXT: vmrs r12, p0 -; CHECK-BE-NEXT: and r3, r12, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #0, #1 -; CHECK-BE-NEXT: ubfx r3, r12, #4, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #1, #1 -; CHECK-BE-NEXT: ubfx r3, r12, #8, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #2, #1 -; CHECK-BE-NEXT: ubfx r3, r12, #12, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: vldrw.u32 q0, [r1] -; CHECK-BE-NEXT: bfi r2, r3, #3, #1 -; CHECK-BE-NEXT: and r2, r2, #15 -; CHECK-BE-NEXT: lsls r1, r2, #31 -; CHECK-BE-NEXT: it ne -; CHECK-BE-NEXT: vstrne s0, [r0] -; CHECK-BE-NEXT: lsls r1, r2, #30 -; CHECK-BE-NEXT: it mi -; CHECK-BE-NEXT: vstrmi s1, [r0, #4] -; CHECK-BE-NEXT: adds r1, r0, #4 -; CHECK-BE-NEXT: lsls r3, r2, #29 -; CHECK-BE-NEXT: it mi -; CHECK-BE-NEXT: vstrmi s2, [r0, #8] 
-; CHECK-BE-NEXT: lsls r2, r2, #28 -; CHECK-BE-NEXT: it mi -; CHECK-BE-NEXT: vstrmi s3, [r0, #12] -; CHECK-BE-NEXT: mov r0, r1 -; CHECK-BE-NEXT: add sp, #8 -; CHECK-BE-NEXT: bx lr -entry: - %z = getelementptr inbounds i8, i8* %y, i32 4 - %0 = bitcast i8* %x to <4 x float>* - %1 = load <4 x float>, <4 x float>* %0, align 4 - %2 = bitcast i8* %y to <4 x float>* - %c = icmp sgt <4 x i32> %a, zeroinitializer - call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %1, <4 x float>* %2, i32 4, <4 x i1> %c) - ret i8* %z -} - - -define arm_aapcs_vfpcc void @masked_v8f16(<8 x half> *%dest, <8 x half> %a, <8 x i16> %b) { -; CHECK-LE-LABEL: masked_v8f16: -; CHECK-LE: @ %bb.0: @ %entry -; CHECK-LE-NEXT: .pad #8 -; CHECK-LE-NEXT: sub sp, #8 -; CHECK-LE-NEXT: vcmp.i16 ne, q1, zr -; CHECK-LE-NEXT: movs r2, #0 -; CHECK-LE-NEXT: vmrs r1, p0 -; CHECK-LE-NEXT: and r3, r1, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #0, #1 -; CHECK-LE-NEXT: ubfx r3, r1, #2, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #1, #1 -; CHECK-LE-NEXT: ubfx r3, r1, #4, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #2, #1 -; CHECK-LE-NEXT: ubfx r3, r1, #6, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #3, #1 -; CHECK-LE-NEXT: ubfx r3, r1, #8, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #4, #1 -; CHECK-LE-NEXT: ubfx r3, r1, #10, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #5, #1 -; CHECK-LE-NEXT: ubfx r3, r1, #12, #1 -; CHECK-LE-NEXT: ubfx r1, r1, #14, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #6, #1 -; CHECK-LE-NEXT: rsbs r1, r1, #0 -; CHECK-LE-NEXT: bfi r2, r1, #7, #1 -; CHECK-LE-NEXT: uxtb r1, r2 -; CHECK-LE-NEXT: lsls r2, r2, #31 -; CHECK-LE-NEXT: bne .LBB15_9 -; CHECK-LE-NEXT: @ %bb.1: @ %else -; CHECK-LE-NEXT: lsls r2, r1, #30 -; CHECK-LE-NEXT: bmi .LBB15_10 -; CHECK-LE-NEXT: .LBB15_2: @ %else2 -; CHECK-LE-NEXT: lsls r2, r1, #29 -; CHECK-LE-NEXT: bmi .LBB15_11 -; CHECK-LE-NEXT: .LBB15_3: @ %else4 -; CHECK-LE-NEXT: lsls r2, r1, #28 -; CHECK-LE-NEXT: bmi .LBB15_12 -; CHECK-LE-NEXT: .LBB15_4: @ %else6 -; CHECK-LE-NEXT: lsls r2, r1, #27 -; CHECK-LE-NEXT: bmi .LBB15_13 -; CHECK-LE-NEXT: .LBB15_5: @ %else8 -; CHECK-LE-NEXT: lsls r2, r1, #26 -; CHECK-LE-NEXT: bmi .LBB15_14 -; CHECK-LE-NEXT: .LBB15_6: @ %else10 -; CHECK-LE-NEXT: lsls r2, r1, #25 -; CHECK-LE-NEXT: bmi .LBB15_15 -; CHECK-LE-NEXT: .LBB15_7: @ %else12 -; CHECK-LE-NEXT: lsls r1, r1, #24 -; CHECK-LE-NEXT: bmi .LBB15_16 -; CHECK-LE-NEXT: .LBB15_8: @ %else14 -; CHECK-LE-NEXT: add sp, #8 -; CHECK-LE-NEXT: bx lr -; CHECK-LE-NEXT: .LBB15_9: @ %cond.store -; CHECK-LE-NEXT: vstr.16 s0, [r0] -; CHECK-LE-NEXT: lsls r2, r1, #30 -; CHECK-LE-NEXT: bpl .LBB15_2 -; CHECK-LE-NEXT: .LBB15_10: @ %cond.store1 -; CHECK-LE-NEXT: vmovx.f16 s4, s0 -; CHECK-LE-NEXT: vstr.16 s4, [r0, #2] -; CHECK-LE-NEXT: lsls r2, r1, #29 -; CHECK-LE-NEXT: bpl .LBB15_3 -; CHECK-LE-NEXT: .LBB15_11: @ %cond.store3 -; CHECK-LE-NEXT: vstr.16 s1, [r0, #4] -; CHECK-LE-NEXT: lsls r2, r1, #28 -; CHECK-LE-NEXT: bpl .LBB15_4 -; CHECK-LE-NEXT: .LBB15_12: @ %cond.store5 -; CHECK-LE-NEXT: vmovx.f16 s4, s1 -; CHECK-LE-NEXT: vstr.16 s4, [r0, #6] -; CHECK-LE-NEXT: lsls r2, r1, #27 -; CHECK-LE-NEXT: bpl .LBB15_5 -; CHECK-LE-NEXT: .LBB15_13: @ %cond.store7 -; CHECK-LE-NEXT: vstr.16 s2, [r0, #8] -; CHECK-LE-NEXT: lsls r2, r1, #26 -; CHECK-LE-NEXT: bpl .LBB15_6 -; CHECK-LE-NEXT: .LBB15_14: @ %cond.store9 -; CHECK-LE-NEXT: vmovx.f16 s4, s2 -; CHECK-LE-NEXT: vstr.16 s4, [r0, #10] -; 
CHECK-LE-NEXT: lsls r2, r1, #25 -; CHECK-LE-NEXT: bpl .LBB15_7 -; CHECK-LE-NEXT: .LBB15_15: @ %cond.store11 -; CHECK-LE-NEXT: vstr.16 s3, [r0, #12] -; CHECK-LE-NEXT: lsls r1, r1, #24 -; CHECK-LE-NEXT: bpl .LBB15_8 -; CHECK-LE-NEXT: .LBB15_16: @ %cond.store13 -; CHECK-LE-NEXT: vmovx.f16 s0, s3 -; CHECK-LE-NEXT: vstr.16 s0, [r0, #14] -; CHECK-LE-NEXT: add sp, #8 -; CHECK-LE-NEXT: bx lr -; -; CHECK-BE-LABEL: masked_v8f16: -; CHECK-BE: @ %bb.0: @ %entry -; CHECK-BE-NEXT: .pad #8 -; CHECK-BE-NEXT: sub sp, #8 -; CHECK-BE-NEXT: vrev64.16 q2, q1 -; CHECK-BE-NEXT: movs r2, #0 -; CHECK-BE-NEXT: vcmp.i16 ne, q2, zr -; CHECK-BE-NEXT: vrev64.16 q1, q0 -; CHECK-BE-NEXT: vmrs r1, p0 -; CHECK-BE-NEXT: and r3, r1, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #0, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #2, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #1, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #4, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #2, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #6, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #3, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #8, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #4, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #10, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #5, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #12, #1 -; CHECK-BE-NEXT: ubfx r1, r1, #14, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #6, #1 -; CHECK-BE-NEXT: rsbs r1, r1, #0 -; CHECK-BE-NEXT: bfi r2, r1, #7, #1 -; CHECK-BE-NEXT: uxtb r1, r2 -; CHECK-BE-NEXT: lsls r2, r2, #31 -; CHECK-BE-NEXT: bne .LBB15_9 -; CHECK-BE-NEXT: @ %bb.1: @ %else -; CHECK-BE-NEXT: lsls r2, r1, #30 -; CHECK-BE-NEXT: bmi .LBB15_10 -; CHECK-BE-NEXT: .LBB15_2: @ %else2 -; CHECK-BE-NEXT: lsls r2, r1, #29 -; CHECK-BE-NEXT: bmi .LBB15_11 -; CHECK-BE-NEXT: .LBB15_3: @ %else4 -; CHECK-BE-NEXT: lsls r2, r1, #28 -; CHECK-BE-NEXT: bmi .LBB15_12 -; CHECK-BE-NEXT: .LBB15_4: @ %else6 -; CHECK-BE-NEXT: lsls r2, r1, #27 -; CHECK-BE-NEXT: bmi .LBB15_13 -; CHECK-BE-NEXT: .LBB15_5: @ %else8 -; CHECK-BE-NEXT: lsls r2, r1, #26 -; CHECK-BE-NEXT: bmi .LBB15_14 -; CHECK-BE-NEXT: .LBB15_6: @ %else10 -; CHECK-BE-NEXT: lsls r2, r1, #25 -; CHECK-BE-NEXT: bmi .LBB15_15 -; CHECK-BE-NEXT: .LBB15_7: @ %else12 -; CHECK-BE-NEXT: lsls r1, r1, #24 -; CHECK-BE-NEXT: bmi .LBB15_16 -; CHECK-BE-NEXT: .LBB15_8: @ %else14 -; CHECK-BE-NEXT: add sp, #8 -; CHECK-BE-NEXT: bx lr -; CHECK-BE-NEXT: .LBB15_9: @ %cond.store -; CHECK-BE-NEXT: vstr.16 s4, [r0] -; CHECK-BE-NEXT: lsls r2, r1, #30 -; CHECK-BE-NEXT: bpl .LBB15_2 -; CHECK-BE-NEXT: .LBB15_10: @ %cond.store1 -; CHECK-BE-NEXT: vmovx.f16 s0, s4 -; CHECK-BE-NEXT: vstr.16 s0, [r0, #2] -; CHECK-BE-NEXT: lsls r2, r1, #29 -; CHECK-BE-NEXT: bpl .LBB15_3 -; CHECK-BE-NEXT: .LBB15_11: @ %cond.store3 -; CHECK-BE-NEXT: vstr.16 s5, [r0, #4] -; CHECK-BE-NEXT: lsls r2, r1, #28 -; CHECK-BE-NEXT: bpl .LBB15_4 -; CHECK-BE-NEXT: .LBB15_12: @ %cond.store5 -; CHECK-BE-NEXT: vmovx.f16 s0, s5 -; CHECK-BE-NEXT: vstr.16 s0, [r0, #6] -; CHECK-BE-NEXT: lsls r2, r1, #27 -; CHECK-BE-NEXT: bpl .LBB15_5 -; CHECK-BE-NEXT: .LBB15_13: @ %cond.store7 -; CHECK-BE-NEXT: vstr.16 s6, [r0, #8] -; CHECK-BE-NEXT: lsls r2, r1, #26 -; CHECK-BE-NEXT: bpl .LBB15_6 -; CHECK-BE-NEXT: .LBB15_14: @ %cond.store9 -; CHECK-BE-NEXT: vmovx.f16 s0, s6 -; CHECK-BE-NEXT: vstr.16 s0, [r0, #10] -; CHECK-BE-NEXT: lsls r2, r1, #25 -; CHECK-BE-NEXT: bpl .LBB15_7 -; CHECK-BE-NEXT: .LBB15_15: @ %cond.store11 -; CHECK-BE-NEXT: vstr.16 s7, [r0, #12] -; 
CHECK-BE-NEXT: lsls r1, r1, #24 -; CHECK-BE-NEXT: bpl .LBB15_8 -; CHECK-BE-NEXT: .LBB15_16: @ %cond.store13 -; CHECK-BE-NEXT: vmovx.f16 s0, s7 -; CHECK-BE-NEXT: vstr.16 s0, [r0, #14] -; CHECK-BE-NEXT: add sp, #8 -; CHECK-BE-NEXT: bx lr -entry: - %c = icmp ugt <8 x i16> %b, zeroinitializer - call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %a, <8 x half>* %dest, i32 2, <8 x i1> %c) - ret void -} - -define arm_aapcs_vfpcc void @masked_v8f16_align1(<8 x half> *%dest, <8 x half> %a, <8 x i16> %b) { -; CHECK-LE-LABEL: masked_v8f16_align1: -; CHECK-LE: @ %bb.0: @ %entry -; CHECK-LE-NEXT: .pad #40 -; CHECK-LE-NEXT: sub sp, #40 -; CHECK-LE-NEXT: vcmp.i16 ne, q1, zr -; CHECK-LE-NEXT: movs r2, #0 -; CHECK-LE-NEXT: vmrs r1, p0 -; CHECK-LE-NEXT: and r3, r1, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #0, #1 -; CHECK-LE-NEXT: ubfx r3, r1, #2, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #1, #1 -; CHECK-LE-NEXT: ubfx r3, r1, #4, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #2, #1 -; CHECK-LE-NEXT: ubfx r3, r1, #6, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #3, #1 -; CHECK-LE-NEXT: ubfx r3, r1, #8, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #4, #1 -; CHECK-LE-NEXT: ubfx r3, r1, #10, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #5, #1 -; CHECK-LE-NEXT: ubfx r3, r1, #12, #1 -; CHECK-LE-NEXT: ubfx r1, r1, #14, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #6, #1 -; CHECK-LE-NEXT: rsbs r1, r1, #0 -; CHECK-LE-NEXT: bfi r2, r1, #7, #1 -; CHECK-LE-NEXT: uxtb r1, r2 -; CHECK-LE-NEXT: lsls r2, r2, #31 -; CHECK-LE-NEXT: bne .LBB16_9 -; CHECK-LE-NEXT: @ %bb.1: @ %else -; CHECK-LE-NEXT: lsls r2, r1, #30 -; CHECK-LE-NEXT: bmi .LBB16_10 -; CHECK-LE-NEXT: .LBB16_2: @ %else2 -; CHECK-LE-NEXT: lsls r2, r1, #29 -; CHECK-LE-NEXT: bmi .LBB16_11 -; CHECK-LE-NEXT: .LBB16_3: @ %else4 -; CHECK-LE-NEXT: lsls r2, r1, #28 -; CHECK-LE-NEXT: bmi .LBB16_12 -; CHECK-LE-NEXT: .LBB16_4: @ %else6 -; CHECK-LE-NEXT: lsls r2, r1, #27 -; CHECK-LE-NEXT: bmi .LBB16_13 -; CHECK-LE-NEXT: .LBB16_5: @ %else8 -; CHECK-LE-NEXT: lsls r2, r1, #26 -; CHECK-LE-NEXT: bmi .LBB16_14 -; CHECK-LE-NEXT: .LBB16_6: @ %else10 -; CHECK-LE-NEXT: lsls r2, r1, #25 -; CHECK-LE-NEXT: bmi .LBB16_15 -; CHECK-LE-NEXT: .LBB16_7: @ %else12 -; CHECK-LE-NEXT: lsls r1, r1, #24 -; CHECK-LE-NEXT: bmi .LBB16_16 -; CHECK-LE-NEXT: .LBB16_8: @ %else14 -; CHECK-LE-NEXT: add sp, #40 -; CHECK-LE-NEXT: bx lr -; CHECK-LE-NEXT: .LBB16_9: @ %cond.store -; CHECK-LE-NEXT: vstr.16 s0, [sp, #28] -; CHECK-LE-NEXT: ldrh.w r2, [sp, #28] -; CHECK-LE-NEXT: strh r2, [r0] -; CHECK-LE-NEXT: lsls r2, r1, #30 -; CHECK-LE-NEXT: bpl .LBB16_2 -; CHECK-LE-NEXT: .LBB16_10: @ %cond.store1 -; CHECK-LE-NEXT: vmovx.f16 s4, s0 -; CHECK-LE-NEXT: vstr.16 s4, [sp, #24] -; CHECK-LE-NEXT: ldrh.w r2, [sp, #24] -; CHECK-LE-NEXT: strh r2, [r0, #2] -; CHECK-LE-NEXT: lsls r2, r1, #29 -; CHECK-LE-NEXT: bpl .LBB16_3 -; CHECK-LE-NEXT: .LBB16_11: @ %cond.store3 -; CHECK-LE-NEXT: vstr.16 s1, [sp, #20] -; CHECK-LE-NEXT: ldrh.w r2, [sp, #20] -; CHECK-LE-NEXT: strh r2, [r0, #4] -; CHECK-LE-NEXT: lsls r2, r1, #28 -; CHECK-LE-NEXT: bpl .LBB16_4 -; CHECK-LE-NEXT: .LBB16_12: @ %cond.store5 -; CHECK-LE-NEXT: vmovx.f16 s4, s1 -; CHECK-LE-NEXT: vstr.16 s4, [sp, #16] -; CHECK-LE-NEXT: ldrh.w r2, [sp, #16] -; CHECK-LE-NEXT: strh r2, [r0, #6] -; CHECK-LE-NEXT: lsls r2, r1, #27 -; CHECK-LE-NEXT: bpl .LBB16_5 -; CHECK-LE-NEXT: .LBB16_13: @ %cond.store7 -; CHECK-LE-NEXT: vstr.16 s2, 
[sp, #12] -; CHECK-LE-NEXT: ldrh.w r2, [sp, #12] -; CHECK-LE-NEXT: strh r2, [r0, #8] -; CHECK-LE-NEXT: lsls r2, r1, #26 -; CHECK-LE-NEXT: bpl .LBB16_6 -; CHECK-LE-NEXT: .LBB16_14: @ %cond.store9 -; CHECK-LE-NEXT: vmovx.f16 s4, s2 -; CHECK-LE-NEXT: vstr.16 s4, [sp, #8] -; CHECK-LE-NEXT: ldrh.w r2, [sp, #8] -; CHECK-LE-NEXT: strh r2, [r0, #10] -; CHECK-LE-NEXT: lsls r2, r1, #25 -; CHECK-LE-NEXT: bpl .LBB16_7 -; CHECK-LE-NEXT: .LBB16_15: @ %cond.store11 -; CHECK-LE-NEXT: vstr.16 s3, [sp, #4] -; CHECK-LE-NEXT: ldrh.w r2, [sp, #4] -; CHECK-LE-NEXT: strh r2, [r0, #12] -; CHECK-LE-NEXT: lsls r1, r1, #24 -; CHECK-LE-NEXT: bpl .LBB16_8 -; CHECK-LE-NEXT: .LBB16_16: @ %cond.store13 -; CHECK-LE-NEXT: vmovx.f16 s0, s3 -; CHECK-LE-NEXT: vstr.16 s0, [sp] -; CHECK-LE-NEXT: ldrh.w r1, [sp] -; CHECK-LE-NEXT: strh r1, [r0, #14] -; CHECK-LE-NEXT: add sp, #40 -; CHECK-LE-NEXT: bx lr -; -; CHECK-BE-LABEL: masked_v8f16_align1: -; CHECK-BE: @ %bb.0: @ %entry -; CHECK-BE-NEXT: .pad #40 -; CHECK-BE-NEXT: sub sp, #40 -; CHECK-BE-NEXT: vrev64.16 q2, q1 -; CHECK-BE-NEXT: movs r2, #0 -; CHECK-BE-NEXT: vcmp.i16 ne, q2, zr -; CHECK-BE-NEXT: vrev64.16 q1, q0 -; CHECK-BE-NEXT: vmrs r1, p0 -; CHECK-BE-NEXT: and r3, r1, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #0, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #2, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #1, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #4, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #2, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #6, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #3, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #8, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #4, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #10, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #5, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #12, #1 -; CHECK-BE-NEXT: ubfx r1, r1, #14, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #6, #1 -; CHECK-BE-NEXT: rsbs r1, r1, #0 -; CHECK-BE-NEXT: bfi r2, r1, #7, #1 -; CHECK-BE-NEXT: uxtb r1, r2 -; CHECK-BE-NEXT: lsls r2, r2, #31 -; CHECK-BE-NEXT: bne .LBB16_9 -; CHECK-BE-NEXT: @ %bb.1: @ %else -; CHECK-BE-NEXT: lsls r2, r1, #30 -; CHECK-BE-NEXT: bmi .LBB16_10 -; CHECK-BE-NEXT: .LBB16_2: @ %else2 -; CHECK-BE-NEXT: lsls r2, r1, #29 -; CHECK-BE-NEXT: bmi .LBB16_11 -; CHECK-BE-NEXT: .LBB16_3: @ %else4 -; CHECK-BE-NEXT: lsls r2, r1, #28 -; CHECK-BE-NEXT: bmi .LBB16_12 -; CHECK-BE-NEXT: .LBB16_4: @ %else6 -; CHECK-BE-NEXT: lsls r2, r1, #27 -; CHECK-BE-NEXT: bmi .LBB16_13 -; CHECK-BE-NEXT: .LBB16_5: @ %else8 -; CHECK-BE-NEXT: lsls r2, r1, #26 -; CHECK-BE-NEXT: bmi .LBB16_14 -; CHECK-BE-NEXT: .LBB16_6: @ %else10 -; CHECK-BE-NEXT: lsls r2, r1, #25 -; CHECK-BE-NEXT: bmi .LBB16_15 -; CHECK-BE-NEXT: .LBB16_7: @ %else12 -; CHECK-BE-NEXT: lsls r1, r1, #24 -; CHECK-BE-NEXT: bmi .LBB16_16 -; CHECK-BE-NEXT: .LBB16_8: @ %else14 -; CHECK-BE-NEXT: add sp, #40 -; CHECK-BE-NEXT: bx lr -; CHECK-BE-NEXT: .LBB16_9: @ %cond.store -; CHECK-BE-NEXT: vstr.16 s4, [sp, #28] -; CHECK-BE-NEXT: ldrh.w r2, [sp, #28] -; CHECK-BE-NEXT: strh r2, [r0] -; CHECK-BE-NEXT: lsls r2, r1, #30 -; CHECK-BE-NEXT: bpl .LBB16_2 -; CHECK-BE-NEXT: .LBB16_10: @ %cond.store1 -; CHECK-BE-NEXT: vmovx.f16 s0, s4 -; CHECK-BE-NEXT: vstr.16 s0, [sp, #24] -; CHECK-BE-NEXT: ldrh.w r2, [sp, #24] -; CHECK-BE-NEXT: strh r2, [r0, #2] -; CHECK-BE-NEXT: lsls r2, r1, #29 -; CHECK-BE-NEXT: bpl .LBB16_3 -; CHECK-BE-NEXT: .LBB16_11: @ %cond.store3 -; CHECK-BE-NEXT: vstr.16 s5, [sp, #20] -; CHECK-BE-NEXT: ldrh.w r2, 
[sp, #20] -; CHECK-BE-NEXT: strh r2, [r0, #4] -; CHECK-BE-NEXT: lsls r2, r1, #28 -; CHECK-BE-NEXT: bpl .LBB16_4 -; CHECK-BE-NEXT: .LBB16_12: @ %cond.store5 -; CHECK-BE-NEXT: vmovx.f16 s0, s5 -; CHECK-BE-NEXT: vstr.16 s0, [sp, #16] -; CHECK-BE-NEXT: ldrh.w r2, [sp, #16] -; CHECK-BE-NEXT: strh r2, [r0, #6] -; CHECK-BE-NEXT: lsls r2, r1, #27 -; CHECK-BE-NEXT: bpl .LBB16_5 -; CHECK-BE-NEXT: .LBB16_13: @ %cond.store7 -; CHECK-BE-NEXT: vstr.16 s6, [sp, #12] -; CHECK-BE-NEXT: ldrh.w r2, [sp, #12] -; CHECK-BE-NEXT: strh r2, [r0, #8] -; CHECK-BE-NEXT: lsls r2, r1, #26 -; CHECK-BE-NEXT: bpl .LBB16_6 -; CHECK-BE-NEXT: .LBB16_14: @ %cond.store9 -; CHECK-BE-NEXT: vmovx.f16 s0, s6 -; CHECK-BE-NEXT: vstr.16 s0, [sp, #8] -; CHECK-BE-NEXT: ldrh.w r2, [sp, #8] -; CHECK-BE-NEXT: strh r2, [r0, #10] -; CHECK-BE-NEXT: lsls r2, r1, #25 -; CHECK-BE-NEXT: bpl .LBB16_7 -; CHECK-BE-NEXT: .LBB16_15: @ %cond.store11 -; CHECK-BE-NEXT: vstr.16 s7, [sp, #4] -; CHECK-BE-NEXT: ldrh.w r2, [sp, #4] -; CHECK-BE-NEXT: strh r2, [r0, #12] -; CHECK-BE-NEXT: lsls r1, r1, #24 -; CHECK-BE-NEXT: bpl .LBB16_8 -; CHECK-BE-NEXT: .LBB16_16: @ %cond.store13 -; CHECK-BE-NEXT: vmovx.f16 s0, s7 -; CHECK-BE-NEXT: vstr.16 s0, [sp] -; CHECK-BE-NEXT: ldrh.w r1, [sp] -; CHECK-BE-NEXT: strh r1, [r0, #14] -; CHECK-BE-NEXT: add sp, #40 -; CHECK-BE-NEXT: bx lr -entry: - %c = icmp ugt <8 x i16> %b, zeroinitializer - call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %a, <8 x half>* %dest, i32 1, <8 x i1> %c) - ret void -} - -define i8* @masked_v8f16_pre(i8* %y, i8* %x, <8 x i16> %a) { -; CHECK-LE-LABEL: masked_v8f16_pre: -; CHECK-LE: @ %bb.0: @ %entry -; CHECK-LE-NEXT: .pad #8 -; CHECK-LE-NEXT: sub sp, #8 -; CHECK-LE-NEXT: vldr d1, [sp, #8] -; CHECK-LE-NEXT: adds r0, #4 -; CHECK-LE-NEXT: vmov d0, r2, r3 -; CHECK-LE-NEXT: movs r2, #0 -; CHECK-LE-NEXT: vcmp.s16 gt, q0, zr -; CHECK-LE-NEXT: vmrs r12, p0 -; CHECK-LE-NEXT: and r3, r12, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #0, #1 -; CHECK-LE-NEXT: ubfx r3, r12, #2, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #1, #1 -; CHECK-LE-NEXT: ubfx r3, r12, #4, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #2, #1 -; CHECK-LE-NEXT: ubfx r3, r12, #6, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #3, #1 -; CHECK-LE-NEXT: ubfx r3, r12, #8, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #4, #1 -; CHECK-LE-NEXT: ubfx r3, r12, #10, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #5, #1 -; CHECK-LE-NEXT: ubfx r3, r12, #12, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: bfi r2, r3, #6, #1 -; CHECK-LE-NEXT: ubfx r3, r12, #14, #1 -; CHECK-LE-NEXT: rsbs r3, r3, #0 -; CHECK-LE-NEXT: vldrw.u32 q0, [r1] -; CHECK-LE-NEXT: bfi r2, r3, #7, #1 -; CHECK-LE-NEXT: uxtb r1, r2 -; CHECK-LE-NEXT: lsls r2, r2, #31 -; CHECK-LE-NEXT: bne .LBB17_9 -; CHECK-LE-NEXT: @ %bb.1: @ %else -; CHECK-LE-NEXT: lsls r2, r1, #30 -; CHECK-LE-NEXT: bmi .LBB17_10 -; CHECK-LE-NEXT: .LBB17_2: @ %else2 -; CHECK-LE-NEXT: lsls r2, r1, #29 -; CHECK-LE-NEXT: bmi .LBB17_11 -; CHECK-LE-NEXT: .LBB17_3: @ %else4 -; CHECK-LE-NEXT: lsls r2, r1, #28 -; CHECK-LE-NEXT: bmi .LBB17_12 -; CHECK-LE-NEXT: .LBB17_4: @ %else6 -; CHECK-LE-NEXT: lsls r2, r1, #27 -; CHECK-LE-NEXT: bmi .LBB17_13 -; CHECK-LE-NEXT: .LBB17_5: @ %else8 -; CHECK-LE-NEXT: lsls r2, r1, #26 -; CHECK-LE-NEXT: bmi .LBB17_14 -; CHECK-LE-NEXT: .LBB17_6: @ %else10 -; CHECK-LE-NEXT: lsls r2, r1, #25 -; CHECK-LE-NEXT: bmi .LBB17_15 -; CHECK-LE-NEXT: .LBB17_7: @ %else12 -; 
CHECK-LE-NEXT: lsls r1, r1, #24 -; CHECK-LE-NEXT: bmi .LBB17_16 -; CHECK-LE-NEXT: .LBB17_8: @ %else14 -; CHECK-LE-NEXT: add sp, #8 -; CHECK-LE-NEXT: bx lr -; CHECK-LE-NEXT: .LBB17_9: @ %cond.store -; CHECK-LE-NEXT: vstr.16 s0, [r0] -; CHECK-LE-NEXT: lsls r2, r1, #30 -; CHECK-LE-NEXT: bpl .LBB17_2 -; CHECK-LE-NEXT: .LBB17_10: @ %cond.store1 -; CHECK-LE-NEXT: vmovx.f16 s4, s0 -; CHECK-LE-NEXT: vstr.16 s4, [r0, #2] -; CHECK-LE-NEXT: lsls r2, r1, #29 -; CHECK-LE-NEXT: bpl .LBB17_3 -; CHECK-LE-NEXT: .LBB17_11: @ %cond.store3 -; CHECK-LE-NEXT: vstr.16 s1, [r0, #4] -; CHECK-LE-NEXT: lsls r2, r1, #28 -; CHECK-LE-NEXT: bpl .LBB17_4 -; CHECK-LE-NEXT: .LBB17_12: @ %cond.store5 -; CHECK-LE-NEXT: vmovx.f16 s4, s1 -; CHECK-LE-NEXT: vstr.16 s4, [r0, #6] -; CHECK-LE-NEXT: lsls r2, r1, #27 -; CHECK-LE-NEXT: bpl .LBB17_5 -; CHECK-LE-NEXT: .LBB17_13: @ %cond.store7 -; CHECK-LE-NEXT: vstr.16 s2, [r0, #8] -; CHECK-LE-NEXT: lsls r2, r1, #26 -; CHECK-LE-NEXT: bpl .LBB17_6 -; CHECK-LE-NEXT: .LBB17_14: @ %cond.store9 -; CHECK-LE-NEXT: vmovx.f16 s4, s2 -; CHECK-LE-NEXT: vstr.16 s4, [r0, #10] -; CHECK-LE-NEXT: lsls r2, r1, #25 -; CHECK-LE-NEXT: bpl .LBB17_7 -; CHECK-LE-NEXT: .LBB17_15: @ %cond.store11 -; CHECK-LE-NEXT: vstr.16 s3, [r0, #12] -; CHECK-LE-NEXT: lsls r1, r1, #24 -; CHECK-LE-NEXT: bpl .LBB17_8 -; CHECK-LE-NEXT: .LBB17_16: @ %cond.store13 -; CHECK-LE-NEXT: vmovx.f16 s0, s3 -; CHECK-LE-NEXT: vstr.16 s0, [r0, #14] -; CHECK-LE-NEXT: add sp, #8 -; CHECK-LE-NEXT: bx lr -; -; CHECK-BE-LABEL: masked_v8f16_pre: -; CHECK-BE: @ %bb.0: @ %entry -; CHECK-BE-NEXT: .pad #8 -; CHECK-BE-NEXT: sub sp, #8 -; CHECK-BE-NEXT: vldr d1, [sp, #8] -; CHECK-BE-NEXT: adds r0, #4 -; CHECK-BE-NEXT: vmov d0, r3, r2 -; CHECK-BE-NEXT: movs r2, #0 -; CHECK-BE-NEXT: vrev64.16 q1, q0 -; CHECK-BE-NEXT: vcmp.s16 gt, q1, zr -; CHECK-BE-NEXT: vmrs r12, p0 -; CHECK-BE-NEXT: and r3, r12, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #0, #1 -; CHECK-BE-NEXT: ubfx r3, r12, #2, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #1, #1 -; CHECK-BE-NEXT: ubfx r3, r12, #4, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #2, #1 -; CHECK-BE-NEXT: ubfx r3, r12, #6, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #3, #1 -; CHECK-BE-NEXT: ubfx r3, r12, #8, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #4, #1 -; CHECK-BE-NEXT: ubfx r3, r12, #10, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #5, #1 -; CHECK-BE-NEXT: ubfx r3, r12, #12, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: bfi r2, r3, #6, #1 -; CHECK-BE-NEXT: ubfx r3, r12, #14, #1 -; CHECK-BE-NEXT: rsbs r3, r3, #0 -; CHECK-BE-NEXT: vldrh.u16 q0, [r1] -; CHECK-BE-NEXT: bfi r2, r3, #7, #1 -; CHECK-BE-NEXT: uxtb r1, r2 -; CHECK-BE-NEXT: lsls r2, r2, #31 -; CHECK-BE-NEXT: bne .LBB17_9 -; CHECK-BE-NEXT: @ %bb.1: @ %else -; CHECK-BE-NEXT: lsls r2, r1, #30 -; CHECK-BE-NEXT: bmi .LBB17_10 -; CHECK-BE-NEXT: .LBB17_2: @ %else2 -; CHECK-BE-NEXT: lsls r2, r1, #29 -; CHECK-BE-NEXT: bmi .LBB17_11 -; CHECK-BE-NEXT: .LBB17_3: @ %else4 -; CHECK-BE-NEXT: lsls r2, r1, #28 -; CHECK-BE-NEXT: bmi .LBB17_12 -; CHECK-BE-NEXT: .LBB17_4: @ %else6 -; CHECK-BE-NEXT: lsls r2, r1, #27 -; CHECK-BE-NEXT: bmi .LBB17_13 -; CHECK-BE-NEXT: .LBB17_5: @ %else8 -; CHECK-BE-NEXT: lsls r2, r1, #26 -; CHECK-BE-NEXT: bmi .LBB17_14 -; CHECK-BE-NEXT: .LBB17_6: @ %else10 -; CHECK-BE-NEXT: lsls r2, r1, #25 -; CHECK-BE-NEXT: bmi .LBB17_15 -; CHECK-BE-NEXT: .LBB17_7: @ %else12 -; CHECK-BE-NEXT: lsls r1, r1, #24 -; 
CHECK-BE-NEXT: bmi .LBB17_16 -; CHECK-BE-NEXT: .LBB17_8: @ %else14 -; CHECK-BE-NEXT: add sp, #8 -; CHECK-BE-NEXT: bx lr -; CHECK-BE-NEXT: .LBB17_9: @ %cond.store -; CHECK-BE-NEXT: vstr.16 s0, [r0] -; CHECK-BE-NEXT: lsls r2, r1, #30 -; CHECK-BE-NEXT: bpl .LBB17_2 -; CHECK-BE-NEXT: .LBB17_10: @ %cond.store1 -; CHECK-BE-NEXT: vmovx.f16 s4, s0 -; CHECK-BE-NEXT: vstr.16 s4, [r0, #2] -; CHECK-BE-NEXT: lsls r2, r1, #29 -; CHECK-BE-NEXT: bpl .LBB17_3 -; CHECK-BE-NEXT: .LBB17_11: @ %cond.store3 -; CHECK-BE-NEXT: vstr.16 s1, [r0, #4] -; CHECK-BE-NEXT: lsls r2, r1, #28 -; CHECK-BE-NEXT: bpl .LBB17_4 -; CHECK-BE-NEXT: .LBB17_12: @ %cond.store5 -; CHECK-BE-NEXT: vmovx.f16 s4, s1 -; CHECK-BE-NEXT: vstr.16 s4, [r0, #6] -; CHECK-BE-NEXT: lsls r2, r1, #27 -; CHECK-BE-NEXT: bpl .LBB17_5 -; CHECK-BE-NEXT: .LBB17_13: @ %cond.store7 -; CHECK-BE-NEXT: vstr.16 s2, [r0, #8] -; CHECK-BE-NEXT: lsls r2, r1, #26 -; CHECK-BE-NEXT: bpl .LBB17_6 -; CHECK-BE-NEXT: .LBB17_14: @ %cond.store9 -; CHECK-BE-NEXT: vmovx.f16 s4, s2 -; CHECK-BE-NEXT: vstr.16 s4, [r0, #10] -; CHECK-BE-NEXT: lsls r2, r1, #25 -; CHECK-BE-NEXT: bpl .LBB17_7 -; CHECK-BE-NEXT: .LBB17_15: @ %cond.store11 -; CHECK-BE-NEXT: vstr.16 s3, [r0, #12] -; CHECK-BE-NEXT: lsls r1, r1, #24 -; CHECK-BE-NEXT: bpl .LBB17_8 -; CHECK-BE-NEXT: .LBB17_16: @ %cond.store13 -; CHECK-BE-NEXT: vmovx.f16 s0, s3 -; CHECK-BE-NEXT: vstr.16 s0, [r0, #14] -; CHECK-BE-NEXT: add sp, #8 -; CHECK-BE-NEXT: bx lr -entry: - %z = getelementptr inbounds i8, i8* %y, i32 4 - %0 = bitcast i8* %x to <8 x half>* - %1 = load <8 x half>, <8 x half>* %0, align 4 - %2 = bitcast i8* %z to <8 x half>* - %c = icmp sgt <8 x i16> %a, zeroinitializer - call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %1, <8 x half>* %2, i32 2, <8 x i1> %c) - ret i8* %z -} - -define i8* @masked_v8f16_post(i8* %y, i8* %x, <8 x i16> %a) { -; CHECK-LE-LABEL: masked_v8f16_post: -; CHECK-LE: @ %bb.0: @ %entry -; CHECK-LE-NEXT: .pad #8 -; CHECK-LE-NEXT: sub sp, #8 -; CHECK-LE-NEXT: vldr d1, [sp, #8] -; CHECK-LE-NEXT: vmov d0, r2, r3 -; CHECK-LE-NEXT: movs r3, #0 -; CHECK-LE-NEXT: vcmp.s16 gt, q0, zr -; CHECK-LE-NEXT: vmrs r12, p0 -; CHECK-LE-NEXT: and r2, r12, #1 -; CHECK-LE-NEXT: rsbs r2, r2, #0 -; CHECK-LE-NEXT: bfi r3, r2, #0, #1 -; CHECK-LE-NEXT: ubfx r2, r12, #2, #1 -; CHECK-LE-NEXT: rsbs r2, r2, #0 -; CHECK-LE-NEXT: bfi r3, r2, #1, #1 -; CHECK-LE-NEXT: ubfx r2, r12, #4, #1 -; CHECK-LE-NEXT: rsbs r2, r2, #0 -; CHECK-LE-NEXT: bfi r3, r2, #2, #1 -; CHECK-LE-NEXT: ubfx r2, r12, #6, #1 -; CHECK-LE-NEXT: rsbs r2, r2, #0 -; CHECK-LE-NEXT: bfi r3, r2, #3, #1 -; CHECK-LE-NEXT: ubfx r2, r12, #8, #1 -; CHECK-LE-NEXT: rsbs r2, r2, #0 -; CHECK-LE-NEXT: bfi r3, r2, #4, #1 -; CHECK-LE-NEXT: ubfx r2, r12, #10, #1 -; CHECK-LE-NEXT: rsbs r2, r2, #0 -; CHECK-LE-NEXT: bfi r3, r2, #5, #1 -; CHECK-LE-NEXT: ubfx r2, r12, #12, #1 -; CHECK-LE-NEXT: rsbs r2, r2, #0 -; CHECK-LE-NEXT: bfi r3, r2, #6, #1 -; CHECK-LE-NEXT: ubfx r2, r12, #14, #1 -; CHECK-LE-NEXT: rsbs r2, r2, #0 -; CHECK-LE-NEXT: vldrw.u32 q0, [r1] -; CHECK-LE-NEXT: bfi r3, r2, #7, #1 -; CHECK-LE-NEXT: uxtb r2, r3 -; CHECK-LE-NEXT: lsls r1, r3, #31 -; CHECK-LE-NEXT: bne .LBB18_12 -; CHECK-LE-NEXT: @ %bb.1: @ %else -; CHECK-LE-NEXT: lsls r1, r2, #30 -; CHECK-LE-NEXT: bmi .LBB18_13 -; CHECK-LE-NEXT: .LBB18_2: @ %else2 -; CHECK-LE-NEXT: lsls r1, r2, #29 -; CHECK-LE-NEXT: bmi .LBB18_14 -; CHECK-LE-NEXT: .LBB18_3: @ %else4 -; CHECK-LE-NEXT: lsls r1, r2, #28 -; CHECK-LE-NEXT: bmi .LBB18_15 -; CHECK-LE-NEXT: .LBB18_4: @ %else6 -; CHECK-LE-NEXT: lsls r1, r2, #27 -; CHECK-LE-NEXT: bmi 
.LBB18_16 -; CHECK-LE-NEXT: .LBB18_5: @ %else8 -; CHECK-LE-NEXT: lsls r1, r2, #26 -; CHECK-LE-NEXT: bpl .LBB18_7 -; CHECK-LE-NEXT: .LBB18_6: @ %cond.store9 -; CHECK-LE-NEXT: vmovx.f16 s4, s2 -; CHECK-LE-NEXT: vstr.16 s4, [r0, #10] -; CHECK-LE-NEXT: .LBB18_7: @ %else10 -; CHECK-LE-NEXT: adds r1, r0, #4 -; CHECK-LE-NEXT: lsls r3, r2, #25 -; CHECK-LE-NEXT: bpl .LBB18_9 -; CHECK-LE-NEXT: @ %bb.8: @ %cond.store11 -; CHECK-LE-NEXT: vstr.16 s3, [r0, #12] -; CHECK-LE-NEXT: .LBB18_9: @ %else12 -; CHECK-LE-NEXT: lsls r2, r2, #24 -; CHECK-LE-NEXT: bpl .LBB18_11 -; CHECK-LE-NEXT: @ %bb.10: @ %cond.store13 -; CHECK-LE-NEXT: vmovx.f16 s0, s3 -; CHECK-LE-NEXT: vstr.16 s0, [r0, #14] -; CHECK-LE-NEXT: .LBB18_11: @ %else14 -; CHECK-LE-NEXT: mov r0, r1 -; CHECK-LE-NEXT: add sp, #8 -; CHECK-LE-NEXT: bx lr -; CHECK-LE-NEXT: .LBB18_12: @ %cond.store -; CHECK-LE-NEXT: vstr.16 s0, [r0] -; CHECK-LE-NEXT: lsls r1, r2, #30 -; CHECK-LE-NEXT: bpl .LBB18_2 -; CHECK-LE-NEXT: .LBB18_13: @ %cond.store1 -; CHECK-LE-NEXT: vmovx.f16 s4, s0 -; CHECK-LE-NEXT: vstr.16 s4, [r0, #2] -; CHECK-LE-NEXT: lsls r1, r2, #29 -; CHECK-LE-NEXT: bpl .LBB18_3 -; CHECK-LE-NEXT: .LBB18_14: @ %cond.store3 -; CHECK-LE-NEXT: vstr.16 s1, [r0, #4] -; CHECK-LE-NEXT: lsls r1, r2, #28 -; CHECK-LE-NEXT: bpl .LBB18_4 -; CHECK-LE-NEXT: .LBB18_15: @ %cond.store5 -; CHECK-LE-NEXT: vmovx.f16 s4, s1 -; CHECK-LE-NEXT: vstr.16 s4, [r0, #6] -; CHECK-LE-NEXT: lsls r1, r2, #27 -; CHECK-LE-NEXT: bpl .LBB18_5 -; CHECK-LE-NEXT: .LBB18_16: @ %cond.store7 -; CHECK-LE-NEXT: vstr.16 s2, [r0, #8] -; CHECK-LE-NEXT: lsls r1, r2, #26 -; CHECK-LE-NEXT: bmi .LBB18_6 -; CHECK-LE-NEXT: b .LBB18_7 -; -; CHECK-BE-LABEL: masked_v8f16_post: -; CHECK-BE: @ %bb.0: @ %entry -; CHECK-BE-NEXT: .pad #8 -; CHECK-BE-NEXT: sub sp, #8 -; CHECK-BE-NEXT: vldr d1, [sp, #8] -; CHECK-BE-NEXT: vmov d0, r3, r2 -; CHECK-BE-NEXT: movs r3, #0 -; CHECK-BE-NEXT: vrev64.16 q1, q0 -; CHECK-BE-NEXT: vcmp.s16 gt, q1, zr -; CHECK-BE-NEXT: vmrs r12, p0 -; CHECK-BE-NEXT: and r2, r12, #1 -; CHECK-BE-NEXT: rsbs r2, r2, #0 -; CHECK-BE-NEXT: bfi r3, r2, #0, #1 -; CHECK-BE-NEXT: ubfx r2, r12, #2, #1 -; CHECK-BE-NEXT: rsbs r2, r2, #0 -; CHECK-BE-NEXT: bfi r3, r2, #1, #1 -; CHECK-BE-NEXT: ubfx r2, r12, #4, #1 -; CHECK-BE-NEXT: rsbs r2, r2, #0 -; CHECK-BE-NEXT: bfi r3, r2, #2, #1 -; CHECK-BE-NEXT: ubfx r2, r12, #6, #1 -; CHECK-BE-NEXT: rsbs r2, r2, #0 -; CHECK-BE-NEXT: bfi r3, r2, #3, #1 -; CHECK-BE-NEXT: ubfx r2, r12, #8, #1 -; CHECK-BE-NEXT: rsbs r2, r2, #0 -; CHECK-BE-NEXT: bfi r3, r2, #4, #1 -; CHECK-BE-NEXT: ubfx r2, r12, #10, #1 -; CHECK-BE-NEXT: rsbs r2, r2, #0 -; CHECK-BE-NEXT: bfi r3, r2, #5, #1 -; CHECK-BE-NEXT: ubfx r2, r12, #12, #1 -; CHECK-BE-NEXT: rsbs r2, r2, #0 -; CHECK-BE-NEXT: bfi r3, r2, #6, #1 -; CHECK-BE-NEXT: ubfx r2, r12, #14, #1 -; CHECK-BE-NEXT: rsbs r2, r2, #0 -; CHECK-BE-NEXT: vldrh.u16 q0, [r1] -; CHECK-BE-NEXT: bfi r3, r2, #7, #1 -; CHECK-BE-NEXT: uxtb r2, r3 -; CHECK-BE-NEXT: lsls r1, r3, #31 -; CHECK-BE-NEXT: bne .LBB18_12 -; CHECK-BE-NEXT: @ %bb.1: @ %else -; CHECK-BE-NEXT: lsls r1, r2, #30 -; CHECK-BE-NEXT: bmi .LBB18_13 -; CHECK-BE-NEXT: .LBB18_2: @ %else2 -; CHECK-BE-NEXT: lsls r1, r2, #29 -; CHECK-BE-NEXT: bmi .LBB18_14 -; CHECK-BE-NEXT: .LBB18_3: @ %else4 -; CHECK-BE-NEXT: lsls r1, r2, #28 -; CHECK-BE-NEXT: bmi .LBB18_15 -; CHECK-BE-NEXT: .LBB18_4: @ %else6 -; CHECK-BE-NEXT: lsls r1, r2, #27 -; CHECK-BE-NEXT: bmi .LBB18_16 -; CHECK-BE-NEXT: .LBB18_5: @ %else8 -; CHECK-BE-NEXT: lsls r1, r2, #26 -; CHECK-BE-NEXT: bpl .LBB18_7 -; CHECK-BE-NEXT: .LBB18_6: @ %cond.store9 -; 
CHECK-BE-NEXT: vmovx.f16 s4, s2 -; CHECK-BE-NEXT: vstr.16 s4, [r0, #10] -; CHECK-BE-NEXT: .LBB18_7: @ %else10 -; CHECK-BE-NEXT: adds r1, r0, #4 -; CHECK-BE-NEXT: lsls r3, r2, #25 -; CHECK-BE-NEXT: bpl .LBB18_9 -; CHECK-BE-NEXT: @ %bb.8: @ %cond.store11 -; CHECK-BE-NEXT: vstr.16 s3, [r0, #12] -; CHECK-BE-NEXT: .LBB18_9: @ %else12 -; CHECK-BE-NEXT: lsls r2, r2, #24 -; CHECK-BE-NEXT: bpl .LBB18_11 -; CHECK-BE-NEXT: @ %bb.10: @ %cond.store13 -; CHECK-BE-NEXT: vmovx.f16 s0, s3 -; CHECK-BE-NEXT: vstr.16 s0, [r0, #14] -; CHECK-BE-NEXT: .LBB18_11: @ %else14 -; CHECK-BE-NEXT: mov r0, r1 -; CHECK-BE-NEXT: add sp, #8 -; CHECK-BE-NEXT: bx lr -; CHECK-BE-NEXT: .LBB18_12: @ %cond.store -; CHECK-BE-NEXT: vstr.16 s0, [r0] -; CHECK-BE-NEXT: lsls r1, r2, #30 -; CHECK-BE-NEXT: bpl .LBB18_2 -; CHECK-BE-NEXT: .LBB18_13: @ %cond.store1 -; CHECK-BE-NEXT: vmovx.f16 s4, s0 -; CHECK-BE-NEXT: vstr.16 s4, [r0, #2] -; CHECK-BE-NEXT: lsls r1, r2, #29 -; CHECK-BE-NEXT: bpl .LBB18_3 -; CHECK-BE-NEXT: .LBB18_14: @ %cond.store3 -; CHECK-BE-NEXT: vstr.16 s1, [r0, #4] -; CHECK-BE-NEXT: lsls r1, r2, #28 -; CHECK-BE-NEXT: bpl .LBB18_4 -; CHECK-BE-NEXT: .LBB18_15: @ %cond.store5 -; CHECK-BE-NEXT: vmovx.f16 s4, s1 -; CHECK-BE-NEXT: vstr.16 s4, [r0, #6] -; CHECK-BE-NEXT: lsls r1, r2, #27 -; CHECK-BE-NEXT: bpl .LBB18_5 -; CHECK-BE-NEXT: .LBB18_16: @ %cond.store7 -; CHECK-BE-NEXT: vstr.16 s2, [r0, #8] -; CHECK-BE-NEXT: lsls r1, r2, #26 -; CHECK-BE-NEXT: bmi .LBB18_6 -; CHECK-BE-NEXT: b .LBB18_7 -entry: - %z = getelementptr inbounds i8, i8* %y, i32 4 - %0 = bitcast i8* %x to <8 x half>* - %1 = load <8 x half>, <8 x half>* %0, align 4 - %2 = bitcast i8* %y to <8 x half>* - %c = icmp sgt <8 x i16> %a, zeroinitializer - call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %1, <8 x half>* %2, i32 2, <8 x i1> %c) - ret i8* %z -} - - -define arm_aapcs_vfpcc void @masked_v2i64(<2 x i64> *%dest, <2 x i64> %a) { -; CHECK-LE-LABEL: masked_v2i64: -; CHECK-LE: @ %bb.0: @ %entry -; CHECK-LE-NEXT: .pad #4 -; CHECK-LE-NEXT: sub sp, #4 -; CHECK-LE-NEXT: vmov r2, s0 -; CHECK-LE-NEXT: movs r3, #0 -; CHECK-LE-NEXT: vmov r1, s1 -; CHECK-LE-NEXT: vmov r12, s3 -; CHECK-LE-NEXT: rsbs r2, r2, #0 -; CHECK-LE-NEXT: vmov r2, s2 -; CHECK-LE-NEXT: sbcs.w r1, r3, r1 -; CHECK-LE-NEXT: mov.w r1, #0 -; CHECK-LE-NEXT: it lt -; CHECK-LE-NEXT: movlt r1, #1 -; CHECK-LE-NEXT: rsbs r2, r2, #0 -; CHECK-LE-NEXT: sbcs.w r2, r3, r12 -; CHECK-LE-NEXT: it lt -; CHECK-LE-NEXT: movlt r3, #1 -; CHECK-LE-NEXT: cmp r3, #0 -; CHECK-LE-NEXT: it ne -; CHECK-LE-NEXT: mvnne r3, #1 -; CHECK-LE-NEXT: bfi r3, r1, #0, #1 -; CHECK-LE-NEXT: and r1, r3, #3 -; CHECK-LE-NEXT: lsls r2, r3, #31 -; CHECK-LE-NEXT: ittt ne -; CHECK-LE-NEXT: vmovne r2, s1 -; CHECK-LE-NEXT: vmovne r3, s0 -; CHECK-LE-NEXT: strdne r3, r2, [r0] -; CHECK-LE-NEXT: lsls r1, r1, #30 -; CHECK-LE-NEXT: ittt mi -; CHECK-LE-NEXT: vmovmi r1, s3 -; CHECK-LE-NEXT: vmovmi r2, s2 -; CHECK-LE-NEXT: strdmi r2, r1, [r0, #8] -; CHECK-LE-NEXT: add sp, #4 -; CHECK-LE-NEXT: bx lr -; -; CHECK-BE-LABEL: masked_v2i64: -; CHECK-BE: @ %bb.0: @ %entry -; CHECK-BE-NEXT: .pad #4 -; CHECK-BE-NEXT: sub sp, #4 -; CHECK-BE-NEXT: vrev64.32 q1, q0 -; CHECK-BE-NEXT: movs r3, #0 -; CHECK-BE-NEXT: vmov r2, s7 -; CHECK-BE-NEXT: vmov r1, s6 -; CHECK-BE-NEXT: vmov r12, s4 -; CHECK-BE-NEXT: rsbs r2, r2, #0 -; CHECK-BE-NEXT: vmov r2, s5 -; CHECK-BE-NEXT: sbcs.w r1, r3, r1 -; CHECK-BE-NEXT: mov.w r1, #0 -; CHECK-BE-NEXT: it lt -; CHECK-BE-NEXT: movlt r1, #1 -; CHECK-BE-NEXT: rsbs r2, r2, #0 -; CHECK-BE-NEXT: sbcs.w r2, r3, r12 -; CHECK-BE-NEXT: it lt -; 
CHECK-BE-NEXT: movlt r3, #1 -; CHECK-BE-NEXT: cmp r3, #0 -; CHECK-BE-NEXT: it ne -; CHECK-BE-NEXT: mvnne r3, #1 -; CHECK-BE-NEXT: bfi r3, r1, #0, #1 -; CHECK-BE-NEXT: and r1, r3, #3 -; CHECK-BE-NEXT: lsls r2, r3, #31 -; CHECK-BE-NEXT: bne .LBB19_3 -; CHECK-BE-NEXT: @ %bb.1: @ %else -; CHECK-BE-NEXT: lsls r1, r1, #30 -; CHECK-BE-NEXT: bmi .LBB19_4 -; CHECK-BE-NEXT: .LBB19_2: @ %else2 -; CHECK-BE-NEXT: add sp, #4 -; CHECK-BE-NEXT: bx lr -; CHECK-BE-NEXT: .LBB19_3: @ %cond.store -; CHECK-BE-NEXT: vrev64.32 q1, q0 -; CHECK-BE-NEXT: vmov r2, s5 -; CHECK-BE-NEXT: vmov r3, s4 -; CHECK-BE-NEXT: strd r3, r2, [r0] -; CHECK-BE-NEXT: lsls r1, r1, #30 -; CHECK-BE-NEXT: bpl .LBB19_2 -; CHECK-BE-NEXT: .LBB19_4: @ %cond.store1 -; CHECK-BE-NEXT: vrev64.32 q1, q0 -; CHECK-BE-NEXT: vmov r1, s7 -; CHECK-BE-NEXT: vmov r2, s6 -; CHECK-BE-NEXT: strd r2, r1, [r0, #8] -; CHECK-BE-NEXT: add sp, #4 -; CHECK-BE-NEXT: bx lr -entry: - %c = icmp sgt <2 x i64> %a, zeroinitializer - call void @llvm.masked.store.v2i64.p0v2i64(<2 x i64> %a, <2 x i64>* %dest, i32 8, <2 x i1> %c) - ret void -} - -define arm_aapcs_vfpcc void @masked_v2f64(<2 x double> *%dest, <2 x double> %a, <2 x i64> %b) { -; CHECK-LE-LABEL: masked_v2f64: -; CHECK-LE: @ %bb.0: @ %entry -; CHECK-LE-NEXT: .pad #4 -; CHECK-LE-NEXT: sub sp, #4 -; CHECK-LE-NEXT: vmov r2, s4 -; CHECK-LE-NEXT: movs r3, #0 -; CHECK-LE-NEXT: vmov r1, s5 -; CHECK-LE-NEXT: vmov r12, s7 -; CHECK-LE-NEXT: rsbs r2, r2, #0 -; CHECK-LE-NEXT: vmov r2, s6 -; CHECK-LE-NEXT: sbcs.w r1, r3, r1 -; CHECK-LE-NEXT: mov.w r1, #0 -; CHECK-LE-NEXT: it lt -; CHECK-LE-NEXT: movlt r1, #1 -; CHECK-LE-NEXT: rsbs r2, r2, #0 -; CHECK-LE-NEXT: sbcs.w r2, r3, r12 -; CHECK-LE-NEXT: it lt -; CHECK-LE-NEXT: movlt r3, #1 -; CHECK-LE-NEXT: cmp r3, #0 -; CHECK-LE-NEXT: it ne -; CHECK-LE-NEXT: mvnne r3, #1 -; CHECK-LE-NEXT: bfi r3, r1, #0, #1 -; CHECK-LE-NEXT: and r1, r3, #3 -; CHECK-LE-NEXT: lsls r2, r3, #31 -; CHECK-LE-NEXT: it ne -; CHECK-LE-NEXT: vstrne d0, [r0] -; CHECK-LE-NEXT: lsls r1, r1, #30 -; CHECK-LE-NEXT: it mi -; CHECK-LE-NEXT: vstrmi d1, [r0, #8] -; CHECK-LE-NEXT: add sp, #4 -; CHECK-LE-NEXT: bx lr -; -; CHECK-BE-LABEL: masked_v2f64: -; CHECK-BE: @ %bb.0: @ %entry -; CHECK-BE-NEXT: .pad #4 -; CHECK-BE-NEXT: sub sp, #4 -; CHECK-BE-NEXT: vrev64.32 q2, q1 -; CHECK-BE-NEXT: movs r3, #0 -; CHECK-BE-NEXT: vmov r2, s11 -; CHECK-BE-NEXT: vmov r1, s10 -; CHECK-BE-NEXT: vmov r12, s8 -; CHECK-BE-NEXT: rsbs r2, r2, #0 -; CHECK-BE-NEXT: vmov r2, s9 -; CHECK-BE-NEXT: sbcs.w r1, r3, r1 -; CHECK-BE-NEXT: mov.w r1, #0 -; CHECK-BE-NEXT: it lt -; CHECK-BE-NEXT: movlt r1, #1 -; CHECK-BE-NEXT: rsbs r2, r2, #0 -; CHECK-BE-NEXT: sbcs.w r2, r3, r12 -; CHECK-BE-NEXT: it lt -; CHECK-BE-NEXT: movlt r3, #1 -; CHECK-BE-NEXT: cmp r3, #0 -; CHECK-BE-NEXT: it ne -; CHECK-BE-NEXT: mvnne r3, #1 -; CHECK-BE-NEXT: bfi r3, r1, #0, #1 -; CHECK-BE-NEXT: and r1, r3, #3 -; CHECK-BE-NEXT: lsls r2, r3, #31 -; CHECK-BE-NEXT: it ne -; CHECK-BE-NEXT: vstrne d0, [r0] -; CHECK-BE-NEXT: lsls r1, r1, #30 -; CHECK-BE-NEXT: it mi -; CHECK-BE-NEXT: vstrmi d1, [r0, #8] -; CHECK-BE-NEXT: add sp, #4 -; CHECK-BE-NEXT: bx lr -entry: - %c = icmp sgt <2 x i64> %b, zeroinitializer - call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> %a, <2 x double>* %dest, i32 8, <2 x i1> %c) - ret void -} - - -declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32, <4 x i1>) -declare void @llvm.masked.store.v8i16.p0v8i16(<8 x i16>, <8 x i16>*, i32, <8 x i1>) -declare void @llvm.masked.store.v16i8.p0v16i8(<16 x i8>, <16 x i8>*, i32, <16 x i1>) -declare 
void @llvm.masked.store.v4f32.p0v4f32(<4 x float>, <4 x float>*, i32, <4 x i1>) -declare void @llvm.masked.store.v8f16.p0v8f16(<8 x half>, <8 x half>*, i32, <8 x i1>) -declare void @llvm.masked.store.v2i64.p0v2i64(<2 x i64>, <2 x i64>*, i32, <2 x i1>) -declare void @llvm.masked.store.v2f64.p0v2f64(<2 x double>, <2 x double>*, i32, <2 x i1>) +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve,+fullfp16 -enable-arm-maskedldst -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE +; RUN: llc -mtriple=thumbebv8.1m.main-arm-none-eabi -mattr=+mve,+fullfp16 -enable-arm-maskedldst -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE + +define arm_aapcs_vfpcc void @masked_v4i32(<4 x i32> *%dest, <4 x i32> %a) { +; CHECK-LE-LABEL: masked_v4i32: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vcmp.s32 gt, q0, zr +; CHECK-LE-NEXT: vpst +; CHECK-LE-NEXT: vstrwt.32 q0, [r0] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: masked_v4i32: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vrev64.32 q1, q0 +; CHECK-BE-NEXT: vcmp.s32 gt, q1, zr +; CHECK-BE-NEXT: vpst +; CHECK-BE-NEXT: vstrwt.32 q1, [r0] +; CHECK-BE-NEXT: bx lr +entry: + %c = icmp sgt <4 x i32> %a, zeroinitializer + call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %a, <4 x i32>* %dest, i32 4, <4 x i1> %c) + ret void +} + +define arm_aapcs_vfpcc void @masked_v4i32_align1(<4 x i32> *%dest, <4 x i32> %a) { +; CHECK-LE-LABEL: masked_v4i32_align1: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vcmp.s32 gt, q0, zr +; CHECK-LE-NEXT: vpst +; CHECK-LE-NEXT: vstrbt.8 q0, [r0] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: masked_v4i32_align1: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vrev64.32 q1, q0 +; CHECK-BE-NEXT: vrev32.8 q0, q1 +; CHECK-BE-NEXT: vcmp.s32 gt, q1, zr +; CHECK-BE-NEXT: vpst +; CHECK-BE-NEXT: vstrbt.8 q0, [r0] +; CHECK-BE-NEXT: bx lr +entry: + %c = icmp sgt <4 x i32> %a, zeroinitializer + call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %a, <4 x i32>* %dest, i32 1, <4 x i1> %c) + ret void +} + +define i8* @masked_v4i32_pre(i8* %y, i8* %x, <4 x i32> %a) { +; CHECK-LE-LABEL: masked_v4i32_pre: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldr d1, [sp] +; CHECK-LE-NEXT: vldrw.u32 q1, [r1] +; CHECK-LE-NEXT: vmov d0, r2, r3 +; CHECK-LE-NEXT: vcmp.s32 gt, q0, zr +; CHECK-LE-NEXT: vpst +; CHECK-LE-NEXT: vstrwt.32 q1, [r0, #4] +; CHECK-LE-NEXT: adds r0, #4 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: masked_v4i32_pre: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldr d1, [sp] +; CHECK-BE-NEXT: vldrw.u32 q1, [r1] +; CHECK-BE-NEXT: vmov d0, r3, r2 +; CHECK-BE-NEXT: vrev64.32 q2, q0 +; CHECK-BE-NEXT: vcmp.s32 gt, q2, zr +; CHECK-BE-NEXT: vpst +; CHECK-BE-NEXT: vstrwt.32 q1, [r0, #4] +; CHECK-BE-NEXT: adds r0, #4 +; CHECK-BE-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 4 + %0 = bitcast i8* %x to <4 x i32>* + %1 = load <4 x i32>, <4 x i32>* %0, align 4 + %2 = bitcast i8* %z to <4 x i32>* + %c = icmp sgt <4 x i32> %a, zeroinitializer + call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %1, <4 x i32>* %2, i32 4, <4 x i1> %c) + ret i8* %z +} + +define i8* @masked_v4i32_post(i8* %y, i8* %x, <4 x i32> %a) { +; CHECK-LE-LABEL: masked_v4i32_post: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldr d1, [sp] +; CHECK-LE-NEXT: vldrw.u32 q1, [r1] +; CHECK-LE-NEXT: vmov d0, r2, r3 +; CHECK-LE-NEXT: vcmp.s32 gt, q0, zr +; CHECK-LE-NEXT: vpst +; 
CHECK-LE-NEXT: vstrwt.32 q1, [r0] +; CHECK-LE-NEXT: adds r0, #4 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: masked_v4i32_post: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldr d1, [sp] +; CHECK-BE-NEXT: vldrw.u32 q1, [r1] +; CHECK-BE-NEXT: vmov d0, r3, r2 +; CHECK-BE-NEXT: vrev64.32 q2, q0 +; CHECK-BE-NEXT: vcmp.s32 gt, q2, zr +; CHECK-BE-NEXT: vpst +; CHECK-BE-NEXT: vstrwt.32 q1, [r0] +; CHECK-BE-NEXT: adds r0, #4 +; CHECK-BE-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 4 + %0 = bitcast i8* %x to <4 x i32>* + %1 = load <4 x i32>, <4 x i32>* %0, align 4 + %2 = bitcast i8* %y to <4 x i32>* + %c = icmp sgt <4 x i32> %a, zeroinitializer + call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %1, <4 x i32>* %2, i32 4, <4 x i1> %c) + ret i8* %z +} + + +define arm_aapcs_vfpcc void @masked_v8i16(<8 x i16> *%dest, <8 x i16> %a) { +; CHECK-LE-LABEL: masked_v8i16: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vcmp.s16 gt, q0, zr +; CHECK-LE-NEXT: vpst +; CHECK-LE-NEXT: vstrht.16 q0, [r0] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: masked_v8i16: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vrev64.16 q1, q0 +; CHECK-BE-NEXT: vcmp.s16 gt, q1, zr +; CHECK-BE-NEXT: vpst +; CHECK-BE-NEXT: vstrht.16 q1, [r0] +; CHECK-BE-NEXT: bx lr +entry: + %c = icmp sgt <8 x i16> %a, zeroinitializer + call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %a, <8 x i16>* %dest, i32 2, <8 x i1> %c) + ret void +} + +define arm_aapcs_vfpcc void @masked_v8i16_align1(<8 x i16> *%dest, <8 x i16> %a) { +; CHECK-LE-LABEL: masked_v8i16_align1: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vcmp.s16 gt, q0, zr +; CHECK-LE-NEXT: vpst +; CHECK-LE-NEXT: vstrbt.8 q0, [r0] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: masked_v8i16_align1: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vrev64.16 q1, q0 +; CHECK-BE-NEXT: vrev16.8 q0, q1 +; CHECK-BE-NEXT: vcmp.s16 gt, q1, zr +; CHECK-BE-NEXT: vpst +; CHECK-BE-NEXT: vstrbt.8 q0, [r0] +; CHECK-BE-NEXT: bx lr +entry: + %c = icmp sgt <8 x i16> %a, zeroinitializer + call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %a, <8 x i16>* %dest, i32 1, <8 x i1> %c) + ret void +} + +define i8* @masked_v8i16_pre(i8* %y, i8* %x, <8 x i16> %a) { +; CHECK-LE-LABEL: masked_v8i16_pre: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldr d1, [sp] +; CHECK-LE-NEXT: vldrw.u32 q1, [r1] +; CHECK-LE-NEXT: vmov d0, r2, r3 +; CHECK-LE-NEXT: vcmp.s16 gt, q0, zr +; CHECK-LE-NEXT: vpst +; CHECK-LE-NEXT: vstrht.16 q1, [r0, #4] +; CHECK-LE-NEXT: adds r0, #4 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: masked_v8i16_pre: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldr d1, [sp] +; CHECK-BE-NEXT: vldrh.u16 q1, [r1] +; CHECK-BE-NEXT: vmov d0, r3, r2 +; CHECK-BE-NEXT: vrev64.16 q2, q0 +; CHECK-BE-NEXT: vcmp.s16 gt, q2, zr +; CHECK-BE-NEXT: vpst +; CHECK-BE-NEXT: vstrht.16 q1, [r0, #4] +; CHECK-BE-NEXT: adds r0, #4 +; CHECK-BE-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 4 + %0 = bitcast i8* %x to <8 x i16>* + %1 = load <8 x i16>, <8 x i16>* %0, align 4 + %2 = bitcast i8* %z to <8 x i16>* + %c = icmp sgt <8 x i16> %a, zeroinitializer + call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %1, <8 x i16>* %2, i32 2, <8 x i1> %c) + ret i8* %z +} + +define i8* @masked_v8i16_post(i8* %y, i8* %x, <8 x i16> %a) { +; CHECK-LE-LABEL: masked_v8i16_post: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldr d1, [sp] +; CHECK-LE-NEXT: vldrw.u32 q1, [r1] +; CHECK-LE-NEXT: vmov d0, r2, r3 +; CHECK-LE-NEXT: vcmp.s16 gt, q0, zr +; CHECK-LE-NEXT: vpst +; CHECK-LE-NEXT: vstrht.16 
q1, [r0] +; CHECK-LE-NEXT: adds r0, #4 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: masked_v8i16_post: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldr d1, [sp] +; CHECK-BE-NEXT: vldrh.u16 q1, [r1] +; CHECK-BE-NEXT: vmov d0, r3, r2 +; CHECK-BE-NEXT: vrev64.16 q2, q0 +; CHECK-BE-NEXT: vcmp.s16 gt, q2, zr +; CHECK-BE-NEXT: vpst +; CHECK-BE-NEXT: vstrht.16 q1, [r0] +; CHECK-BE-NEXT: adds r0, #4 +; CHECK-BE-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 4 + %0 = bitcast i8* %x to <8 x i16>* + %1 = load <8 x i16>, <8 x i16>* %0, align 4 + %2 = bitcast i8* %y to <8 x i16>* + %c = icmp sgt <8 x i16> %a, zeroinitializer + call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %1, <8 x i16>* %2, i32 2, <8 x i1> %c) + ret i8* %z +} + + +define arm_aapcs_vfpcc void @masked_v16i8(<16 x i8> *%dest, <16 x i8> %a) { +; CHECK-LE-LABEL: masked_v16i8: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vcmp.s8 gt, q0, zr +; CHECK-LE-NEXT: vpst +; CHECK-LE-NEXT: vstrbt.8 q0, [r0] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: masked_v16i8: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vrev64.8 q1, q0 +; CHECK-BE-NEXT: vcmp.s8 gt, q1, zr +; CHECK-BE-NEXT: vpst +; CHECK-BE-NEXT: vstrbt.8 q1, [r0] +; CHECK-BE-NEXT: bx lr +entry: + %c = icmp sgt <16 x i8> %a, zeroinitializer + call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %a, <16 x i8>* %dest, i32 1, <16 x i1> %c) + ret void +} + +define i8* @masked_v16i8_pre(i8* %y, i8* %x, <16 x i8> %a) { +; CHECK-LE-LABEL: masked_v16i8_pre: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldr d1, [sp] +; CHECK-LE-NEXT: vldrw.u32 q1, [r1] +; CHECK-LE-NEXT: vmov d0, r2, r3 +; CHECK-LE-NEXT: vcmp.s8 gt, q0, zr +; CHECK-LE-NEXT: vpst +; CHECK-LE-NEXT: vstrbt.8 q1, [r0, #4] +; CHECK-LE-NEXT: adds r0, #4 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: masked_v16i8_pre: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldr d1, [sp] +; CHECK-BE-NEXT: vldrb.u8 q1, [r1] +; CHECK-BE-NEXT: vmov d0, r3, r2 +; CHECK-BE-NEXT: vrev64.8 q2, q0 +; CHECK-BE-NEXT: vcmp.s8 gt, q2, zr +; CHECK-BE-NEXT: vpst +; CHECK-BE-NEXT: vstrbt.8 q1, [r0, #4] +; CHECK-BE-NEXT: adds r0, #4 +; CHECK-BE-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 4 + %0 = bitcast i8* %x to <16 x i8>* + %1 = load <16 x i8>, <16 x i8>* %0, align 4 + %2 = bitcast i8* %z to <16 x i8>* + %c = icmp sgt <16 x i8> %a, zeroinitializer + call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %1, <16 x i8>* %2, i32 1, <16 x i1> %c) + ret i8* %z +} + +define i8* @masked_v16i8_post(i8* %y, i8* %x, <16 x i8> %a) { +; CHECK-LE-LABEL: masked_v16i8_post: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldr d1, [sp] +; CHECK-LE-NEXT: vldrw.u32 q1, [r1] +; CHECK-LE-NEXT: vmov d0, r2, r3 +; CHECK-LE-NEXT: vcmp.s8 gt, q0, zr +; CHECK-LE-NEXT: vpst +; CHECK-LE-NEXT: vstrbt.8 q1, [r0] +; CHECK-LE-NEXT: adds r0, #4 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: masked_v16i8_post: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldr d1, [sp] +; CHECK-BE-NEXT: vldrb.u8 q1, [r1] +; CHECK-BE-NEXT: vmov d0, r3, r2 +; CHECK-BE-NEXT: vrev64.8 q2, q0 +; CHECK-BE-NEXT: vcmp.s8 gt, q2, zr +; CHECK-BE-NEXT: vpst +; CHECK-BE-NEXT: vstrbt.8 q1, [r0] +; CHECK-BE-NEXT: adds r0, #4 +; CHECK-BE-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 4 + %0 = bitcast i8* %x to <16 x i8>* + %1 = load <16 x i8>, <16 x i8>* %0, align 4 + %2 = bitcast i8* %y to <16 x i8>* + %c = icmp sgt <16 x i8> %a, zeroinitializer + call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %1, <16 x i8>* %2, i32 1, <16 x i1> %c) + ret 
i8* %z +} + + +define arm_aapcs_vfpcc void @masked_v4f32(<4 x float> *%dest, <4 x float> %a, <4 x i32> %b) { +; CHECK-LE-LABEL: masked_v4f32: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vcmp.i32 ne, q1, zr +; CHECK-LE-NEXT: vpst +; CHECK-LE-NEXT: vstrwt.32 q0, [r0] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: masked_v4f32: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vrev64.32 q2, q1 +; CHECK-BE-NEXT: vrev64.32 q1, q0 +; CHECK-BE-NEXT: vcmp.i32 ne, q2, zr +; CHECK-BE-NEXT: vpst +; CHECK-BE-NEXT: vstrwt.32 q1, [r0] +; CHECK-BE-NEXT: bx lr +entry: + %c = icmp ugt <4 x i32> %b, zeroinitializer + call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %a, <4 x float>* %dest, i32 4, <4 x i1> %c) + ret void +} + +define arm_aapcs_vfpcc void @masked_v4f32_align1(<4 x float> *%dest, <4 x float> %a, <4 x i32> %b) { +; CHECK-LE-LABEL: masked_v4f32_align1: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vcmp.i32 ne, q1, zr +; CHECK-LE-NEXT: vpst +; CHECK-LE-NEXT: vstrbt.8 q0, [r0] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: masked_v4f32_align1: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vrev64.32 q2, q1 +; CHECK-BE-NEXT: vrev64.32 q1, q0 +; CHECK-BE-NEXT: vrev32.8 q0, q1 +; CHECK-BE-NEXT: vcmp.i32 ne, q2, zr +; CHECK-BE-NEXT: vpst +; CHECK-BE-NEXT: vstrbt.8 q0, [r0] +; CHECK-BE-NEXT: bx lr +entry: + %c = icmp ugt <4 x i32> %b, zeroinitializer + call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %a, <4 x float>* %dest, i32 1, <4 x i1> %c) + ret void +} + +define i8* @masked_v4f32_pre(i8* %y, i8* %x, <4 x i32> %a) { +; CHECK-LE-LABEL: masked_v4f32_pre: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldr d1, [sp] +; CHECK-LE-NEXT: vldrw.u32 q1, [r1] +; CHECK-LE-NEXT: vmov d0, r2, r3 +; CHECK-LE-NEXT: vcmp.s32 gt, q0, zr +; CHECK-LE-NEXT: vpst +; CHECK-LE-NEXT: vstrwt.32 q1, [r0, #4] +; CHECK-LE-NEXT: adds r0, #4 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: masked_v4f32_pre: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldr d1, [sp] +; CHECK-BE-NEXT: vldrw.u32 q1, [r1] +; CHECK-BE-NEXT: vmov d0, r3, r2 +; CHECK-BE-NEXT: vrev64.32 q2, q0 +; CHECK-BE-NEXT: vcmp.s32 gt, q2, zr +; CHECK-BE-NEXT: vpst +; CHECK-BE-NEXT: vstrwt.32 q1, [r0, #4] +; CHECK-BE-NEXT: adds r0, #4 +; CHECK-BE-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 4 + %0 = bitcast i8* %x to <4 x float>* + %1 = load <4 x float>, <4 x float>* %0, align 4 + %2 = bitcast i8* %z to <4 x float>* + %c = icmp sgt <4 x i32> %a, zeroinitializer + call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %1, <4 x float>* %2, i32 4, <4 x i1> %c) + ret i8* %z +} + +define i8* @masked_v4f32_post(i8* %y, i8* %x, <4 x i32> %a) { +; CHECK-LE-LABEL: masked_v4f32_post: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldr d1, [sp] +; CHECK-LE-NEXT: vldrw.u32 q1, [r1] +; CHECK-LE-NEXT: vmov d0, r2, r3 +; CHECK-LE-NEXT: vcmp.s32 gt, q0, zr +; CHECK-LE-NEXT: vpst +; CHECK-LE-NEXT: vstrwt.32 q1, [r0] +; CHECK-LE-NEXT: adds r0, #4 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: masked_v4f32_post: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldr d1, [sp] +; CHECK-BE-NEXT: vldrw.u32 q1, [r1] +; CHECK-BE-NEXT: vmov d0, r3, r2 +; CHECK-BE-NEXT: vrev64.32 q2, q0 +; CHECK-BE-NEXT: vcmp.s32 gt, q2, zr +; CHECK-BE-NEXT: vpst +; CHECK-BE-NEXT: vstrwt.32 q1, [r0] +; CHECK-BE-NEXT: adds r0, #4 +; CHECK-BE-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 4 + %0 = bitcast i8* %x to <4 x float>* + %1 = load <4 x float>, <4 x float>* %0, align 4 + %2 = bitcast i8* %y to <4 x float>* + %c = icmp sgt <4 x i32> %a, 
zeroinitializer + call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %1, <4 x float>* %2, i32 4, <4 x i1> %c) + ret i8* %z +} + + +define arm_aapcs_vfpcc void @masked_v8f16(<8 x half> *%dest, <8 x half> %a, <8 x i16> %b) { +; CHECK-LE-LABEL: masked_v8f16: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vcmp.i16 ne, q1, zr +; CHECK-LE-NEXT: vpst +; CHECK-LE-NEXT: vstrht.16 q0, [r0] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: masked_v8f16: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vrev64.16 q2, q1 +; CHECK-BE-NEXT: vrev64.16 q1, q0 +; CHECK-BE-NEXT: vcmp.i16 ne, q2, zr +; CHECK-BE-NEXT: vpst +; CHECK-BE-NEXT: vstrht.16 q1, [r0] +; CHECK-BE-NEXT: bx lr +entry: + %c = icmp ugt <8 x i16> %b, zeroinitializer + call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %a, <8 x half>* %dest, i32 2, <8 x i1> %c) + ret void +} + +define arm_aapcs_vfpcc void @masked_v8f16_align1(<8 x half> *%dest, <8 x half> %a, <8 x i16> %b) { +; CHECK-LE-LABEL: masked_v8f16_align1: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vcmp.i16 ne, q1, zr +; CHECK-LE-NEXT: vpst +; CHECK-LE-NEXT: vstrbt.8 q0, [r0] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: masked_v8f16_align1: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vrev64.16 q2, q0 +; CHECK-BE-NEXT: vrev16.8 q0, q2 +; CHECK-BE-NEXT: vrev64.16 q2, q1 +; CHECK-BE-NEXT: vcmp.i16 ne, q2, zr +; CHECK-BE-NEXT: vpst +; CHECK-BE-NEXT: vstrbt.8 q0, [r0] +; CHECK-BE-NEXT: bx lr +entry: + %c = icmp ugt <8 x i16> %b, zeroinitializer + call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %a, <8 x half>* %dest, i32 1, <8 x i1> %c) + ret void +} + +define i8* @masked_v8f16_pre(i8* %y, i8* %x, <8 x i16> %a) { +; CHECK-LE-LABEL: masked_v8f16_pre: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldr d1, [sp] +; CHECK-LE-NEXT: vldrw.u32 q1, [r1] +; CHECK-LE-NEXT: vmov d0, r2, r3 +; CHECK-LE-NEXT: vcmp.s16 gt, q0, zr +; CHECK-LE-NEXT: vpst +; CHECK-LE-NEXT: vstrht.16 q1, [r0, #4] +; CHECK-LE-NEXT: adds r0, #4 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: masked_v8f16_pre: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldr d1, [sp] +; CHECK-BE-NEXT: vldrh.u16 q1, [r1] +; CHECK-BE-NEXT: vmov d0, r3, r2 +; CHECK-BE-NEXT: vrev64.16 q2, q0 +; CHECK-BE-NEXT: vcmp.s16 gt, q2, zr +; CHECK-BE-NEXT: vpst +; CHECK-BE-NEXT: vstrht.16 q1, [r0, #4] +; CHECK-BE-NEXT: adds r0, #4 +; CHECK-BE-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 4 + %0 = bitcast i8* %x to <8 x half>* + %1 = load <8 x half>, <8 x half>* %0, align 4 + %2 = bitcast i8* %z to <8 x half>* + %c = icmp sgt <8 x i16> %a, zeroinitializer + call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %1, <8 x half>* %2, i32 2, <8 x i1> %c) + ret i8* %z +} + +define i8* @masked_v8f16_post(i8* %y, i8* %x, <8 x i16> %a) { +; CHECK-LE-LABEL: masked_v8f16_post: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldr d1, [sp] +; CHECK-LE-NEXT: vldrw.u32 q1, [r1] +; CHECK-LE-NEXT: vmov d0, r2, r3 +; CHECK-LE-NEXT: vcmp.s16 gt, q0, zr +; CHECK-LE-NEXT: vpst +; CHECK-LE-NEXT: vstrht.16 q1, [r0] +; CHECK-LE-NEXT: adds r0, #4 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: masked_v8f16_post: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldr d1, [sp] +; CHECK-BE-NEXT: vldrh.u16 q1, [r1] +; CHECK-BE-NEXT: vmov d0, r3, r2 +; CHECK-BE-NEXT: vrev64.16 q2, q0 +; CHECK-BE-NEXT: vcmp.s16 gt, q2, zr +; CHECK-BE-NEXT: vpst +; CHECK-BE-NEXT: vstrht.16 q1, [r0] +; CHECK-BE-NEXT: adds r0, #4 +; CHECK-BE-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 4 + %0 = bitcast i8* %x to <8 x half>* + %1 = load <8 x 
half>, <8 x half>* %0, align 4 + %2 = bitcast i8* %y to <8 x half>* + %c = icmp sgt <8 x i16> %a, zeroinitializer + call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %1, <8 x half>* %2, i32 2, <8 x i1> %c) + ret i8* %z +} + + +define arm_aapcs_vfpcc void @masked_v2i64(<2 x i64> *%dest, <2 x i64> %a) { +; CHECK-LE-LABEL: masked_v2i64: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: .pad #4 +; CHECK-LE-NEXT: sub sp, #4 +; CHECK-LE-NEXT: vmov r2, s0 +; CHECK-LE-NEXT: movs r3, #0 +; CHECK-LE-NEXT: vmov r1, s1 +; CHECK-LE-NEXT: vmov r12, s3 +; CHECK-LE-NEXT: rsbs r2, r2, #0 +; CHECK-LE-NEXT: vmov r2, s2 +; CHECK-LE-NEXT: sbcs.w r1, r3, r1 +; CHECK-LE-NEXT: mov.w r1, #0 +; CHECK-LE-NEXT: it lt +; CHECK-LE-NEXT: movlt r1, #1 +; CHECK-LE-NEXT: rsbs r2, r2, #0 +; CHECK-LE-NEXT: sbcs.w r2, r3, r12 +; CHECK-LE-NEXT: it lt +; CHECK-LE-NEXT: movlt r3, #1 +; CHECK-LE-NEXT: cmp r3, #0 +; CHECK-LE-NEXT: it ne +; CHECK-LE-NEXT: mvnne r3, #1 +; CHECK-LE-NEXT: bfi r3, r1, #0, #1 +; CHECK-LE-NEXT: and r1, r3, #3 +; CHECK-LE-NEXT: lsls r2, r3, #31 +; CHECK-LE-NEXT: ittt ne +; CHECK-LE-NEXT: vmovne r2, s1 +; CHECK-LE-NEXT: vmovne r3, s0 +; CHECK-LE-NEXT: strdne r3, r2, [r0] +; CHECK-LE-NEXT: lsls r1, r1, #30 +; CHECK-LE-NEXT: ittt mi +; CHECK-LE-NEXT: vmovmi r1, s3 +; CHECK-LE-NEXT: vmovmi r2, s2 +; CHECK-LE-NEXT: strdmi r2, r1, [r0, #8] +; CHECK-LE-NEXT: add sp, #4 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: masked_v2i64: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: .pad #4 +; CHECK-BE-NEXT: sub sp, #4 +; CHECK-BE-NEXT: vrev64.32 q1, q0 +; CHECK-BE-NEXT: movs r3, #0 +; CHECK-BE-NEXT: vmov r2, s7 +; CHECK-BE-NEXT: vmov r1, s6 +; CHECK-BE-NEXT: vmov r12, s4 +; CHECK-BE-NEXT: rsbs r2, r2, #0 +; CHECK-BE-NEXT: vmov r2, s5 +; CHECK-BE-NEXT: sbcs.w r1, r3, r1 +; CHECK-BE-NEXT: mov.w r1, #0 +; CHECK-BE-NEXT: it lt +; CHECK-BE-NEXT: movlt r1, #1 +; CHECK-BE-NEXT: rsbs r2, r2, #0 +; CHECK-BE-NEXT: sbcs.w r2, r3, r12 +; CHECK-BE-NEXT: it lt +; CHECK-BE-NEXT: movlt r3, #1 +; CHECK-BE-NEXT: cmp r3, #0 +; CHECK-BE-NEXT: it ne +; CHECK-BE-NEXT: mvnne r3, #1 +; CHECK-BE-NEXT: bfi r3, r1, #0, #1 +; CHECK-BE-NEXT: and r1, r3, #3 +; CHECK-BE-NEXT: lsls r2, r3, #31 +; CHECK-BE-NEXT: bne .LBB19_3 +; CHECK-BE-NEXT: @ %bb.1: @ %else +; CHECK-BE-NEXT: lsls r1, r1, #30 +; CHECK-BE-NEXT: bmi .LBB19_4 +; CHECK-BE-NEXT: .LBB19_2: @ %else2 +; CHECK-BE-NEXT: add sp, #4 +; CHECK-BE-NEXT: bx lr +; CHECK-BE-NEXT: .LBB19_3: @ %cond.store +; CHECK-BE-NEXT: vrev64.32 q1, q0 +; CHECK-BE-NEXT: vmov r2, s5 +; CHECK-BE-NEXT: vmov r3, s4 +; CHECK-BE-NEXT: strd r3, r2, [r0] +; CHECK-BE-NEXT: lsls r1, r1, #30 +; CHECK-BE-NEXT: bpl .LBB19_2 +; CHECK-BE-NEXT: .LBB19_4: @ %cond.store1 +; CHECK-BE-NEXT: vrev64.32 q1, q0 +; CHECK-BE-NEXT: vmov r1, s7 +; CHECK-BE-NEXT: vmov r2, s6 +; CHECK-BE-NEXT: strd r2, r1, [r0, #8] +; CHECK-BE-NEXT: add sp, #4 +; CHECK-BE-NEXT: bx lr +entry: + %c = icmp sgt <2 x i64> %a, zeroinitializer + call void @llvm.masked.store.v2i64.p0v2i64(<2 x i64> %a, <2 x i64>* %dest, i32 8, <2 x i1> %c) + ret void +} + +define arm_aapcs_vfpcc void @masked_v2f64(<2 x double> *%dest, <2 x double> %a, <2 x i64> %b) { +; CHECK-LE-LABEL: masked_v2f64: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: .pad #4 +; CHECK-LE-NEXT: sub sp, #4 +; CHECK-LE-NEXT: vmov r2, s4 +; CHECK-LE-NEXT: movs r3, #0 +; CHECK-LE-NEXT: vmov r1, s5 +; CHECK-LE-NEXT: vmov r12, s7 +; CHECK-LE-NEXT: rsbs r2, r2, #0 +; CHECK-LE-NEXT: vmov r2, s6 +; CHECK-LE-NEXT: sbcs.w r1, r3, r1 +; CHECK-LE-NEXT: mov.w r1, #0 +; CHECK-LE-NEXT: it lt +; CHECK-LE-NEXT: movlt r1, 
#1 +; CHECK-LE-NEXT: rsbs r2, r2, #0 +; CHECK-LE-NEXT: sbcs.w r2, r3, r12 +; CHECK-LE-NEXT: it lt +; CHECK-LE-NEXT: movlt r3, #1 +; CHECK-LE-NEXT: cmp r3, #0 +; CHECK-LE-NEXT: it ne +; CHECK-LE-NEXT: mvnne r3, #1 +; CHECK-LE-NEXT: bfi r3, r1, #0, #1 +; CHECK-LE-NEXT: and r1, r3, #3 +; CHECK-LE-NEXT: lsls r2, r3, #31 +; CHECK-LE-NEXT: it ne +; CHECK-LE-NEXT: vstrne d0, [r0] +; CHECK-LE-NEXT: lsls r1, r1, #30 +; CHECK-LE-NEXT: it mi +; CHECK-LE-NEXT: vstrmi d1, [r0, #8] +; CHECK-LE-NEXT: add sp, #4 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: masked_v2f64: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: .pad #4 +; CHECK-BE-NEXT: sub sp, #4 +; CHECK-BE-NEXT: vrev64.32 q2, q1 +; CHECK-BE-NEXT: movs r3, #0 +; CHECK-BE-NEXT: vmov r2, s11 +; CHECK-BE-NEXT: vmov r1, s10 +; CHECK-BE-NEXT: vmov r12, s8 +; CHECK-BE-NEXT: rsbs r2, r2, #0 +; CHECK-BE-NEXT: vmov r2, s9 +; CHECK-BE-NEXT: sbcs.w r1, r3, r1 +; CHECK-BE-NEXT: mov.w r1, #0 +; CHECK-BE-NEXT: it lt +; CHECK-BE-NEXT: movlt r1, #1 +; CHECK-BE-NEXT: rsbs r2, r2, #0 +; CHECK-BE-NEXT: sbcs.w r2, r3, r12 +; CHECK-BE-NEXT: it lt +; CHECK-BE-NEXT: movlt r3, #1 +; CHECK-BE-NEXT: cmp r3, #0 +; CHECK-BE-NEXT: it ne +; CHECK-BE-NEXT: mvnne r3, #1 +; CHECK-BE-NEXT: bfi r3, r1, #0, #1 +; CHECK-BE-NEXT: and r1, r3, #3 +; CHECK-BE-NEXT: lsls r2, r3, #31 +; CHECK-BE-NEXT: it ne +; CHECK-BE-NEXT: vstrne d0, [r0] +; CHECK-BE-NEXT: lsls r1, r1, #30 +; CHECK-BE-NEXT: it mi +; CHECK-BE-NEXT: vstrmi d1, [r0, #8] +; CHECK-BE-NEXT: add sp, #4 +; CHECK-BE-NEXT: bx lr +entry: + %c = icmp sgt <2 x i64> %b, zeroinitializer + call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> %a, <2 x double>* %dest, i32 8, <2 x i1> %c) + ret void +} + + +declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32, <4 x i1>) +declare void @llvm.masked.store.v8i16.p0v8i16(<8 x i16>, <8 x i16>*, i32, <8 x i1>) +declare void @llvm.masked.store.v16i8.p0v16i8(<16 x i8>, <16 x i8>*, i32, <16 x i1>) +declare void @llvm.masked.store.v4f32.p0v4f32(<4 x float>, <4 x float>*, i32, <4 x i1>) +declare void @llvm.masked.store.v8f16.p0v8f16(<8 x half>, <8 x half>*, i32, <8 x i1>) +declare void @llvm.masked.store.v2i64.p0v2i64(<2 x i64>, <2 x i64>*, i32, <2 x i1>) +declare void @llvm.masked.store.v2f64.p0v2f64(<2 x double>, <2 x double>*, i32, <2 x i1>) diff --git a/test/Transforms/LoopVectorize/ARM/mve-maskedldst.ll b/test/Transforms/LoopVectorize/ARM/mve-maskedldst.ll new file mode 100644 index 00000000000..38d838576bd --- /dev/null +++ b/test/Transforms/LoopVectorize/ARM/mve-maskedldst.ll @@ -0,0 +1,40 @@ +; RUN: opt -loop-vectorize -enable-arm-maskedldst < %s -S -o - | FileCheck %s + +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "thumbv8.1-m.main-none-eabi" + +; CHECK-LABEL: test +; CHECK: llvm.masked.store.v4i32.p0v4i32 +define void @test(i32* nocapture %A, i32 %n) #0 { +entry: + %cmp12 = icmp sgt i32 %n, 0 + br i1 %cmp12, label %for.body.preheader, label %for.cond.cleanup + +for.body.preheader: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.inc + %i.013 = phi i32 [ %inc, %for.inc ], [ 0, %for.body.preheader ] + %arrayidx = getelementptr inbounds i32, i32* %A, i32 %i.013 + %0 = load i32, i32* %arrayidx, align 4 + %.off = add i32 %0, 9 + %1 = icmp ult i32 %.off, 19 + br i1 %1, label %if.then, label %for.inc + +if.then: ; preds = %for.body + store i32 0, i32* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body, %if.then + %inc = add nuw nsw i32 %i.013, 1 + %exitcond = icmp eq i32 
%inc, %n
+  br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
+
+for.cond.cleanup.loopexit:                        ; preds = %for.inc
+  br label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
+  ret void
+}
+
+attributes #0 = { "target-features"="+mve" }
-- 
2.11.4.GIT
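
For reference, the @test function added in test/Transforms/LoopVectorize/ARM/mve-maskedldst.ll corresponds to roughly the following C/C++ loop. This is a sketch reconstructed from the IR, not source shipped with the patch; the function and parameter names simply mirror the test. Because the store is guarded by the range check, vectorizing the loop for MVE requires the llvm.masked.store call that the CHECK line verifies (the load itself is unconditional, so it needs no mask).

  // Sketch only: reconstructed from the IR in mve-maskedldst.ll.
  // Zeroes every element of A that lies in the range [-9, 9].
  void test(int *A, int n) {
    for (int i = 0; i < n; i++) {
      // add 9 + unsigned compare against 19 encodes -9 <= A[i] <= 9
      if ((unsigned)(A[i] + 9) < 19u)
        A[i] = 0; // conditional store -> llvm.masked.store after vectorization
    }
  }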