llvm/test/Instrumentation/MemorySanitizer/AArch64/neon_tbl.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool build/bin/opt --version 2
   2 ; Test memory sanitizer instrumentation for Arm NEON tbl instructions.
   3 ;
   4 ; RUN: opt < %s -passes=msan -S | FileCheck %s
   5 ;
   6 ; Forked from llvm/test/CodeGen/AArch64/arm64-tbl.ll
   7
   8 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
   9 target triple = "aarch64--linux-android9001"
  10
  11 ; -----------------------------------------------------------------------------------------------------------------------------------------------
  12
  13 define <8 x i8> @tbl1_8b(<16 x i8> %A, <8 x i8> %B) nounwind sanitize_memory {
  14 ; CHECK-LABEL: define <8 x i8> @tbl1_8b
  15 ; CHECK-SAME: (<16 x i8> [[A:%.*]], <8 x i8> [[B:%.*]]) #[[ATTR0:[0-9]+]] {
  16 ; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
  17 ; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
  18 ; CHECK-NEXT:    call void @llvm.donothing()
  19 ; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[TMP1]], <8 x i8> [[B]])
  20 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i8> [[TMP2]], [[TMP3]]
  21 ; CHECK-NEXT:    [[OUT:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[A]], <8 x i8> [[B]])
  22 ; CHECK-NEXT:    store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
  23 ; CHECK-NEXT:    ret <8 x i8> [[OUT]]
  24 ;
  25   %out = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> %A, <8 x i8> %B)
  26   ret <8 x i8> %out
  27 }
  28
  29 define <16 x i8> @tbl1_16b(<16 x i8> %A, <16 x i8> %B) nounwind sanitize_memory {
  30 ; CHECK-LABEL: define <16 x i8> @tbl1_16b
  31 ; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]]) #[[ATTR0]] {
  32 ; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
  33 ; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
  34 ; CHECK-NEXT:    call void @llvm.donothing()
  35 ; CHECK-NEXT:    [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[B]])
  36 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i8> [[TMP2]], [[TMP3]]
  37 ; CHECK-NEXT:    [[OUT:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> [[A]], <16 x i8> [[B]])
  38 ; CHECK-NEXT:    store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
  39 ; CHECK-NEXT:    ret <16 x i8> [[OUT]]
  40 ;
  41   %out = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> %A, <16 x i8> %B)
  42   ret <16 x i8> %out
  43 }
  44
  45 define <8 x i8> @tbl2_8b(<16 x i8> %A, <16 x i8> %B, <8 x i8> %C) sanitize_memory {
  46 ; CHECK-LABEL: define <8 x i8> @tbl2_8b
  47 ; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <8 x i8> [[C:%.*]]) #[[ATTR1:[0-9]+]] {
  48 ; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
  49 ; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
  50 ; CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
  51 ; CHECK-NEXT:    call void @llvm.donothing()
  52 ; CHECK-NEXT:    [[TMP4:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> [[C]])
  53 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i8> [[TMP3]], [[TMP4]]
  54 ; CHECK-NEXT:    [[OUT:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[A]], <16 x i8> [[B]], <8 x i8> [[C]])
  55 ; CHECK-NEXT:    store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
  56 ; CHECK-NEXT:    ret <8 x i8> [[OUT]]
  57 ;
  58   %out = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %A, <16 x i8> %B, <8 x i8> %C)
  59   ret <8 x i8> %out
  60 }
  61
  62 define <16 x i8> @tbl2_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) sanitize_memory {
  63 ; CHECK-LABEL: define <16 x i8> @tbl2_16b
  64 ; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]]) #[[ATTR1]] {
  65 ; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
  66 ; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
  67 ; CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
  68 ; CHECK-NEXT:    call void @llvm.donothing()
  69 ; CHECK-NEXT:    [[TMP4:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[C]])
  70 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i8> [[TMP3]], [[TMP4]]
  71 ; CHECK-NEXT:    [[OUT:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]])
  72 ; CHECK-NEXT:    store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
  73 ; CHECK-NEXT:    ret <16 x i8> [[OUT]]
  74 ;
  75   %out = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C)
  76   ret <16 x i8> %out
  77 }
  78
  79 define <8 x i8> @tbl3_8b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) sanitize_memory {
  80 ; CHECK-LABEL: define <8 x i8> @tbl3_8b
  81 ; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <8 x i8> [[D:%.*]]) #[[ATTR1]] {
  82 ; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
  83 ; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
  84 ; CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
  85 ; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
  86 ; CHECK-NEXT:    call void @llvm.donothing()
  87 ; CHECK-NEXT:    [[TMP5:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> [[D]])
  88 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i8> [[TMP4]], [[TMP5]]
  89 ; CHECK-NEXT:    [[OUT:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <8 x i8> [[D]])
  90 ; CHECK-NEXT:    store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
  91 ; CHECK-NEXT:    ret <8 x i8> [[OUT]]
  92 ;
  93   %out = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D)
  94   ret <8 x i8> %out
  95 }
  96
  97 define <16 x i8> @tbl3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) sanitize_memory {
  98 ; CHECK-LABEL: define <16 x i8> @tbl3_16b
  99 ; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]]) #[[ATTR1]] {
 100 ; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
 101 ; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
 102 ; CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
 103 ; CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
 104 ; CHECK-NEXT:    call void @llvm.donothing()
 105 ; CHECK-NEXT:    [[TMP5:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[D]])
 106 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i8> [[TMP4]], [[TMP5]]
 107 ; CHECK-NEXT:    [[OUT:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <16 x i8> [[D]])
 108 ; CHECK-NEXT:    store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
 109 ; CHECK-NEXT:    ret <16 x i8> [[OUT]]
 110 ;
 111   %out = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D)
 112   ret <16 x i8> %out
 113 }
 114
 115 define <8 x i8> @tbl4_8b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) sanitize_memory {
 116 ; CHECK-LABEL: define <8 x i8> @tbl4_8b
 117 ; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], <8 x i8> [[E:%.*]]) #[[ATTR1]] {
 118 ; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
 119 ; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
 120 ; CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
 121 ; CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
 122 ; CHECK-NEXT:    [[TMP5:%.*]] = load <8 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
 123 ; CHECK-NEXT:    call void @llvm.donothing()
 124 ; CHECK-NEXT:    [[TMP6:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> [[E]])
 125 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i8> [[TMP5]], [[TMP6]]
 126 ; CHECK-NEXT:    [[OUT:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <16 x i8> [[D]], <8 x i8> [[E]])
 127 ; CHECK-NEXT:    store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
 128 ; CHECK-NEXT:    ret <8 x i8> [[OUT]]
 129 ;
 130   %out = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E)
 131   ret <8 x i8> %out
 132 }
 133
 134 define <16 x i8> @tbl4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) sanitize_memory {
 135 ; CHECK-LABEL: define <16 x i8> @tbl4_16b
 136 ; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], <16 x i8> [[E:%.*]]) #[[ATTR1]] {
 137 ; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
 138 ; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
 139 ; CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
 140 ; CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
 141 ; CHECK-NEXT:    [[TMP5:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
 142 ; CHECK-NEXT:    call void @llvm.donothing()
 143 ; CHECK-NEXT:    [[TMP6:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[E]])
 144 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i8> [[TMP5]], [[TMP6]]
 145 ; CHECK-NEXT:    [[OUT:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> [[E]])
 146 ; CHECK-NEXT:    store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
 147 ; CHECK-NEXT:    ret <16 x i8> [[OUT]]
 148 ;
 149   %out = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E)
 150   ret <16 x i8> %out
 151 }
 152
 153
 154
 155 define <8 x i8> @shuffled_tbl2_to_tbl4_v8i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) sanitize_memory {
 156 ; CHECK-LABEL: define <8 x i8> @shuffled_tbl2_to_tbl4_v8i8
 157 ; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]]) #[[ATTR1]] {
 158 ; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
 159 ; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
 160 ; CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
 161 ; CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
 162 ; CHECK-NEXT:    call void @llvm.donothing()
 163 ; CHECK-NEXT:    [[TMP5:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> <i8 0, i8 4, i8 8, i8 12, i8 -1, i8 -1, i8 -1, i8 -1>)
 164 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i8> zeroinitializer, [[TMP5]]
 165 ; CHECK-NEXT:    [[T1:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[A]], <16 x i8> [[B]], <8 x i8> <i8 0, i8 4, i8 8, i8 12, i8 -1, i8 -1, i8 -1, i8 -1>)
 166 ; CHECK-NEXT:    [[TMP6:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> <i8 0, i8 4, i8 8, i8 12, i8 -1, i8 -1, i8 -1, i8 -1>)
 167 ; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <8 x i8> zeroinitializer, [[TMP6]]
 168 ; CHECK-NEXT:    [[T2:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[C]], <16 x i8> [[D]], <8 x i8> <i8 0, i8 4, i8 8, i8 12, i8 -1, i8 -1, i8 -1, i8 -1>)
 169 ; CHECK-NEXT:    [[_MSPROP2:%.*]] = shufflevector <8 x i8> [[_MSPROP]], <8 x i8> [[_MSPROP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
 170 ; CHECK-NEXT:    [[S:%.*]] = shufflevector <8 x i8> [[T1]], <8 x i8> [[T2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
 171 ; CHECK-NEXT:    store <8 x i8> [[_MSPROP2]], ptr @__msan_retval_tls, align 8
 172 ; CHECK-NEXT:    ret <8 x i8> [[S]]
 173 ;
 174   %t1 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %a, <16 x i8> %b, <8 x i8> <i8 0, i8 4, i8 8, i8 12, i8 -1, i8 -1, i8 -1, i8 -1>)
 175   %t2 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %c, <16 x i8> %d, <8 x i8> <i8 0, i8 4, i8 8, i8 12, i8 -1, i8 -1, i8 -1, i8 -1>)
 176   %s = shufflevector <8 x i8> %t1, <8 x i8> %t2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
 177   ret <8 x i8> %s
 178 }
 179
 180
 181
 182 define <16 x i8> @shuffled_tbl2_to_tbl4(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) sanitize_memory {
 183 ; CHECK-LABEL: define <16 x i8> @shuffled_tbl2_to_tbl4
 184 ; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]]) #[[ATTR1]] {
 185 ; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
 186 ; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
 187 ; CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
 188 ; CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
 189 ; CHECK-NEXT:    call void @llvm.donothing()
 190 ; CHECK-NEXT:    [[TMP5:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
 191 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i8> zeroinitializer, [[TMP5]]
 192 ; CHECK-NEXT:    [[T1:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
 193 ; CHECK-NEXT:    [[TMP6:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
 194 ; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <16 x i8> zeroinitializer, [[TMP6]]
 195 ; CHECK-NEXT:    [[T2:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
 196 ; CHECK-NEXT:    [[_MSPROP2:%.*]] = shufflevector <16 x i8> [[_MSPROP]], <16 x i8> [[_MSPROP1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
 197 ; CHECK-NEXT:    [[S:%.*]] = shufflevector <16 x i8> [[T1]], <16 x i8> [[T2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
 198 ; CHECK-NEXT:    store <16 x i8> [[_MSPROP2]], ptr @__msan_retval_tls, align 8
 199 ; CHECK-NEXT:    ret <16 x i8> [[S]]
 200 ;
 201   %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
 202   %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
 203   %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
 204   ret <16 x i8> %s
 205 }
 206
 207
 208 define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, i8 %v) sanitize_memory {
 209 ; CHECK-LABEL: define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask
 210 ; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], i8 [[V:%.*]]) #[[ATTR1]] {
 211 ; CHECK-NEXT:    [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
 212 ; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
 213 ; CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
 214 ; CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
 215 ; CHECK-NEXT:    [[TMP5:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
 216 ; CHECK-NEXT:    call void @llvm.donothing()
 217 ; CHECK-NEXT:    [[_MSPROP:%.*]] = insertelement <16 x i8> splat (i8 -1), i8 [[TMP1]], i32 0
 218 ; CHECK-NEXT:    [[INS_0:%.*]] = insertelement <16 x i8> poison, i8 [[V]], i32 0
 219 ; CHECK-NEXT:    [[_MSPROP1:%.*]] = insertelement <16 x i8> [[_MSPROP]], i8 [[TMP1]], i32 1
 220 ; CHECK-NEXT:    [[INS_1:%.*]] = insertelement <16 x i8> [[INS_0]], i8 [[V]], i32 1
 221 ; CHECK-NEXT:    [[_MSPROP2:%.*]] = insertelement <16 x i8> [[_MSPROP1]], i8 [[TMP1]], i32 2
 222 ; CHECK-NEXT:    [[INS_2:%.*]] = insertelement <16 x i8> [[INS_1]], i8 [[V]], i32 2
 223 ; CHECK-NEXT:    [[_MSPROP3:%.*]] = insertelement <16 x i8> [[_MSPROP2]], i8 [[TMP1]], i32 3
 224 ; CHECK-NEXT:    [[INS_3:%.*]] = insertelement <16 x i8> [[INS_2]], i8 [[V]], i32 3
 225 ; CHECK-NEXT:    [[_MSPROP4:%.*]] = insertelement <16 x i8> [[_MSPROP3]], i8 [[TMP1]], i32 4
 226 ; CHECK-NEXT:    [[INS_4:%.*]] = insertelement <16 x i8> [[INS_3]], i8 [[V]], i32 4
 227 ; CHECK-NEXT:    [[_MSPROP5:%.*]] = insertelement <16 x i8> [[_MSPROP4]], i8 [[TMP1]], i32 5
 228 ; CHECK-NEXT:    [[INS_5:%.*]] = insertelement <16 x i8> [[INS_4]], i8 [[V]], i32 5
 229 ; CHECK-NEXT:    [[_MSPROP6:%.*]] = insertelement <16 x i8> [[_MSPROP5]], i8 [[TMP1]], i32 6
 230 ; CHECK-NEXT:    [[INS_6:%.*]] = insertelement <16 x i8> [[INS_5]], i8 [[V]], i32 6
 231 ; CHECK-NEXT:    [[_MSPROP7:%.*]] = insertelement <16 x i8> [[_MSPROP6]], i8 [[TMP1]], i32 7
 232 ; CHECK-NEXT:    [[INS_7:%.*]] = insertelement <16 x i8> [[INS_6]], i8 [[V]], i32 7
 233 ; CHECK-NEXT:    [[_MSPROP8:%.*]] = insertelement <16 x i8> [[_MSPROP7]], i8 0, i32 8
 234 ; CHECK-NEXT:    [[INS_8:%.*]] = insertelement <16 x i8> [[INS_7]], i8 -1, i32 8
 235 ; CHECK-NEXT:    [[_MSPROP9:%.*]] = insertelement <16 x i8> [[_MSPROP8]], i8 0, i32 9
 236 ; CHECK-NEXT:    [[INS_9:%.*]] = insertelement <16 x i8> [[INS_8]], i8 -1, i32 9
 237 ; CHECK-NEXT:    [[_MSPROP10:%.*]] = insertelement <16 x i8> [[_MSPROP9]], i8 0, i32 10
 238 ; CHECK-NEXT:    [[INS_10:%.*]] = insertelement <16 x i8> [[INS_9]], i8 -1, i32 10
 239 ; CHECK-NEXT:    [[_MSPROP11:%.*]] = insertelement <16 x i8> [[_MSPROP10]], i8 0, i32 11
 240 ; CHECK-NEXT:    [[INS_11:%.*]] = insertelement <16 x i8> [[INS_10]], i8 -1, i32 11
 241 ; CHECK-NEXT:    [[_MSPROP12:%.*]] = insertelement <16 x i8> [[_MSPROP11]], i8 0, i32 12
 242 ; CHECK-NEXT:    [[INS_12:%.*]] = insertelement <16 x i8> [[INS_11]], i8 -1, i32 12
 243 ; CHECK-NEXT:    [[_MSPROP13:%.*]] = insertelement <16 x i8> [[_MSPROP12]], i8 0, i32 13
 244 ; CHECK-NEXT:    [[INS_13:%.*]] = insertelement <16 x i8> [[INS_12]], i8 -1, i32 13
 245 ; CHECK-NEXT:    [[_MSPROP14:%.*]] = insertelement <16 x i8> [[_MSPROP13]], i8 0, i32 14
 246 ; CHECK-NEXT:    [[INS_14:%.*]] = insertelement <16 x i8> [[INS_13]], i8 -1, i32 14
 247 ; CHECK-NEXT:    [[_MSPROP15:%.*]] = insertelement <16 x i8> [[_MSPROP14]], i8 0, i32 15
 248 ; CHECK-NEXT:    [[INS_15:%.*]] = insertelement <16 x i8> [[INS_14]], i8 -1, i32 15
 249 ; CHECK-NEXT:    [[TMP6:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[INS_15]])
 250 ; CHECK-NEXT:    [[_MSPROP16:%.*]] = or <16 x i8> [[_MSPROP15]], [[TMP6]]
 251 ; CHECK-NEXT:    [[T1:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[INS_15]])
 252 ; CHECK-NEXT:    [[TMP7:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP4]], <16 x i8> [[TMP5]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
 253 ; CHECK-NEXT:    [[_MSPROP17:%.*]] = or <16 x i8> zeroinitializer, [[TMP7]]
 254 ; CHECK-NEXT:    [[T2:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
 255 ; CHECK-NEXT:    [[_MSPROP18:%.*]] = shufflevector <16 x i8> [[_MSPROP16]], <16 x i8> [[_MSPROP17]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
 256 ; CHECK-NEXT:    [[S:%.*]] = shufflevector <16 x i8> [[T1]], <16 x i8> [[T2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
 257 ; CHECK-NEXT:    store <16 x i8> [[_MSPROP18]], ptr @__msan_retval_tls, align 8
 258 ; CHECK-NEXT:    ret <16 x i8> [[S]]
 259 ;
 260   %ins.0 = insertelement <16 x i8> poison, i8 %v, i32 0
 261   %ins.1 = insertelement <16 x i8> %ins.0, i8 %v, i32 1
 262   %ins.2 = insertelement <16 x i8> %ins.1, i8 %v, i32 2
 263   %ins.3 = insertelement <16 x i8> %ins.2, i8 %v, i32 3
 264   %ins.4 = insertelement <16 x i8> %ins.3, i8 %v, i32 4
 265   %ins.5 = insertelement <16 x i8> %ins.4, i8 %v, i32 5
 266   %ins.6 = insertelement <16 x i8> %ins.5, i8 %v, i32 6
 267   %ins.7 = insertelement <16 x i8> %ins.6, i8 %v, i32 7
 268   %ins.8 = insertelement <16 x i8> %ins.7, i8 -1, i32 8
 269   %ins.9 = insertelement <16 x i8> %ins.8, i8 -1, i32 9
 270   %ins.10 = insertelement <16 x i8> %ins.9, i8 -1, i32 10
 271   %ins.11 = insertelement <16 x i8> %ins.10, i8 -1, i32 11
 272   %ins.12 = insertelement <16 x i8> %ins.11, i8 -1, i32 12
 273   %ins.13 = insertelement <16 x i8> %ins.12, i8 -1, i32 13
 274   %ins.14 = insertelement <16 x i8> %ins.13, i8 -1, i32 14
 275   %ins.15 = insertelement <16 x i8> %ins.14, i8 -1, i32 15
 276   %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %ins.15)
 277   %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
 278   %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
 279   ret <16 x i8> %s
 280 }
 281
 282
 283 define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask2(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, i8 %v) sanitize_memory {
 284 ; CHECK-LABEL: define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask2
 285 ; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], i8 [[V:%.*]]) #[[ATTR1]] {
 286 ; CHECK-NEXT:    [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
 287 ; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
 288 ; CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
 289 ; CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
 290 ; CHECK-NEXT:    [[TMP5:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
 291 ; CHECK-NEXT:    call void @llvm.donothing()
 292 ; CHECK-NEXT:    [[INS_0:%.*]] = insertelement <16 x i8> poison, i8 1, i32 0
 293 ; CHECK-NEXT:    [[INS_1:%.*]] = insertelement <16 x i8> [[INS_0]], i8 1, i32 1
 294 ; CHECK-NEXT:    [[INS_2:%.*]] = insertelement <16 x i8> [[INS_1]], i8 1, i32 2
 295 ; CHECK-NEXT:    [[INS_3:%.*]] = insertelement <16 x i8> [[INS_2]], i8 1, i32 3
 296 ; CHECK-NEXT:    [[INS_4:%.*]] = insertelement <16 x i8> [[INS_3]], i8 1, i32 4
 297 ; CHECK-NEXT:    [[INS_5:%.*]] = insertelement <16 x i8> [[INS_4]], i8 1, i32 5
 298 ; CHECK-NEXT:    [[INS_6:%.*]] = insertelement <16 x i8> [[INS_5]], i8 1, i32 6
 299 ; CHECK-NEXT:    [[INS_7:%.*]] = insertelement <16 x i8> [[INS_6]], i8 1, i32 7
 300 ; CHECK-NEXT:    [[INS_8:%.*]] = insertelement <16 x i8> [[INS_7]], i8 -1, i32 8
 301 ; CHECK-NEXT:    [[INS_9:%.*]] = insertelement <16 x i8> [[INS_8]], i8 -1, i32 9
 302 ; CHECK-NEXT:    [[INS_10:%.*]] = insertelement <16 x i8> [[INS_9]], i8 -1, i32 10
 303 ; CHECK-NEXT:    [[INS_11:%.*]] = insertelement <16 x i8> [[INS_10]], i8 -1, i32 11
 304 ; CHECK-NEXT:    [[_MSPROP:%.*]] = insertelement <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 -1, i8 -1, i8 -1, i8 -1>, i8 [[TMP1]], i32 12
 305 ; CHECK-NEXT:    [[INS_12:%.*]] = insertelement <16 x i8> [[INS_11]], i8 [[V]], i32 12
 306 ; CHECK-NEXT:    [[_MSPROP1:%.*]] = insertelement <16 x i8> [[_MSPROP]], i8 [[TMP1]], i32 13
 307 ; CHECK-NEXT:    [[INS_13:%.*]] = insertelement <16 x i8> [[INS_12]], i8 [[V]], i32 13
 308 ; CHECK-NEXT:    [[_MSPROP2:%.*]] = insertelement <16 x i8> [[_MSPROP1]], i8 0, i32 14
 309 ; CHECK-NEXT:    [[INS_14:%.*]] = insertelement <16 x i8> [[INS_13]], i8 -1, i32 14
 310 ; CHECK-NEXT:    [[_MSPROP3:%.*]] = insertelement <16 x i8> [[_MSPROP2]], i8 [[TMP1]], i32 15
 311 ; CHECK-NEXT:    [[INS_15:%.*]] = insertelement <16 x i8> [[INS_14]], i8 [[V]], i32 15
 312 ; CHECK-NEXT:    [[TMP6:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[INS_15]])
 313 ; CHECK-NEXT:    [[_MSPROP4:%.*]] = or <16 x i8> [[_MSPROP3]], [[TMP6]]
 314 ; CHECK-NEXT:    [[T1:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[INS_15]])
 315 ; CHECK-NEXT:    [[TMP7:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP4]], <16 x i8> [[TMP5]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
 316 ; CHECK-NEXT:    [[_MSPROP5:%.*]] = or <16 x i8> zeroinitializer, [[TMP7]]
 317 ; CHECK-NEXT:    [[T2:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
 318 ; CHECK-NEXT:    [[_MSPROP6:%.*]] = shufflevector <16 x i8> [[_MSPROP4]], <16 x i8> [[_MSPROP5]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 31>
 319 ; CHECK-NEXT:    [[S:%.*]] = shufflevector <16 x i8> [[T1]], <16 x i8> [[T2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 31>
 320 ; CHECK-NEXT:    store <16 x i8> [[_MSPROP6]], ptr @__msan_retval_tls, align 8
 321 ; CHECK-NEXT:    ret <16 x i8> [[S]]
 322 ;
 323   %ins.0 = insertelement <16 x i8> poison, i8 1, i32 0
 324   %ins.1 = insertelement <16 x i8> %ins.0, i8 1, i32 1
 325   %ins.2 = insertelement <16 x i8> %ins.1, i8 1, i32 2
 326   %ins.3 = insertelement <16 x i8> %ins.2, i8 1, i32 3
 327   %ins.4 = insertelement <16 x i8> %ins.3, i8 1, i32 4
 328   %ins.5 = insertelement <16 x i8> %ins.4, i8 1, i32 5
 329   %ins.6 = insertelement <16 x i8> %ins.5, i8 1, i32 6
 330   %ins.7 = insertelement <16 x i8> %ins.6, i8 1, i32 7
 331   %ins.8 = insertelement <16 x i8> %ins.7, i8 -1, i32 8
 332   %ins.9 = insertelement <16 x i8> %ins.8, i8 -1, i32 9
 333   %ins.10 = insertelement <16 x i8> %ins.9, i8 -1, i32 10
 334   %ins.11 = insertelement <16 x i8> %ins.10, i8 -1, i32 11
 335   %ins.12 = insertelement <16 x i8> %ins.11, i8 %v, i32 12
 336   %ins.13 = insertelement <16 x i8> %ins.12, i8 %v, i32 13
 337   %ins.14 = insertelement <16 x i8> %ins.13, i8 -1, i32 14
 338   %ins.15 = insertelement <16 x i8> %ins.14, i8 %v, i32 15
 339   %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %ins.15)
 340   %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
 341   %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 31>
 342   ret <16 x i8> %s
 343 }
 344
 345
 346
 347 define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, i8 %v) sanitize_memory {
 348 ; CHECK-LABEL: define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask
 349 ; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], i8 [[V:%.*]]) #[[ATTR1]] {
 350 ; CHECK-NEXT:    [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
 351 ; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
 352 ; CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
 353 ; CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
 354 ; CHECK-NEXT:    [[TMP5:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
 355 ; CHECK-NEXT:    call void @llvm.donothing()
 356 ; CHECK-NEXT:    [[_MSPROP:%.*]] = insertelement <16 x i8> splat (i8 -1), i8 [[TMP1]], i32 0
 357 ; CHECK-NEXT:    [[INS_0:%.*]] = insertelement <16 x i8> poison, i8 [[V]], i32 0
 358 ; CHECK-NEXT:    [[_MSPROP1:%.*]] = insertelement <16 x i8> [[_MSPROP]], i8 [[TMP1]], i32 1
 359 ; CHECK-NEXT:    [[INS_1:%.*]] = insertelement <16 x i8> [[INS_0]], i8 [[V]], i32 1
 360 ; CHECK-NEXT:    [[_MSPROP2:%.*]] = insertelement <16 x i8> [[_MSPROP1]], i8 [[TMP1]], i32 2
 361 ; CHECK-NEXT:    [[INS_2:%.*]] = insertelement <16 x i8> [[INS_1]], i8 [[V]], i32 2
 362 ; CHECK-NEXT:    [[_MSPROP3:%.*]] = insertelement <16 x i8> [[_MSPROP2]], i8 [[TMP1]], i32 3
 363 ; CHECK-NEXT:    [[INS_3:%.*]] = insertelement <16 x i8> [[INS_2]], i8 [[V]], i32 3
 364 ; CHECK-NEXT:    [[_MSPROP4:%.*]] = insertelement <16 x i8> [[_MSPROP3]], i8 [[TMP1]], i32 4
 365 ; CHECK-NEXT:    [[INS_4:%.*]] = insertelement <16 x i8> [[INS_3]], i8 [[V]], i32 4
 366 ; CHECK-NEXT:    [[_MSPROP5:%.*]] = insertelement <16 x i8> [[_MSPROP4]], i8 [[TMP1]], i32 5
 367 ; CHECK-NEXT:    [[INS_5:%.*]] = insertelement <16 x i8> [[INS_4]], i8 [[V]], i32 5
 368 ; CHECK-NEXT:    [[_MSPROP6:%.*]] = insertelement <16 x i8> [[_MSPROP5]], i8 [[TMP1]], i32 6
 369 ; CHECK-NEXT:    [[INS_6:%.*]] = insertelement <16 x i8> [[INS_5]], i8 [[V]], i32 6
 370 ; CHECK-NEXT:    [[_MSPROP7:%.*]] = insertelement <16 x i8> [[_MSPROP6]], i8 [[TMP1]], i32 7
 371 ; CHECK-NEXT:    [[INS_7:%.*]] = insertelement <16 x i8> [[INS_6]], i8 [[V]], i32 7
 372 ; CHECK-NEXT:    [[_MSPROP8:%.*]] = insertelement <16 x i8> [[_MSPROP7]], i8 0, i32 8
 373 ; CHECK-NEXT:    [[INS_8:%.*]] = insertelement <16 x i8> [[INS_7]], i8 -1, i32 8
 374 ; CHECK-NEXT:    [[_MSPROP9:%.*]] = insertelement <16 x i8> [[_MSPROP8]], i8 0, i32 9
 375 ; CHECK-NEXT:    [[INS_9:%.*]] = insertelement <16 x i8> [[INS_8]], i8 -1, i32 9
 376 ; CHECK-NEXT:    [[_MSPROP10:%.*]] = insertelement <16 x i8> [[_MSPROP9]], i8 0, i32 10
 377 ; CHECK-NEXT:    [[INS_10:%.*]] = insertelement <16 x i8> [[INS_9]], i8 -1, i32 10
 378 ; CHECK-NEXT:    [[_MSPROP11:%.*]] = insertelement <16 x i8> [[_MSPROP10]], i8 0, i32 11
 379 ; CHECK-NEXT:    [[INS_11:%.*]] = insertelement <16 x i8> [[INS_10]], i8 -1, i32 11
 380 ; CHECK-NEXT:    [[_MSPROP12:%.*]] = insertelement <16 x i8> [[_MSPROP11]], i8 0, i32 12
 381 ; CHECK-NEXT:    [[INS_12:%.*]] = insertelement <16 x i8> [[INS_11]], i8 -1, i32 12
 382 ; CHECK-NEXT:    [[_MSPROP13:%.*]] = insertelement <16 x i8> [[_MSPROP12]], i8 0, i32 13
 383 ; CHECK-NEXT:    [[INS_13:%.*]] = insertelement <16 x i8> [[INS_12]], i8 -1, i32 13
 384 ; CHECK-NEXT:    [[_MSPROP14:%.*]] = insertelement <16 x i8> [[_MSPROP13]], i8 0, i32 14
 385 ; CHECK-NEXT:    [[INS_14:%.*]] = insertelement <16 x i8> [[INS_13]], i8 -1, i32 14
 386 ; CHECK-NEXT:    [[_MSPROP15:%.*]] = insertelement <16 x i8> [[_MSPROP14]], i8 0, i32 15
 387 ; CHECK-NEXT:    [[INS_15:%.*]] = insertelement <16 x i8> [[INS_14]], i8 -1, i32 15
 388 ; CHECK-NEXT:    [[TMP6:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
 389 ; CHECK-NEXT:    [[_MSPROP16:%.*]] = or <16 x i8> zeroinitializer, [[TMP6]]
 390 ; CHECK-NEXT:    [[T1:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
 391 ; CHECK-NEXT:    [[TMP7:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP4]], <16 x i8> [[TMP5]], <16 x i8> [[INS_15]])
 392 ; CHECK-NEXT:    [[_MSPROP17:%.*]] = or <16 x i8> [[_MSPROP15]], [[TMP7]]
 393 ; CHECK-NEXT:    [[T2:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[INS_15]])
 394 ; CHECK-NEXT:    [[_MSPROP18:%.*]] = shufflevector <16 x i8> [[_MSPROP16]], <16 x i8> [[_MSPROP17]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
 395 ; CHECK-NEXT:    [[S:%.*]] = shufflevector <16 x i8> [[T1]], <16 x i8> [[T2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
 396 ; CHECK-NEXT:    store <16 x i8> [[_MSPROP18]], ptr @__msan_retval_tls, align 8
 397 ; CHECK-NEXT:    ret <16 x i8> [[S]]
 398 ;
 399   %ins.0 = insertelement <16 x i8> poison, i8 %v, i32 0
 400   %ins.1 = insertelement <16 x i8> %ins.0, i8 %v, i32 1
 401   %ins.2 = insertelement <16 x i8> %ins.1, i8 %v, i32 2
 402   %ins.3 = insertelement <16 x i8> %ins.2, i8 %v, i32 3
 403   %ins.4 = insertelement <16 x i8> %ins.3, i8 %v, i32 4
 404   %ins.5 = insertelement <16 x i8> %ins.4, i8 %v, i32 5
 405   %ins.6 = insertelement <16 x i8> %ins.5, i8 %v, i32 6
 406   %ins.7 = insertelement <16 x i8> %ins.6, i8 %v, i32 7
 407   %ins.8 = insertelement <16 x i8> %ins.7, i8 -1, i32 8
 408   %ins.9 = insertelement <16 x i8> %ins.8, i8 -1, i32 9
 409   %ins.10 = insertelement <16 x i8> %ins.9, i8 -1, i32 10
 410   %ins.11 = insertelement <16 x i8> %ins.10, i8 -1, i32 11
 411   %ins.12 = insertelement <16 x i8> %ins.11, i8 -1, i32 12
 412   %ins.13 = insertelement <16 x i8> %ins.12, i8 -1, i32 13
 413   %ins.14 = insertelement <16 x i8> %ins.13, i8 -1, i32 14
 414   %ins.15 = insertelement <16 x i8> %ins.14, i8 -1, i32 15
 415   %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
 416   %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %ins.15)
 417   %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
 418   ret <16 x i8> %s
 419 }
 420
 421
 422
 423 define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask2(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, i8 %v) sanitize_memory {
 424 ; CHECK-LABEL: define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask2
 425 ; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], i8 [[V:%.*]]) #[[ATTR1]] {
 426 ; CHECK-NEXT:    [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
 427 ; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
 428 ; CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
 429 ; CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
 430 ; CHECK-NEXT:    [[TMP5:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
 431 ; CHECK-NEXT:    call void @llvm.donothing()
 432 ; CHECK-NEXT:    [[_MSPROP:%.*]] = insertelement <16 x i8> splat (i8 -1), i8 [[TMP1]], i32 0
 433 ; CHECK-NEXT:    [[INS_0:%.*]] = insertelement <16 x i8> poison, i8 [[V]], i32 0
 434 ; CHECK-NEXT:    [[_MSPROP1:%.*]] = insertelement <16 x i8> [[_MSPROP]], i8 [[TMP1]], i32 1
 435 ; CHECK-NEXT:    [[INS_1:%.*]] = insertelement <16 x i8> [[INS_0]], i8 [[V]], i32 1
 436 ; CHECK-NEXT:    [[_MSPROP2:%.*]] = insertelement <16 x i8> [[_MSPROP1]], i8 [[TMP1]], i32 2
 437 ; CHECK-NEXT:    [[INS_2:%.*]] = insertelement <16 x i8> [[INS_1]], i8 [[V]], i32 2
 438 ; CHECK-NEXT:    [[_MSPROP3:%.*]] = insertelement <16 x i8> [[_MSPROP2]], i8 [[TMP1]], i32 3
 439 ; CHECK-NEXT:    [[INS_3:%.*]] = insertelement <16 x i8> [[INS_2]], i8 [[V]], i32 3
 440 ; CHECK-NEXT:    [[_MSPROP4:%.*]] = insertelement <16 x i8> [[_MSPROP3]], i8 [[TMP1]], i32 4
 441 ; CHECK-NEXT:    [[INS_4:%.*]] = insertelement <16 x i8> [[INS_3]], i8 [[V]], i32 4
 442 ; CHECK-NEXT:    [[_MSPROP5:%.*]] = insertelement <16 x i8> [[_MSPROP4]], i8 [[TMP1]], i32 5
 443 ; CHECK-NEXT:    [[INS_5:%.*]] = insertelement <16 x i8> [[INS_4]], i8 [[V]], i32 5
 444 ; CHECK-NEXT:    [[_MSPROP6:%.*]] = insertelement <16 x i8> [[_MSPROP5]], i8 [[TMP1]], i32 6
 445 ; CHECK-NEXT:    [[INS_6:%.*]] = insertelement <16 x i8> [[INS_5]], i8 [[V]], i32 6
 446 ; CHECK-NEXT:    [[_MSPROP7:%.*]] = insertelement <16 x i8> [[_MSPROP6]], i8 [[TMP1]], i32 7
 447 ; CHECK-NEXT:    [[INS_7:%.*]] = insertelement <16 x i8> [[INS_6]], i8 [[V]], i32 7
 448 ; CHECK-NEXT:    [[_MSPROP8:%.*]] = insertelement <16 x i8> [[_MSPROP7]], i8 0, i32 8
 449 ; CHECK-NEXT:    [[INS_8:%.*]] = insertelement <16 x i8> [[INS_7]], i8 -1, i32 8
 450 ; CHECK-NEXT:    [[_MSPROP9:%.*]] = insertelement <16 x i8> [[_MSPROP8]], i8 0, i32 9
 451 ; CHECK-NEXT:    [[INS_9:%.*]] = insertelement <16 x i8> [[INS_8]], i8 -1, i32 9
 452 ; CHECK-NEXT:    [[_MSPROP10:%.*]] = insertelement <16 x i8> [[_MSPROP9]], i8 0, i32 10
 453 ; CHECK-NEXT:    [[INS_10:%.*]] = insertelement <16 x i8> [[INS_9]], i8 -1, i32 10
 454 ; CHECK-NEXT:    [[_MSPROP11:%.*]] = insertelement <16 x i8> [[_MSPROP10]], i8 0, i32 11
 455 ; CHECK-NEXT:    [[INS_11:%.*]] = insertelement <16 x i8> [[INS_10]], i8 -1, i32 11
 456 ; CHECK-NEXT:    [[_MSPROP12:%.*]] = insertelement <16 x i8> [[_MSPROP11]], i8 0, i32 12
 457 ; CHECK-NEXT:    [[INS_12:%.*]] = insertelement <16 x i8> [[INS_11]], i8 -1, i32 12
 458 ; CHECK-NEXT:    [[_MSPROP13:%.*]] = insertelement <16 x i8> [[_MSPROP12]], i8 0, i32 13
 459 ; CHECK-NEXT:    [[INS_13:%.*]] = insertelement <16 x i8> [[INS_12]], i8 -1, i32 13
 460 ; CHECK-NEXT:    [[_MSPROP14:%.*]] = insertelement <16 x i8> [[_MSPROP13]], i8 [[TMP1]], i32 14
 461 ; CHECK-NEXT:    [[INS_14:%.*]] = insertelement <16 x i8> [[INS_13]], i8 [[V]], i32 14
 462 ; CHECK-NEXT:    [[_MSPROP15:%.*]] = insertelement <16 x i8> [[_MSPROP14]], i8 [[TMP1]], i32 15
 463 ; CHECK-NEXT:    [[INS_15:%.*]] = insertelement <16 x i8> [[INS_14]], i8 [[V]], i32 15
 464 ; CHECK-NEXT:    [[TMP6:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
 465 ; CHECK-NEXT:    [[_MSPROP16:%.*]] = or <16 x i8> zeroinitializer, [[TMP6]]
 466 ; CHECK-NEXT:    [[T1:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
 467 ; CHECK-NEXT:    [[TMP7:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP4]], <16 x i8> [[TMP5]], <16 x i8> [[INS_15]])
 468 ; CHECK-NEXT:    [[_MSPROP17:%.*]] = or <16 x i8> [[_MSPROP15]], [[TMP7]]
 469 ; CHECK-NEXT:    [[T2:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[INS_15]])
 470 ; CHECK-NEXT:    [[_MSPROP18:%.*]] = shufflevector <16 x i8> [[_MSPROP16]], <16 x i8> [[_MSPROP17]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 30, i32 31>
 471 ; CHECK-NEXT:    [[S:%.*]] = shufflevector <16 x i8> [[T1]], <16 x i8> [[T2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 30, i32 31>
 472 ; CHECK-NEXT:    store <16 x i8> [[_MSPROP18]], ptr @__msan_retval_tls, align 8
 473 ; CHECK-NEXT:    ret <16 x i8> [[S]]
 474 ;
 475   %ins.0 = insertelement <16 x i8> poison, i8 %v, i32 0
 476   %ins.1 = insertelement <16 x i8> %ins.0, i8 %v, i32 1
 477   %ins.2 = insertelement <16 x i8> %ins.1, i8 %v, i32 2
 478   %ins.3 = insertelement <16 x i8> %ins.2, i8 %v, i32 3
 479   %ins.4 = insertelement <16 x i8> %ins.3, i8 %v, i32 4
 480   %ins.5 = insertelement <16 x i8> %ins.4, i8 %v, i32 5
 481   %ins.6 = insertelement <16 x i8> %ins.5, i8 %v, i32 6
 482   %ins.7 = insertelement <16 x i8> %ins.6, i8 %v, i32 7
 483   %ins.8 = insertelement <16 x i8> %ins.7, i8 -1, i32 8
 484   %ins.9 = insertelement <16 x i8> %ins.8, i8 -1, i32 9
 485   %ins.10 = insertelement <16 x i8> %ins.9, i8 -1, i32 10
 486   %ins.11 = insertelement <16 x i8> %ins.10, i8 -1, i32 11
 487   %ins.12 = insertelement <16 x i8> %ins.11, i8 -1, i32 12
 488   %ins.13 = insertelement <16 x i8> %ins.12, i8 -1, i32 13
 489   %ins.14 = insertelement <16 x i8> %ins.13, i8 %v, i32 14
 490   %ins.15 = insertelement <16 x i8> %ins.14, i8 %v, i32 15
 491   %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
 492   %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %ins.15)
 493   %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 30, i32 31>
 494   ret <16 x i8> %s
 495 }
 496
 497
 498
 499 define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_shuffle(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) sanitize_memory {
 500 ; CHECK-LABEL: define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_shuffle
 501 ; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]]) #[[ATTR1]] {
 502 ; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
 503 ; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
 504 ; CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
 505 ; CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
 506 ; CHECK-NEXT:    call void @llvm.donothing()
 507 ; CHECK-NEXT:    [[TMP5:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
 508 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i8> zeroinitializer, [[TMP5]]
 509 ; CHECK-NEXT:    [[T1:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
 510 ; CHECK-NEXT:    [[TMP6:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
 511 ; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <16 x i8> zeroinitializer, [[TMP6]]
 512 ; CHECK-NEXT:    [[T2:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
 513 ; CHECK-NEXT:    [[_MSPROP2:%.*]] = shufflevector <16 x i8> [[_MSPROP]], <16 x i8> [[_MSPROP1]], <16 x i32> <i32 0, i32 1, i32 21, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
 514 ; CHECK-NEXT:    [[S:%.*]] = shufflevector <16 x i8> [[T1]], <16 x i8> [[T2]], <16 x i32> <i32 0, i32 1, i32 21, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
 515 ; CHECK-NEXT:    store <16 x i8> [[_MSPROP2]], ptr @__msan_retval_tls, align 8
 516 ; CHECK-NEXT:    ret <16 x i8> [[S]]
 517 ;
 518   %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
 519   %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
 520   %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 21, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
 521   ret <16 x i8> %s
 522 }
 523
 524
 525
 526 define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_tbl2_mask1(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) sanitize_memory {
 527 ; CHECK-LABEL: define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_tbl2_mask1
 528 ; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]]) #[[ATTR1]] {
 529 ; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
 530 ; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
 531 ; CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
 532 ; CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
 533 ; CHECK-NEXT:    call void @llvm.donothing()
 534 ; CHECK-NEXT:    [[TMP5:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 0, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
 535 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i8> zeroinitializer, [[TMP5]]
 536 ; CHECK-NEXT:    [[T1:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 0, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
 537 ; CHECK-NEXT:    [[TMP6:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
 538 ; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <16 x i8> zeroinitializer, [[TMP6]]
 539 ; CHECK-NEXT:    [[T2:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
 540 ; CHECK-NEXT:    [[_MSPROP2:%.*]] = shufflevector <16 x i8> [[_MSPROP]], <16 x i8> [[_MSPROP1]], <16 x i32> <i32 0, i32 1, i32 21, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
 541 ; CHECK-NEXT:    [[S:%.*]] = shufflevector <16 x i8> [[T1]], <16 x i8> [[T2]], <16 x i32> <i32 0, i32 1, i32 21, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
 542 ; CHECK-NEXT:    store <16 x i8> [[_MSPROP2]], ptr @__msan_retval_tls, align 8
 543 ; CHECK-NEXT:    ret <16 x i8> [[S]]
 544 ;
 545   %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 0, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
 546   %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
 547   %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 21, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
 548   ret <16 x i8> %s
 549 }
 550
 551
 552
 553 define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_tbl2_mask2(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) sanitize_memory {
 554 ; CHECK-LABEL: define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_tbl2_mask2
 555 ; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]]) #[[ATTR1]] {
 556 ; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
 557 ; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
 558 ; CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
 559 ; CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
 560 ; CHECK-NEXT:    call void @llvm.donothing()
 561 ; CHECK-NEXT:    [[TMP5:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
 562 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i8> zeroinitializer, [[TMP5]]
 563 ; CHECK-NEXT:    [[T1:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
 564 ; CHECK-NEXT:    [[TMP6:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 0, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
 565 ; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <16 x i8> zeroinitializer, [[TMP6]]
 566 ; CHECK-NEXT:    [[T2:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 0, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
 567 ; CHECK-NEXT:    [[_MSPROP2:%.*]] = shufflevector <16 x i8> [[_MSPROP]], <16 x i8> [[_MSPROP1]], <16 x i32> <i32 0, i32 1, i32 21, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
 568 ; CHECK-NEXT:    [[S:%.*]] = shufflevector <16 x i8> [[T1]], <16 x i8> [[T2]], <16 x i32> <i32 0, i32 1, i32 21, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
 569 ; CHECK-NEXT:    store <16 x i8> [[_MSPROP2]], ptr @__msan_retval_tls, align 8
 570 ; CHECK-NEXT:    ret <16 x i8> [[S]]
 571 ;
 572   %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
 573   %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 0, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
 574   %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 21, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
 575   ret <16 x i8> %s
 576 }
 577
 578 declare <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8>, <8 x i8>) nounwind sanitize_memory readnone
 579 declare <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8>, <16 x i8>) nounwind sanitize_memory readnone
 580 declare <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8>, <16 x i8>, <8 x i8>) nounwind sanitize_memory readnone
 581 declare <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) nounwind sanitize_memory readnone
 582 declare <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind sanitize_memory readnone
 583 declare <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind sanitize_memory readnone
 584 declare <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind sanitize_memory readnone
 585 declare <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind sanitize_memory readnone
 586
 587 define <8 x i8> @tbx1_8b(<8 x i8> %A, <16 x i8> %B, <8 x i8> %C) nounwind sanitize_memory {
 588 ; CHECK-LABEL: define <8 x i8> @tbx1_8b
 589 ; CHECK-SAME: (<8 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <8 x i8> [[C:%.*]]) #[[ATTR0]] {
 590 ; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr @__msan_param_tls, align 8
 591 ; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 592 ; CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
 593 ; CHECK-NEXT:    call void @llvm.donothing()
 594 ; CHECK-NEXT:    [[TMP4:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> [[C]])
 595 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i8> [[TMP3]], [[TMP4]]
 596 ; CHECK-NEXT:    [[OUT:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> [[A]], <16 x i8> [[B]], <8 x i8> [[C]])
 597 ; CHECK-NEXT:    store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
 598 ; CHECK-NEXT:    ret <8 x i8> [[OUT]]
 599 ;
 600   %out = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> %A, <16 x i8> %B, <8 x i8> %C)
 601   ret <8 x i8> %out
 602 }
 603
 604 define <16 x i8> @tbx1_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) nounwind sanitize_memory {
 605 ; CHECK-LABEL: define <16 x i8> @tbx1_16b
 606 ; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]]) #[[ATTR0]] {
 607 ; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
 608 ; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
 609 ; CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
 610 ; CHECK-NEXT:    call void @llvm.donothing()
 611 ; CHECK-NEXT:    [[TMP4:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[C]])
 612 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i8> [[TMP3]], [[TMP4]]
 613 ; CHECK-NEXT:    [[OUT:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]])
 614 ; CHECK-NEXT:    store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
 615 ; CHECK-NEXT:    ret <16 x i8> [[OUT]]
 616 ;
 617   %out = call <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C)
 618   ret <16 x i8> %out
 619 }
 620
 621 define <8 x i8> @tbx2_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) sanitize_memory {
 622 ; CHECK-LABEL: define <8 x i8> @tbx2_8b
 623 ; CHECK-SAME: (<8 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <8 x i8> [[D:%.*]]) #[[ATTR1]] {
 624 ; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr @__msan_param_tls, align 8
 625 ; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 626 ; CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
 627 ; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
 628 ; CHECK-NEXT:    call void @llvm.donothing()
 629 ; CHECK-NEXT:    [[TMP5:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> [[D]])
 630 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i8> [[TMP4]], [[TMP5]]
 631 ; CHECK-NEXT:    [[OUT:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <8 x i8> [[D]])
 632 ; CHECK-NEXT:    store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
 633 ; CHECK-NEXT:    ret <8 x i8> [[OUT]]
 634 ;
 635   %out = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D)
 636   ret <8 x i8> %out
 637 }
 638
 639 define <16 x i8> @tbx2_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) sanitize_memory {
 640 ; CHECK-LABEL: define <16 x i8> @tbx2_16b
 641 ; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]]) #[[ATTR1]] {
 642 ; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
 643 ; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
 644 ; CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
 645 ; CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
 646 ; CHECK-NEXT:    call void @llvm.donothing()
 647 ; CHECK-NEXT:    [[TMP5:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[D]])
 648 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i8> [[TMP4]], [[TMP5]]
 649 ; CHECK-NEXT:    [[OUT:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <16 x i8> [[D]])
 650 ; CHECK-NEXT:    store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
 651 ; CHECK-NEXT:    ret <16 x i8> [[OUT]]
 652 ;
 653   %out = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D)
 654   ret <16 x i8> %out
 655 }
 656
 657 define <8 x i8> @tbx3_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) sanitize_memory {
 658 ; CHECK-LABEL: define <8 x i8> @tbx3_8b
 659 ; CHECK-SAME: (<8 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], <8 x i8> [[E:%.*]]) #[[ATTR1]] {
 660 ; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr @__msan_param_tls, align 8
 661 ; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 662 ; CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
 663 ; CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
 664 ; CHECK-NEXT:    [[TMP5:%.*]] = load <8 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 56) to ptr), align 8
 665 ; CHECK-NEXT:    call void @llvm.donothing()
 666 ; CHECK-NEXT:    [[TMP6:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> [[E]])
 667 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i8> [[TMP5]], [[TMP6]]
 668 ; CHECK-NEXT:    [[OUT:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <16 x i8> [[D]], <8 x i8> [[E]])
 669 ; CHECK-NEXT:    store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
 670 ; CHECK-NEXT:    ret <8 x i8> [[OUT]]
 671 ;
 672   %out = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(< 8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E)
 673   ret <8 x i8> %out
 674 }
 675
 676 define <16 x i8> @tbx3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) sanitize_memory {
 677 ; CHECK-LABEL: define <16 x i8> @tbx3_16b
 678 ; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], <16 x i8> [[E:%.*]]) #[[ATTR1]] {
 679 ; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
 680 ; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
 681 ; CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
 682 ; CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
 683 ; CHECK-NEXT:    [[TMP5:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
 684 ; CHECK-NEXT:    call void @llvm.donothing()
 685 ; CHECK-NEXT:    [[TMP6:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[E]])
 686 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i8> [[TMP5]], [[TMP6]]
 687 ; CHECK-NEXT:    [[OUT:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> [[E]])
 688 ; CHECK-NEXT:    store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
 689 ; CHECK-NEXT:    ret <16 x i8> [[OUT]]
 690 ;
 691   %out = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E)
 692   ret <16 x i8> %out
 693 }
 694
 695 define <8 x i8> @tbx4_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <8 x i8> %F) sanitize_memory {
 696 ; CHECK-LABEL: define <8 x i8> @tbx4_8b
 697 ; CHECK-SAME: (<8 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], <16 x i8> [[E:%.*]], <8 x i8> [[F:%.*]]) #[[ATTR1]] {
 698 ; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr @__msan_param_tls, align 8
 699 ; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 700 ; CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
 701 ; CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
 702 ; CHECK-NEXT:    [[TMP5:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 56) to ptr), align 8
 703 ; CHECK-NEXT:    [[TMP6:%.*]] = load <8 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8
 704 ; CHECK-NEXT:    call void @llvm.donothing()
 705 ; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], <8 x i8> [[F]])
 706 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i8> [[TMP6]], [[TMP7]]
 707 ; CHECK-NEXT:    [[OUT:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> [[E]], <8 x i8> [[F]])
 708 ; CHECK-NEXT:    store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
 709 ; CHECK-NEXT:    ret <8 x i8> [[OUT]]
 710 ;
 711   %out = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <8 x i8> %F)
 712   ret <8 x i8> %out
 713 }
 714
 715 define <16 x i8> @tbx4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <16 x i8> %F) sanitize_memory {
 716 ; CHECK-LABEL: define <16 x i8> @tbx4_16b
 717 ; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], <16 x i8> [[E:%.*]], <16 x i8> [[F:%.*]]) #[[ATTR1]] {
 718 ; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
 719 ; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
 720 ; CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
 721 ; CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
 722 ; CHECK-NEXT:    [[TMP5:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
 723 ; CHECK-NEXT:    [[TMP6:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8
 724 ; CHECK-NEXT:    call void @llvm.donothing()
 725 ; CHECK-NEXT:    [[TMP7:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], <16 x i8> [[F]])
 726 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i8> [[TMP6]], [[TMP7]]
 727 ; CHECK-NEXT:    [[OUT:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> [[E]], <16 x i8> [[F]])
 728 ; CHECK-NEXT:    store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
 729 ; CHECK-NEXT:    ret <16 x i8> [[OUT]]
 730 ;
 731   %out = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <16 x i8> %F)
 732   ret <16 x i8> %out
 733 }
 734
 735 declare <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8>, <16 x i8>, <8 x i8>) nounwind sanitize_memory readnone
 736 declare <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) nounwind sanitize_memory readnone
 737 declare <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind sanitize_memory readnone
 738 declare <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind sanitize_memory readnone
 739 declare <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind sanitize_memory readnone
 740 declare <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind sanitize_memory readnone
 741 declare <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind sanitize_memory readnone
 742 declare <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind sanitize_memory readnone