1 ; RUN: llc -aarch64-sve-vector-bits-min=128 -asm-verbose=0 < %s | FileCheck %s -check-prefix=NO_SVE
2 ; RUN: llc -aarch64-sve-vector-bits-min=256 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_EQ_256
3 ; RUN: llc -aarch64-sve-vector-bits-min=384 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK
4 ; RUN: llc -aarch64-sve-vector-bits-min=512 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
5 ; RUN: llc -aarch64-sve-vector-bits-min=640 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
6 ; RUN: llc -aarch64-sve-vector-bits-min=768 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
7 ; RUN: llc -aarch64-sve-vector-bits-min=896 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
8 ; RUN: llc -aarch64-sve-vector-bits-min=1024 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
9 ; RUN: llc -aarch64-sve-vector-bits-min=1152 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
10 ; RUN: llc -aarch64-sve-vector-bits-min=1280 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
11 ; RUN: llc -aarch64-sve-vector-bits-min=1408 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
12 ; RUN: llc -aarch64-sve-vector-bits-min=1536 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
13 ; RUN: llc -aarch64-sve-vector-bits-min=1664 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
14 ; RUN: llc -aarch64-sve-vector-bits-min=1792 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
15 ; RUN: llc -aarch64-sve-vector-bits-min=1920 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
16 ; RUN: llc -aarch64-sve-vector-bits-min=2048 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024,VBITS_GE_2048
18 target triple = "aarch64-unknown-linux-gnu"
20 ; Don't use SVE when its registers are no bigger than NEON.
27 ; Don't use SVE for 64-bit vectors.
28 define <4 x half> @ucvtf_v4i16_v4f16(<4 x i16> %op1) #0 {
29 ; CHECK-LABEL: ucvtf_v4i16_v4f16:
30 ; CHECK: ucvtf v0.4h, v0.4h
32 %res = uitofp <4 x i16> %op1 to <4 x half>
36 ; Don't use SVE for 128-bit vectors.
37 define void @ucvtf_v8i16_v8f16(<8 x i16>* %a, <8 x half>* %b) #0 {
38 ; CHECK-LABEL: ucvtf_v8i16_v8f16:
40 ; CHECK-NEXT: ucvtf v0.8h, v0.8h
41 ; CHECK-NEXT: str q0, [x1]
43 %op1 = load <8 x i16>, <8 x i16>* %a
44 %res = uitofp <8 x i16> %op1 to <8 x half>
45 store <8 x half> %res, <8 x half>* %b
49 define void @ucvtf_v16i16_v16f16(<16 x i16>* %a, <16 x half>* %b) #0 {
50 ; CHECK-LABEL: ucvtf_v16i16_v16f16:
51 ; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
52 ; CHECK-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
53 ; CHECK-NEXT: ucvtf [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
54 ; CHECK-NEXT: st1h { [[RES]].h }, [[PG]], [x1]
56 %op1 = load <16 x i16>, <16 x i16>* %a
57 %res = uitofp <16 x i16> %op1 to <16 x half>
58 store <16 x half> %res, <16 x half>* %b
62 define void @ucvtf_v32i16_v32f16(<32 x i16>* %a, <32 x half>* %b) #0 {
63 ; CHECK-LABEL: ucvtf_v32i16_v32f16:
64 ; VBITS_GE_512: ptrue [[PG:p[0-9]+]].h, vl32
65 ; VBITS_GE_512-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
66 ; VBITS_GE_512-NEXT: ucvtf [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
67 ; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG]], [x1]
68 ; VBITS_GE_512-NEXT: ret
70 ; Ensure sensible type legalisation.
71 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
72 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
73 ; VBITS_EQ_256-DAG: ld1h { [[LO:z[0-9]+]].h }, [[PG]]/z, [x0]
74 ; VBITS_EQ_256-DAG: ld1h { [[HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1]
75 ; VBITS_EQ_256-DAG: ucvtf [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[LO]].h
76 ; VBITS_EQ_256-DAG: ucvtf [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[HI]].h
77 ; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x1]
78 ; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x1, x[[NUMELTS]], lsl #1]
79 ; VBITS_EQ_256-NEXT: ret
80 %op1 = load <32 x i16>, <32 x i16>* %a
81 %res = uitofp <32 x i16> %op1 to <32 x half>
82 store <32 x half> %res, <32 x half>* %b
86 define void @ucvtf_v64i16_v64f16(<64 x i16>* %a, <64 x half>* %b) #0 {
87 ; CHECK-LABEL: ucvtf_v64i16_v64f16:
88 ; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].h, vl64
89 ; VBITS_GE_1024-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
90 ; VBITS_GE_1024-NEXT: ucvtf [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
91 ; VBITS_GE_1024-NEXT: st1h { [[RES]].h }, [[PG]], [x1]
92 ; VBITS_GE_1024-NEXT: ret
93 %op1 = load <64 x i16>, <64 x i16>* %a
94 %res = uitofp <64 x i16> %op1 to <64 x half>
95 store <64 x half> %res, <64 x half>* %b
99 define void @ucvtf_v128i16_v128f16(<128 x i16>* %a, <128 x half>* %b) #0 {
100 ; CHECK-LABEL: ucvtf_v128i16_v128f16:
101 ; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].h, vl128
102 ; VBITS_GE_2048-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
103 ; VBITS_GE_2048-NEXT: ucvtf [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
104 ; VBITS_GE_2048-NEXT: st1h { [[RES]].h }, [[PG]], [x1]
105 ; VBITS_GE_2048-NEXT: ret
106 %op1 = load <128 x i16>, <128 x i16>* %a
107 %res = uitofp <128 x i16> %op1 to <128 x half>
108 store <128 x half> %res, <128 x half>* %b
116 ; Don't use SVE for 64-bit vectors.
117 define <2 x float> @ucvtf_v2i16_v2f32(<2 x i16> %op1) #0 {
118 ; CHECK-LABEL: ucvtf_v2i16_v2f32:
119 ; CHECK: movi d1, #0x00ffff0000ffff
120 ; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
121 ; CHECK-NEXT: ucvtf v0.2s, v0.2s
123 %res = uitofp <2 x i16> %op1 to <2 x float>
127 ; Don't use SVE for 128-bit vectors.
128 define <4 x float> @ucvtf_v4i16_v4f32(<4 x i16> %op1) #0 {
129 ; CHECK-LABEL: ucvtf_v4i16_v4f32:
130 ; CHECK: ucvtf v0.4s, v0.4s
132 %res = uitofp <4 x i16> %op1 to <4 x float>
136 define void @ucvtf_v8i16_v8f32(<8 x i16>* %a, <8 x float>* %b) #0 {
137 ; CHECK-LABEL: ucvtf_v8i16_v8f32:
138 ; CHECK: ldr q[[OP:[0-9]+]], [x0]
139 ; CHECK-NEXT: ptrue [[PG:p[0-9]+]].s, vl8
140 ; CHECK-NEXT: uunpklo [[UPK:z[0-9]+]].s, z[[OP]].h
141 ; CHECK-NEXT: ucvtf [[RES:z[0-9]+]].s, [[PG]]/m, [[UPK]].s
142 ; CHECK-NEXT: st1w { [[RES]].s }, [[PG]], [x1]
144 %op1 = load <8 x i16>, <8 x i16>* %a
145 %res = uitofp <8 x i16> %op1 to <8 x float>
146 store <8 x float> %res, <8 x float>* %b
150 define void @ucvtf_v16i16_v16f32(<16 x i16>* %a, <16 x float>* %b) #0 {
151 ; CHECK-LABEL: ucvtf_v16i16_v16f32:
152 ; VBITS_GE_512: ptrue [[PG1:p[0-9]+]].h, vl16
153 ; VBITS_GE_512-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG1]]/z, [x0]
154 ; VBITS_GE_512-NEXT: ptrue [[PG2:p[0-9]+]].s, vl16
155 ; VBITS_GE_512-NEXT: uunpklo [[UPK:z[0-9]+]].s, [[OP]].h
156 ; VBITS_GE_512-NEXT: ucvtf [[RES:z[0-9]+]].s, [[PG2]]/m, [[UPK]].s
157 ; VBITS_GE_512-NEXT: st1w { [[RES]].s }, [[PG1]], [x1]
158 ; VBITS_GE_512-NEXT: ret
160 ; Ensure sensible type legalisation - fixed type extract_subvector codegen is poor currently.
161 ; VBITS_EQ_256-DAG: ptrue [[PG1:p[0-9]+]].h, vl16
162 ; VBITS_EQ_256-DAG: ld1h { [[VEC:z[0-9]+]].h }, [[PG1]]/z, [x0]
163 ; VBITS_EQ_256-DAG: mov x8, sp
164 ; VBITS_EQ_256-DAG: st1h { [[VEC]].h }, [[PG1]], [x8]
165 ; VBITS_EQ_256-DAG: ldp q[[LO:[0-9]+]], q[[HI:[0-9]+]], [sp]
166 ; VBITS_EQ_256-DAG: ptrue [[PG2:p[0-9]+]].s, vl8
167 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
168 ; VBITS_EQ_256-DAG: uunpklo [[UPK_LO:z[0-9]+]].s, z[[LO]].h
169 ; VBITS_EQ_256-DAG: uunpklo [[UPK_HI:z[0-9]+]].s, z[[HI]].h
170 ; VBITS_EQ_256-DAG: ucvtf [[RES_LO:z[0-9]+]].s, [[PG2]]/m, [[UPK_LO]].s
171 ; VBITS_EQ_256-DAG: ucvtf [[RES_HI:z[0-9]+]].s, [[PG2]]/m, [[UPK_HI]].s
172 ; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG2]], [x1]
173 ; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG2]], [x1, x[[NUMELTS]], lsl #2]
174 %op1 = load <16 x i16>, <16 x i16>* %a
175 %res = uitofp <16 x i16> %op1 to <16 x float>
176 store <16 x float> %res, <16 x float>* %b
180 define void @ucvtf_v32i16_v32f32(<32 x i16>* %a, <32 x float>* %b) #0 {
181 ; CHECK-LABEL: ucvtf_v32i16_v32f32:
182 ; VBITS_GE_1024: ptrue [[PG1:p[0-9]+]].h, vl32
183 ; VBITS_GE_1024-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG1]]/z, [x0]
184 ; VBITS_GE_1024-NEXT: ptrue [[PG2:p[0-9]+]].s, vl32
185 ; VBITS_GE_1024-NEXT: uunpklo [[UPK:z[0-9]+]].s, [[OP]].h
186 ; VBITS_GE_1024-NEXT: ucvtf [[RES:z[0-9]+]].s, [[PG2]]/m, [[UPK]].s
187 ; VBITS_GE_1024-NEXT: st1w { [[RES]].s }, [[PG1]], [x1]
188 ; VBITS_GE_1024-NEXT: ret
189 %op1 = load <32 x i16>, <32 x i16>* %a
190 %res = uitofp <32 x i16> %op1 to <32 x float>
191 store <32 x float> %res, <32 x float>* %b
195 define void @ucvtf_v64i16_v64f32(<64 x i16>* %a, <64 x float>* %b) #0 {
196 ; CHECK-LABEL: ucvtf_v64i16_v64f32:
197 ; VBITS_GE_2048: ptrue [[PG1:p[0-9]+]].h, vl64
198 ; VBITS_GE_2048-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG1]]/z, [x0]
199 ; VBITS_GE_2048-NEXT: ptrue [[PG2:p[0-9]+]].s, vl64
200 ; VBITS_GE_2048-NEXT: uunpklo [[UPK:z[0-9]+]].s, [[OP]].h
201 ; VBITS_GE_2048-NEXT: ucvtf [[RES:z[0-9]+]].s, [[PG2]]/m, [[UPK]].s
202 ; VBITS_GE_2048-NEXT: st1w { [[RES]].s }, [[PG1]], [x1]
203 ; VBITS_GE_2048-NEXT: ret
204 %op1 = load <64 x i16>, <64 x i16>* %a
205 %res = uitofp <64 x i16> %op1 to <64 x float>
206 store <64 x float> %res, <64 x float>* %b
214 ; v1i16 is preferred to be widened to v4i16, which pushes the output into SVE types, so use SVE
215 define <1 x double> @ucvtf_v1i16_v1f64(<1 x i16> %op1) #0 {
216 ; CHECK-LABEL: ucvtf_v1i16_v1f64:
217 ; CHECK: ptrue [[PG:p[0-9]+]].d, vl4
218 ; CHECK-NEXT: uunpklo [[UPK1:z[0-9]+]].s, z0.h
219 ; CHECK-NEXT: uunpklo [[UPK2:z[0-9]+]].d, [[UPK1]].s
220 ; CHECK-NEXT: ucvtf z0.d, [[PG]]/m, [[UPK2]].d
222 %res = uitofp <1 x i16> %op1 to <1 x double>
223 ret <1 x double> %res
226 ; Don't use SVE for 128-bit vectors.
227 define <2 x double> @ucvtf_v2i16_v2f64(<2 x i16> %op1) #0 {
228 ; CHECK-LABEL: ucvtf_v2i16_v2f64:
229 ; CHECK: movi d1, #0x00ffff0000ffff
230 ; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
231 ; CHECK-NEXT: ushll v0.2d, v0.2s, #0
232 ; CHECK-NEXT: ucvtf v0.2d, v0.2d
234 %res = uitofp <2 x i16> %op1 to <2 x double>
235 ret <2 x double> %res
238 define void @ucvtf_v4i16_v4f64(<4 x i16>* %a, <4 x double>* %b) #0 {
239 ; CHECK-LABEL: ucvtf_v4i16_v4f64:
240 ; CHECK: ldr d[[OP:[0-9]+]], [x0]
241 ; CHECK-NEXT: ptrue [[PG:p[0-9]+]].d, vl4
242 ; CHECK-NEXT: uunpklo [[UPK1:z[0-9]+]].s, z[[OP]].h
243 ; CHECK-NEXT: uunpklo [[UPK2:z[0-9]+]].d, [[UPK1]].s
244 ; CHECK-NEXT: ucvtf [[RES:z[0-9]+]].d, [[PG]]/m, [[UPK2]].d
245 ; CHECK-NEXT: st1d { [[RES]].d }, [[PG]], [x1]
247 %op1 = load <4 x i16>, <4 x i16>* %a
248 %res = uitofp <4 x i16> %op1 to <4 x double>
249 store <4 x double> %res, <4 x double>* %b
253 define void @ucvtf_v8i16_v8f64(<8 x i16>* %a, <8 x double>* %b) #0 {
254 ; CHECK-LABEL: ucvtf_v8i16_v8f64:
255 ; VBITS_GE_512: ldr q[[OP:[0-9]+]], [x0]
256 ; VBITS_GE_512-NEXT: ptrue [[PG:p[0-9]+]].d, vl8
257 ; VBITS_GE_512-NEXT: uunpklo [[UPK1:z[0-9]+]].s, z[[OP]].h
258 ; VBITS_GE_512-NEXT: uunpklo [[UPK2:z[0-9]+]].d, [[UPK1]].s
259 ; VBITS_GE_512-NEXT: ucvtf [[RES:z[0-9]+]].d, [[PG]]/m, [[UPK2]].d
260 ; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG]], [x1]
261 ; VBITS_GE_512-NEXT: ret
263 ; Ensure sensible type legalisation.
264 ; VBITS_EQ_256-DAG: ldr q[[OP:[0-9]+]], [x0]
265 ; VBITS_EQ_256-DAG: ptrue [[PG1:p[0-9]+]].d, vl4
266 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
267 ; VBITS_EQ_256-DAG: ext v[[HI:[0-9]+]].16b, v[[LO:[0-9]+]].16b, v[[OP]].16b, #8
268 ; VBITS_EQ_256-DAG: uunpklo [[UPK1_LO:z[0-9]+]].s, z[[LO]].h
269 ; VBITS_EQ_256-DAG: uunpklo [[UPK1_HI:z[0-9]+]].s, z[[HI]].h
270 ; VBITS_EQ_256-DAG: uunpklo [[UPK2_LO:z[0-9]+]].d, [[UPK1_LO]].s
271 ; VBITS_EQ_256-DAG: uunpklo [[UPK2_HI:z[0-9]+]].d, [[UPK1_HI]].s
272 ; VBITS_EQ_256-DAG: ucvtf [[RES_LO:z[0-9]+]].d, [[PG1]]/m, [[UPK2_LO]].d
273 ; VBITS_EQ_256-DAG: ucvtf [[RES_HI:z[0-9]+]].d, [[PG1]]/m, [[UPK2_HI]].d
274 ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG1]], [x1]
275 ; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG1]], [x1, x[[NUMELTS]], lsl #3]
276 ; VBITS_EQ_256-NEXT: ret
277 %op1 = load <8 x i16>, <8 x i16>* %a
278 %res = uitofp <8 x i16> %op1 to <8 x double>
279 store <8 x double> %res, <8 x double>* %b
283 define void @ucvtf_v16i16_v16f64(<16 x i16>* %a, <16 x double>* %b) #0 {
284 ; CHECK-LABEL: ucvtf_v16i16_v16f64:
285 ; VBITS_GE_1024: ptrue [[PG1:p[0-9]+]].h, vl16
286 ; VBITS_GE_1024-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG1]]/z, [x0]
287 ; VBITS_GE_1024-NEXT: ptrue [[PG2:p[0-9]+]].d, vl16
288 ; VBITS_GE_1024-NEXT: uunpklo [[UPK1:z[0-9]+]].s, [[OP]].h
289 ; VBITS_GE_1024-NEXT: uunpklo [[UPK2:z[0-9]+]].d, [[UPK1]].s
290 ; VBITS_GE_1024-NEXT: ucvtf [[RES:z[0-9]+]].d, [[PG2]]/m, [[UPK2]].d
291 ; VBITS_GE_1024-NEXT: st1d { [[RES]].d }, [[PG1]], [x1]
292 ; VBITS_GE_1024-NEXT: ret
293 %op1 = load <16 x i16>, <16 x i16>* %a
294 %res = uitofp <16 x i16> %op1 to <16 x double>
295 store <16 x double> %res, <16 x double>* %b
299 define void @ucvtf_v32i16_v32f64(<32 x i16>* %a, <32 x double>* %b) #0 {
300 ; CHECK-LABEL: ucvtf_v32i16_v32f64:
301 ; VBITS_GE_2048: ptrue [[PG1:p[0-9]+]].h, vl32
302 ; VBITS_GE_2048-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG1]]/z, [x0]
303 ; VBITS_GE_2048-NEXT: ptrue [[PG2:p[0-9]+]].d, vl32
304 ; VBITS_GE_2048-NEXT: uunpklo [[UPK1:z[0-9]+]].s, [[OP]].h
305 ; VBITS_GE_2048-NEXT: uunpklo [[UPK2:z[0-9]+]].d, [[UPK1]].s
306 ; VBITS_GE_2048-NEXT: ucvtf [[RES:z[0-9]+]].d, [[PG2]]/m, [[UPK2]].d
307 ; VBITS_GE_2048-NEXT: st1d { [[RES]].d }, [[PG1]], [x1]
308 ; VBITS_GE_2048-NEXT: ret
309 %op1 = load <32 x i16>, <32 x i16>* %a
310 %res = uitofp <32 x i16> %op1 to <32 x double>
311 store <32 x double> %res, <32 x double>* %b
319 ; Don't use SVE for 64-bit vectors.
320 define <2 x half> @ucvtf_v2i32_v2f16(<2 x i32> %op1) #0 {
321 ; CHECK-LABEL: ucvtf_v2i32_v2f16:
322 ; CHECK: ucvtf v0.4s, v0.4s
323 ; CHECK-NEXT: fcvtn v0.4h, v0.4s
325 %res = uitofp <2 x i32> %op1 to <2 x half>
329 ; Don't use SVE for 128-bit vectors.
330 define <4 x half> @ucvtf_v4i32_v4f16(<4 x i32> %op1) #0 {
331 ; CHECK-LABEL: ucvtf_v4i32_v4f16:
332 ; CHECK: ucvtf v0.4s, v0.4s
333 ; CHECK-NEXT: fcvtn v0.4h, v0.4s
335 %res = uitofp <4 x i32> %op1 to <4 x half>
339 define <8 x half> @ucvtf_v8i32_v8f16(<8 x i32>* %a) #0 {
340 ; CHECK-LABEL: ucvtf_v8i32_v8f16:
341 ; CHECK: ptrue [[PG1:p[0-9]+]].s, vl8
342 ; CHECK-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG1]]/z, [x0]
343 ; CHECK-NEXT: ptrue [[PG2:p[0-9]+]].s
344 ; CHECK-NEXT: ucvtf [[CVT:z[0-9]+]].h, [[PG2]]/m, [[OP]].s
345 ; CHECK-NEXT: uzp1 z0.h, [[CVT]].h, [[CVT]].h
347 %op1 = load <8 x i32>, <8 x i32>* %a
348 %res = uitofp <8 x i32> %op1 to <8 x half>
352 define void @ucvtf_v16i32_v16f16(<16 x i32>* %a, <16 x half>* %b) #0 {
353 ; CHECK-LABEL: ucvtf_v16i32_v16f16:
354 ; VBITS_GE_512: ptrue [[PG1:p[0-9]+]].s, vl16
355 ; VBITS_GE_512-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG1]]/z, [x0]
356 ; VBITS_GE_512-NEXT: ptrue [[PG2:p[0-9]+]].s
357 ; VBITS_GE_512-NEXT: ucvtf [[CVT:z[0-9]+]].h, [[PG2]]/m, [[OP]].s
358 ; VBITS_GE_512-NEXT: uzp1 [[RES:z[0-9]+]].h, [[CVT]].h, [[CVT]].h
359 ; VBITS_GE_512-NEXT: ptrue [[PG3:p[0-9]+]].h, vl16
360 ; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG3]], [x1]
361 ; VBITS_GE_512-NEXT: ret
363 ; Ensure sensible type legalisation.
364 ; VBITS_EQ_256-DAG: ptrue [[PG1:p[0-9]+]].s, vl8
365 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
366 ; VBITS_EQ_256-DAG: ld1w { [[LO:z[0-9]+]].s }, [[PG1]]/z, [x0]
367 ; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG1]]/z, [x0, x[[NUMELTS]], lsl #2]
368 ; VBITS_EQ_256-DAG: ptrue [[PG2:p[0-9]+]].s
369 ; VBITS_EQ_256-DAG: ptrue [[PG3:p[0-9]+]].h, vl8
370 ; VBITS_EQ_256-DAG: ucvtf [[CVT_LO:z[0-9]+]].h, [[PG2]]/m, [[LO]].s
371 ; VBITS_EQ_256-DAG: ucvtf [[CVT_HI:z[0-9]+]].h, [[PG2]]/m, [[HI]].s
372 ; VBITS_EQ_256-DAG: uzp1 [[RES_LO:z[0-9]+]].h, [[CVT_LO]].h, [[CVT_LO]].h
373 ; VBITS_EQ_256-DAG: uzp1 [[RES_HI:z[0-9]+]].h, [[CVT_HI]].h, [[CVT_HI]].h
374 ; VBITS_EQ_256-DAG: splice [[RES:z[0-9]+]].h, [[PG3]], [[RES_LO]].h, [[RES_HI]].h
375 ; VBITS_EQ_256-DAG: ptrue [[PG4:p[0-9]+]].h, vl16
376 ; VBITS_EQ_256-NEXT: st1h { [[RES]].h }, [[PG4]], [x1]
377 ; VBITS_EQ_256-NEXT: ret
378 %op1 = load <16 x i32>, <16 x i32>* %a
379 %res = uitofp <16 x i32> %op1 to <16 x half>
380 store <16 x half> %res, <16 x half>* %b
384 define void @ucvtf_v32i32_v32f16(<32 x i32>* %a, <32 x half>* %b) #0 {
385 ; CHECK-LABEL: ucvtf_v32i32_v32f16:
386 ; VBITS_GE_1024: ptrue [[PG1:p[0-9]+]].s, vl32
387 ; VBITS_GE_1024-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG1]]/z, [x0]
388 ; VBITS_GE_1024-NEXT: ptrue [[PG2:p[0-9]+]].s
389 ; VBITS_GE_1024-NEXT: ucvtf [[CVT:z[0-9]+]].h, [[PG2]]/m, [[OP]].s
390 ; VBITS_GE_1024-NEXT: uzp1 [[RES:z[0-9]+]].h, [[CVT]].h, [[CVT]].h
391 ; VBITS_GE_1024-NEXT: ptrue [[PG3:p[0-9]+]].h, vl32
392 ; VBITS_GE_1024-NEXT: st1h { [[RES]].h }, [[PG3]], [x1]
393 ; VBITS_GE_1024-NEXT: ret
394 %op1 = load <32 x i32>, <32 x i32>* %a
395 %res = uitofp <32 x i32> %op1 to <32 x half>
396 store <32 x half> %res, <32 x half>* %b
400 define void @ucvtf_v64i32_v64f16(<64 x i32>* %a, <64 x half>* %b) #0 {
401 ; CHECK-LABEL: ucvtf_v64i32_v64f16:
402 ; VBITS_GE_2048: ptrue [[PG1:p[0-9]+]].s, vl64
403 ; VBITS_GE_2048-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG1]]/z, [x0]
404 ; VBITS_GE_2048-NEXT: ptrue [[PG2:p[0-9]+]].s
405 ; VBITS_GE_2048-NEXT: ucvtf [[CVT:z[0-9]+]].h, [[PG2]]/m, [[OP]].s
406 ; VBITS_GE_2048-NEXT: uzp1 [[RES:z[0-9]+]].h, [[CVT]].h, [[CVT]].h
407 ; VBITS_GE_2048-NEXT: ptrue [[PG3:p[0-9]+]].h, vl64
408 ; VBITS_GE_2048-NEXT: st1h { [[RES]].h }, [[PG3]], [x1]
409 ; VBITS_GE_2048-NEXT: ret
410 %op1 = load <64 x i32>, <64 x i32>* %a
411 %res = uitofp <64 x i32> %op1 to <64 x half>
412 store <64 x half> %res, <64 x half>* %b
420 ; Don't use SVE for 64-bit vectors.
421 define <2 x float> @ucvtf_v2i32_v2f32(<2 x i32> %op1) #0 {
422 ; CHECK-LABEL: ucvtf_v2i32_v2f32:
423 ; CHECK: ucvtf v0.2s, v0.2s
425 %res = uitofp <2 x i32> %op1 to <2 x float>
429 ; Don't use SVE for 128-bit vectors.
430 define <4 x float> @ucvtf_v4i32_v4f32(<4 x i32> %op1) #0 {
431 ; CHECK-LABEL: ucvtf_v4i32_v4f32:
432 ; CHECK: ucvtf v0.4s, v0.4s
434 %res = uitofp <4 x i32> %op1 to <4 x float>
438 define void @ucvtf_v8i32_v8f32(<8 x i32>* %a, <8 x float>* %b) #0 {
439 ; CHECK-LABEL: ucvtf_v8i32_v8f32:
440 ; CHECK: ptrue [[PG:p[0-9]+]].s, vl8
441 ; CHECK-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
442 ; CHECK-NEXT: ucvtf [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
443 ; CHECK-NEXT: st1w { [[RES]].s }, [[PG]], [x1]
445 %op1 = load <8 x i32>, <8 x i32>* %a
446 %res = uitofp <8 x i32> %op1 to <8 x float>
447 store <8 x float> %res, <8 x float>* %b
451 define void @ucvtf_v16i32_v16f32(<16 x i32>* %a, <16 x float>* %b) #0 {
452 ; CHECK-LABEL: ucvtf_v16i32_v16f32:
453 ; VBITS_GE_512: ptrue [[PG:p[0-9]+]].s, vl16
454 ; VBITS_GE_512-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
455 ; VBITS_GE_512-NEXT: ucvtf [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
456 ; VBITS_GE_512-NEXT: st1w { [[RES]].s }, [[PG]], [x1]
457 ; VBITS_GE_512-NEXT: ret
459 ; Ensure sensible type legalisation.
460 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
461 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
462 ; VBITS_EQ_256-DAG: ld1w { [[LO:z[0-9]+]].s }, [[PG]]/z, [x0]
463 ; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
464 ; VBITS_EQ_256-DAG: ucvtf [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[LO]].s
465 ; VBITS_EQ_256-DAG: ucvtf [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[HI]].s
466 ; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x1]
467 ; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x1, x[[NUMELTS]], lsl #2]
468 ; VBITS_EQ_256-NEXT: ret
469 %op1 = load <16 x i32>, <16 x i32>* %a
470 %res = uitofp <16 x i32> %op1 to <16 x float>
471 store <16 x float> %res, <16 x float>* %b
475 define void @ucvtf_v32i32_v32f32(<32 x i32>* %a, <32 x float>* %b) #0 {
476 ; CHECK-LABEL: ucvtf_v32i32_v32f32:
477 ; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].s, vl32
478 ; VBITS_GE_1024-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
479 ; VBITS_GE_1024-NEXT: ucvtf [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
480 ; VBITS_GE_1024-NEXT: st1w { [[RES]].s }, [[PG]], [x1]
481 ; VBITS_GE_1024-NEXT: ret
482 %op1 = load <32 x i32>, <32 x i32>* %a
483 %res = uitofp <32 x i32> %op1 to <32 x float>
484 store <32 x float> %res, <32 x float>* %b
488 define void @ucvtf_v64i32_v64f32(<64 x i32>* %a, <64 x float>* %b) #0 {
489 ; CHECK-LABEL: ucvtf_v64i32_v64f32:
490 ; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].s, vl64
491 ; VBITS_GE_2048-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
492 ; VBITS_GE_2048-NEXT: ucvtf [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
493 ; VBITS_GE_2048-NEXT: st1w { [[RES]].s }, [[PG]], [x1]
494 ; VBITS_GE_2048-NEXT: ret
495 %op1 = load <64 x i32>, <64 x i32>* %a
496 %res = uitofp <64 x i32> %op1 to <64 x float>
497 store <64 x float> %res, <64 x float>* %b
505 ; Don't use SVE for 64-bit vectors.
506 define <1 x double> @ucvtf_v1i32_v1f64(<1 x i32> %op1) #0 {
507 ; CHECK-LABEL: ucvtf_v1i32_v1f64:
508 ; CHECK: ushll v0.2d, v0.2s, #0
509 ; CHECK-NEXT: ucvtf v0.2d, v0.2d
511 %res = uitofp <1 x i32> %op1 to <1 x double>
512 ret <1 x double> %res
515 ; Don't use SVE for 128-bit vectors.
516 define <2 x double> @ucvtf_v2i32_v2f64(<2 x i32> %op1) #0 {
517 ; CHECK-LABEL: ucvtf_v2i32_v2f64:
518 ; CHECK: ushll v0.2d, v0.2s, #0
519 ; CHECK-NEXT: ucvtf v0.2d, v0.2d
521 %res = uitofp <2 x i32> %op1 to <2 x double>
522 ret <2 x double> %res
525 define void @ucvtf_v4i32_v4f64(<4 x i32>* %a, <4 x double>* %b) #0 {
526 ; CHECK-LABEL: ucvtf_v4i32_v4f64:
527 ; CHECK: ldr q[[OP:[0-9]+]], [x0]
528 ; CHECK-NEXT: ptrue [[PG:p[0-9]+]].d, vl4
529 ; CHECK-NEXT: uunpklo [[UPK:z[0-9]+]].d, z[[OP]].s
530 ; CHECK-NEXT: ucvtf [[RES:z[0-9]+]].d, [[PG]]/m, [[UPK]].d
531 ; CHECK-NEXT: st1d { [[RES]].d }, [[PG]], [x1]
533 %op1 = load <4 x i32>, <4 x i32>* %a
534 %res = uitofp <4 x i32> %op1 to <4 x double>
535 store <4 x double> %res, <4 x double>* %b
539 define void @ucvtf_v8i32_v8f64(<8 x i32>* %a, <8 x double>* %b) #0 {
540 ; CHECK-LABEL: ucvtf_v8i32_v8f64:
541 ; VBITS_GE_512: ptrue [[PG1:p[0-9]+]].s, vl8
542 ; VBITS_GE_512-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG1]]/z, [x0]
543 ; VBITS_GE_512-NEXT: ptrue [[PG:p[0-9]+]].d, vl8
544 ; VBITS_GE_512-NEXT: uunpklo [[UPK:z[0-9]+]].d, [[OP]].s
545 ; VBITS_GE_512-NEXT: ucvtf [[RES:z[0-9]+]].d, [[PG1]]/m, [[UPK]].d
546 ; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG1]], [x1]
547 ; VBITS_GE_512-NEXT: ret
549 ; Ensure sensible type legalisation - fixed type extract_subvector codegen is poor currently.
550 ; VBITS_EQ_256-DAG: ptrue [[PG1:p[0-9]+]].s, vl8
551 ; VBITS_EQ_256-DAG: ld1w { [[VEC:z[0-9]+]].s }, [[PG1]]/z, [x0]
552 ; VBITS_EQ_256-DAG: mov x8, sp
553 ; VBITS_EQ_256-DAG: st1w { [[VEC]].s }, [[PG1]], [x8]
554 ; VBITS_EQ_256-DAG: ldp q[[LO:[0-9]+]], q[[HI:[0-9]+]], [sp]
555 ; VBITS_EQ_256-DAG: ptrue [[PG2:p[0-9]+]].d, vl4
556 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
557 ; VBITS_EQ_256-DAG: uunpklo [[UPK_LO:z[0-9]+]].d, z[[LO]].s
558 ; VBITS_EQ_256-DAG: uunpklo [[UPK_HI:z[0-9]+]].d, z[[HI]].s
559 ; VBITS_EQ_256-DAG: ucvtf [[RES_LO:z[0-9]+]].d, [[PG2]]/m, [[UPK_LO]].d
560 ; VBITS_EQ_256-DAG: ucvtf [[RES_HI:z[0-9]+]].d, [[PG2]]/m, [[UPK_HI]].d
561 ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG2]], [x1]
562 ; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG2]], [x1, x[[NUMELTS]], lsl #3]
563 %op1 = load <8 x i32>, <8 x i32>* %a
564 %res = uitofp <8 x i32> %op1 to <8 x double>
565 store <8 x double> %res, <8 x double>* %b
569 define void @ucvtf_v16i32_v16f64(<16 x i32>* %a, <16 x double>* %b) #0 {
570 ; CHECK-LABEL: ucvtf_v16i32_v16f64:
571 ; VBITS_GE_1024: ptrue [[PG1:p[0-9]+]].s, vl16
572 ; VBITS_GE_1024-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG1]]/z, [x0]
573 ; VBITS_GE_1024-NEXT: ptrue [[PG2:p[0-9]+]].d, vl16
574 ; VBITS_GE_1024-NEXT: uunpklo [[UPK:z[0-9]+]].d, [[OP]].s
575 ; VBITS_GE_1024-NEXT: ucvtf [[RES:z[0-9]+]].d, [[PG2]]/m, [[UPK]].d
576 ; VBITS_GE_1024-NEXT: st1d { [[RES]].d }, [[PG1]], [x1]
577 ; VBITS_GE_1024-NEXT: ret
578 %op1 = load <16 x i32>, <16 x i32>* %a
579 %res = uitofp <16 x i32> %op1 to <16 x double>
580 store <16 x double> %res, <16 x double>* %b
584 define void @ucvtf_v32i32_v32f64(<32 x i32>* %a, <32 x double>* %b) #0 {
585 ; CHECK-LABEL: ucvtf_v32i32_v32f64:
586 ; VBITS_GE_2048: ptrue [[PG1:p[0-9]+]].s, vl32
587 ; VBITS_GE_2048-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG1]]/z, [x0]
588 ; VBITS_GE_2048-NEXT: ptrue [[PG2:p[0-9]+]].d, vl32
589 ; VBITS_GE_2048-NEXT: uunpklo [[UPK:z[0-9]+]].d, [[OP]].s
590 ; VBITS_GE_2048-NEXT: ucvtf [[RES:z[0-9]+]].d, [[PG2]]/m, [[UPK]].d
591 ; VBITS_GE_2048-NEXT: st1d { [[RES]].d }, [[PG1]], [x1]
592 ; VBITS_GE_2048-NEXT: ret
593 %op1 = load <32 x i32>, <32 x i32>* %a
594 %res = uitofp <32 x i32> %op1 to <32 x double>
595 store <32 x double> %res, <32 x double>* %b
604 ; Don't use SVE for 64-bit vectors.
605 define <1 x half> @ucvtf_v1i64_v1f16(<1 x i64> %op1) #0 {
606 ; CHECK-LABEL: ucvtf_v1i64_v1f16:
608 ; CHECK-NEXT: ucvtf h0, x8
610 %res = uitofp <1 x i64> %op1 to <1 x half>
614 ; v2f16 is not legal for NEON, so use SVE
615 define <2 x half> @ucvtf_v2i64_v2f16(<2 x i64> %op1) #0 {
616 ; CHECK-LABEL: ucvtf_v2i64_v2f16:
617 ; CHECK: ptrue [[PG:p[0-9]+]].d
618 ; CHECK-NEXT: ucvtf [[CVT:z[0-9]+]].h, [[PG]]/m, z0.d
619 ; CHECK-NEXT: uzp1 [[UZP:z[0-9]+]].s, [[CVT]].s, [[CVT]].s
620 ; CHECK-NEXT: uzp1 z0.h, [[UZP]].h, [[UZP]].h
622 %res = uitofp <2 x i64> %op1 to <2 x half>
626 define <4 x half> @ucvtf_v4i64_v4f16(<4 x i64>* %a) #0 {
627 ; CHECK-LABEL: ucvtf_v4i64_v4f16:
628 ; CHECK: ptrue [[PG1:p[0-9]+]].d, vl4
629 ; CHECK-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG1]]/z, [x0]
630 ; CHECK-NEXT: ptrue [[PG2:p[0-9]+]].d
631 ; CHECK-NEXT: ucvtf [[CVT:z[0-9]+]].h, [[PG2]]/m, [[OP]].d
632 ; CHECK-NEXT: uzp1 [[UZP:z[0-9]+]].s, [[CVT]].s, [[CVT]].s
633 ; CHECK-NEXT: uzp1 z0.h, [[UZP]].h, [[UZP]].h
635 %op1 = load <4 x i64>, <4 x i64>* %a
636 %res = uitofp <4 x i64> %op1 to <4 x half>
640 define <8 x half> @ucvtf_v8i64_v8f16(<8 x i64>* %a) #0 {
641 ; CHECK-LABEL: ucvtf_v8i64_v8f16:
642 ; VBITS_GE_512: ptrue [[PG1:p[0-9]+]].d, vl8
643 ; VBITS_GE_512-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG1]]/z, [x0]
644 ; VBITS_GE_512-NEXT: ptrue [[PG2:p[0-9]+]].d
645 ; VBITS_GE_512-NEXT: ucvtf [[CVT:z[0-9]+]].h, [[PG2]]/m, [[OP]].d
646 ; VBITS_GE_512-NEXT: uzp1 [[UZP:z[0-9]+]].s, [[CVT]].s, [[CVT]].s
647 ; VBITS_GE_512-NEXT: uzp1 z0.h, [[UZP]].h, [[UZP]].h
648 ; VBITS_GE_512-NEXT: ret
650 ; Ensure sensible type legalisation.
651 ; VBITS_EQ_256-DAG: ptrue [[PG1:p[0-9]+]].d, vl4
652 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
653 ; VBITS_EQ_256-DAG: ld1d { [[LO:z[0-9]+]].d }, [[PG1]]/z, [x0]
654 ; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG1]]/z, [x0, x[[NUMELTS]], lsl #3]
655 ; VBITS_EQ_256-DAG: ptrue [[PG2:p[0-9]+]].d
656 ; VBITS_EQ_256-DAG: ucvtf [[CVT_LO:z[0-9]+]].h, [[PG2]]/m, [[LO]].d
657 ; VBITS_EQ_256-DAG: ucvtf [[CVT_HI:z[0-9]+]].h, [[PG2]]/m, [[HI]].d
658 ; VBITS_EQ_256-DAG: uzp1 [[UZP_LO:z[0-9]+]].s, [[CVT_LO]].s, [[CVT_LO]].s
659 ; VBITS_EQ_256-DAG: uzp1 [[UZP_HI:z[0-9]+]].s, [[CVT_HI]].s, [[CVT_HI]].s
660 ; VBITS_EQ_256-DAG: uzp1 z0.h, [[UZP_LO]].h, [[UZP_LO]].h
661 ; VBITS_EQ_256-DAG: uzp1 z[[RES_HI:[0-9]+]].h, [[UZP_HI]].h, [[UZP_HI]].h
662 ; VBITS_EQ_256-NEXT: mov v0.d[1], v[[RES_HI]].d[0]
663 ; VBITS_EQ_256-NEXT: ret
664 %op1 = load <8 x i64>, <8 x i64>* %a
665 %res = uitofp <8 x i64> %op1 to <8 x half>
669 define void @ucvtf_v16i64_v16f16(<16 x i64>* %a, <16 x half>* %b) #0 {
670 ; CHECK-LABEL: ucvtf_v16i64_v16f16:
671 ; VBITS_GE_1024: ptrue [[PG1:p[0-9]+]].d, vl16
672 ; VBITS_GE_1024-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG1]]/z, [x0]
673 ; VBITS_GE_1024-NEXT: ptrue [[PG2:p[0-9]+]].d
674 ; VBITS_GE_1024-NEXT: ucvtf [[CVT:z[0-9]+]].h, [[PG2]]/m, [[OP]].d
675 ; VBITS_GE_1024-NEXT: uzp1 [[UZP:z[0-9]+]].s, [[CVT]].s, [[CVT]].s
676 ; VBITS_GE_1024-NEXT: uzp1 [[RES:z[0-9]+]].h, [[UZP]].h, [[UZP]].h
677 ; VBITS_GE_1024-NEXT: ptrue [[PG3:p[0-9]+]].h, vl16
678 ; VBITS_GE_1024-NEXT: st1h { [[RES]].h }, [[PG3]], [x1]
679 ; VBITS_GE_1024-NEXT: ret
680 %op1 = load <16 x i64>, <16 x i64>* %a
681 %res = uitofp <16 x i64> %op1 to <16 x half>
682 store <16 x half> %res, <16 x half>* %b
686 define void @ucvtf_v32i64_v32f16(<32 x i64>* %a, <32 x half>* %b) #0 {
687 ; CHECK-LABEL: ucvtf_v32i64_v32f16:
688 ; VBITS_GE_2048: ptrue [[PG1:p[0-9]+]].d, vl32
689 ; VBITS_GE_2048-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG1]]/z, [x0]
690 ; VBITS_GE_2048-NEXT: ptrue [[PG2:p[0-9]+]].d
691 ; VBITS_GE_2048-NEXT: ucvtf [[CVT:z[0-9]+]].h, [[PG2]]/m, [[OP]].d
692 ; VBITS_GE_2048-NEXT: uzp1 [[UZP:z[0-9]+]].s, [[CVT]].s, [[CVT]].s
693 ; VBITS_GE_2048-NEXT: uzp1 [[RES:z[0-9]+]].h, [[UZP]].h, [[UZP]].h
694 ; VBITS_GE_2048-NEXT: ptrue [[PG3:p[0-9]+]].h, vl32
695 ; VBITS_GE_2048-NEXT: st1h { [[RES]].h }, [[PG3]], [x1]
696 ; VBITS_GE_2048-NEXT: ret
697 %op1 = load <32 x i64>, <32 x i64>* %a
698 %res = uitofp <32 x i64> %op1 to <32 x half>
699 store <32 x half> %res, <32 x half>* %b
707 ; Don't use SVE for 64-bit vectors.
708 define <1 x float> @ucvtf_v1i64_v1f32(<1 x i64> %op1) #0 {
709 ; CHECK-LABEL: ucvtf_v1i64_v1f32:
710 ; CHECK: ucvtf v0.2d, v0.2d
711 ; CHECK-NEXT: fcvtn v0.2s, v0.2d
713 %res = uitofp <1 x i64> %op1 to <1 x float>
717 ; Don't use SVE for 128-bit vectors.
718 define <2 x float> @ucvtf_v2i64_v2f32(<2 x i64> %op1) #0 {
719 ; CHECK-LABEL: ucvtf_v2i64_v2f32:
720 ; CHECK: ucvtf v0.2d, v0.2d
721 ; CHECK-NEXT: fcvtn v0.2s, v0.2d
723 %res = uitofp <2 x i64> %op1 to <2 x float>
727 define <4 x float> @ucvtf_v4i64_v4f32(<4 x i64>* %a) #0 {
728 ; CHECK-LABEL: ucvtf_v4i64_v4f32:
729 ; CHECK: ptrue [[PG1:p[0-9]+]].d, vl4
730 ; CHECK-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG1]]/z, [x0]
731 ; CHECK-NEXT: ptrue [[PG2:p[0-9]+]].d
732 ; CHECK-NEXT: ucvtf [[CVT:z[0-9]+]].s, [[PG2]]/m, [[OP]].d
733 ; CHECK-NEXT: uzp1 z0.s, [[CVT]].s, [[CVT]].s
735 %op1 = load <4 x i64>, <4 x i64>* %a
736 %res = uitofp <4 x i64> %op1 to <4 x float>
740 define void @ucvtf_v8i64_v8f32(<8 x i64>* %a, <8 x float>* %b) #0 {
741 ; CHECK-LABEL: ucvtf_v8i64_v8f32:
742 ; VBITS_GE_512: ptrue [[PG1:p[0-9]+]].d, vl8
743 ; VBITS_GE_512-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG1]]/z, [x0]
744 ; VBITS_GE_512-NEXT: ptrue [[PG2:p[0-9]+]].d
745 ; VBITS_GE_512-NEXT: ucvtf [[CVT:z[0-9]+]].s, [[PG2]]/m, [[OP]].d
746 ; VBITS_GE_512-NEXT: uzp1 [[RES:z[0-9]+]].s, [[CVT]].s, [[CVT]].s
747 ; VBITS_GE_512-NEXT: ptrue [[PG3:p[0-9]+]].s, vl8
748 ; VBITS_GE_512-NEXT: st1w { [[RES]].s }, [[PG3]], [x1]
749 ; VBITS_GE_512-NEXT: ret
751 ; Ensure sensible type legalisation.
752 ; VBITS_EQ_256-DAG: ptrue [[PG1:p[0-9]+]].d, vl4
753 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
754 ; VBITS_EQ_256-DAG: ld1d { [[LO:z[0-9]+]].d }, [[PG1]]/z, [x0]
755 ; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG1]]/z, [x0, x[[NUMELTS]], lsl #3]
756 ; VBITS_EQ_256-DAG: ptrue [[PG2:p[0-9]+]].d
757 ; VBITS_EQ_256-DAG: ptrue [[PG3:p[0-9]+]].s, vl4
758 ; VBITS_EQ_256-DAG: ucvtf [[CVT_LO:z[0-9]+]].s, [[PG2]]/m, [[LO]].d
759 ; VBITS_EQ_256-DAG: ucvtf [[CVT_HI:z[0-9]+]].s, [[PG2]]/m, [[HI]].d
760 ; VBITS_EQ_256-DAG: uzp1 [[RES_LO:z[0-9]+]].s, [[CVT_LO]].s, [[CVT_LO]].s
761 ; VBITS_EQ_256-DAG: uzp1 [[RES_HI:z[0-9]+]].s, [[CVT_HI]].s, [[CVT_HI]].s
762 ; VBITS_EQ_256-DAG: splice [[RES:z[0-9]+]].s, [[PG3]], [[RES_LO]].s, [[RES_HI]].s
763 ; VBITS_EQ_256-DAG: ptrue [[PG4:p[0-9]+]].s, vl8
764 ; VBITS_EQ_256-NEXT: st1w { [[RES]].s }, [[PG4]], [x1]
765 ; VBITS_EQ_256-NEXT: ret
766 %op1 = load <8 x i64>, <8 x i64>* %a
767 %res = uitofp <8 x i64> %op1 to <8 x float>
768 store <8 x float> %res, <8 x float>* %b
772 define void @ucvtf_v16i64_v16f32(<16 x i64>* %a, <16 x float>* %b) #0 {
773 ; CHECK-LABEL: ucvtf_v16i64_v16f32:
774 ; VBITS_GE_1024: ptrue [[PG1:p[0-9]+]].d, vl16
775 ; VBITS_GE_1024-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG1]]/z, [x0]
776 ; VBITS_GE_1024-NEXT: ptrue [[PG2:p[0-9]+]].d
777 ; VBITS_GE_1024-NEXT: ucvtf [[CVT:z[0-9]+]].s, [[PG2]]/m, [[OP]].d
778 ; VBITS_GE_1024-NEXT: uzp1 [[RES:z[0-9]+]].s, [[CVT]].s, [[CVT]].s
779 ; VBITS_GE_1024-NEXT: ptrue [[PG3:p[0-9]+]].s, vl16
780 ; VBITS_GE_1024-NEXT: st1w { [[RES]].s }, [[PG3]], [x1]
781 ; VBITS_GE_1024-NEXT: ret
782 %op1 = load <16 x i64>, <16 x i64>* %a
783 %res = uitofp <16 x i64> %op1 to <16 x float>
784 store <16 x float> %res, <16 x float>* %b
788 define void @ucvtf_v32i64_v32f32(<32 x i64>* %a, <32 x float>* %b) #0 {
789 ; CHECK-LABEL: ucvtf_v32i64_v32f32:
790 ; VBITS_GE_2048: ptrue [[PG1:p[0-9]+]].d, vl32
791 ; VBITS_GE_2048-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG1]]/z, [x0]
792 ; VBITS_GE_2048-NEXT: ptrue [[PG2:p[0-9]+]].d
793 ; VBITS_GE_2048-NEXT: ucvtf [[CVT:z[0-9]+]].s, [[PG2]]/m, [[OP]].d
794 ; VBITS_GE_2048-NEXT: uzp1 [[RES:z[0-9]+]].s, [[CVT]].s, [[CVT]].s
795 ; VBITS_GE_2048-NEXT: ptrue [[PG3:p[0-9]+]].s, vl32
796 ; VBITS_GE_2048-NEXT: st1w { [[RES]].s }, [[PG3]], [x1]
797 ; VBITS_GE_2048-NEXT: ret
798 %op1 = load <32 x i64>, <32 x i64>* %a
799 %res = uitofp <32 x i64> %op1 to <32 x float>
800 store <32 x float> %res, <32 x float>* %b
808 ; Don't use SVE for 64-bit vectors.
809 define <1 x double> @ucvtf_v1i64_v1f64(<1 x i64> %op1) #0 {
810 ; CHECK-LABEL: ucvtf_v1i64_v1f64:
812 ; CHECK-NEXT: ucvtf d0, x8
814 %res = uitofp <1 x i64> %op1 to <1 x double>
815 ret <1 x double> %res
818 ; Don't use SVE for 128-bit vectors.
819 define <2 x double> @ucvtf_v2i64_v2f64(<2 x i64> %op1) #0 {
820 ; CHECK-LABEL: ucvtf_v2i64_v2f64:
821 ; CHECK: ucvtf v0.2d, v0.2d
823 %res = uitofp <2 x i64> %op1 to <2 x double>
824 ret <2 x double> %res
827 define void @ucvtf_v4i64_v4f64(<4 x i64>* %a, <4 x double>* %b) #0 {
828 ; CHECK-LABEL: ucvtf_v4i64_v4f64:
829 ; CHECK: ptrue [[PG:p[0-9]+]].d, vl4
830 ; CHECK-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
831 ; CHECK-NEXT: ucvtf [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
832 ; CHECK-NEXT: st1d { [[RES]].d }, [[PG]], [x1]
834 %op1 = load <4 x i64>, <4 x i64>* %a
835 %res = uitofp <4 x i64> %op1 to <4 x double>
836 store <4 x double> %res, <4 x double>* %b
840 define void @ucvtf_v8i64_v8f64(<8 x i64>* %a, <8 x double>* %b) #0 {
841 ; CHECK-LABEL: ucvtf_v8i64_v8f64:
842 ; VBITS_GE_512: ptrue [[PG:p[0-9]+]].d, vl8
843 ; VBITS_GE_512-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
844 ; VBITS_GE_512-NEXT: ucvtf [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
845 ; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG]], [x1]
846 ; VBITS_GE_512-NEXT: ret
848 ; Ensure sensible type legalisation.
849 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
850 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
851 ; VBITS_EQ_256-DAG: ld1d { [[LO:z[0-9]+]].d }, [[PG]]/z, [x0]
852 ; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
853 ; VBITS_EQ_256-DAG: ucvtf [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[LO]].d
854 ; VBITS_EQ_256-DAG: ucvtf [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[HI]].d
855 ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x1]
856 ; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x1, x[[NUMELTS]], lsl #3]
857 ; VBITS_EQ_256-NEXT: ret
858 %op1 = load <8 x i64>, <8 x i64>* %a
859 %res = uitofp <8 x i64> %op1 to <8 x double>
860 store <8 x double> %res, <8 x double>* %b
864 define void @ucvtf_v16i64_v16f64(<16 x i64>* %a, <16 x double>* %b) #0 {
865 ; CHECK-LABEL: ucvtf_v16i64_v16f64:
866 ; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].d, vl16
867 ; VBITS_GE_1024-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
868 ; VBITS_GE_1024-NEXT: ucvtf [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
869 ; VBITS_GE_1024-NEXT: st1d { [[RES]].d }, [[PG]], [x1]
870 ; VBITS_GE_1024-NEXT: ret
871 %op1 = load <16 x i64>, <16 x i64>* %a
872 %res = uitofp <16 x i64> %op1 to <16 x double>
873 store <16 x double> %res, <16 x double>* %b
877 define void @ucvtf_v32i64_v32f64(<32 x i64>* %a, <32 x double>* %b) #0 {
878 ; CHECK-LABEL: ucvtf_v32i64_v32f64:
879 ; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].d, vl32
880 ; VBITS_GE_2048-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
881 ; VBITS_GE_2048-NEXT: ucvtf [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
882 ; VBITS_GE_2048-NEXT: st1d { [[RES]].d }, [[PG]], [x1]
883 ; VBITS_GE_2048-NEXT: ret
884 %op1 = load <32 x i64>, <32 x i64>* %a
885 %res = uitofp <32 x i64> %op1 to <32 x double>
886 store <32 x double> %res, <32 x double>* %b
894 ; Don't use SVE for 64-bit vectors.
895 define <4 x half> @scvtf_v4i16_v4f16(<4 x i16> %op1) #0 {
896 ; CHECK-LABEL: scvtf_v4i16_v4f16:
897 ; CHECK: scvtf v0.4h, v0.4h
899 %res = sitofp <4 x i16> %op1 to <4 x half>
903 ; Don't use SVE for 128-bit vectors.
904 define void @scvtf_v8i16_v8f16(<8 x i16>* %a, <8 x half>* %b) #0 {
905 ; CHECK-LABEL: scvtf_v8i16_v8f16:
906 ; CHECK: ldr q0, [x0]
907 ; CHECK-NEXT: scvtf v0.8h, v0.8h
908 ; CHECK-NEXT: str q0, [x1]
910 %op1 = load <8 x i16>, <8 x i16>* %a
911 %res = sitofp <8 x i16> %op1 to <8 x half>
912 store <8 x half> %res, <8 x half>* %b
916 define void @scvtf_v16i16_v16f16(<16 x i16>* %a, <16 x half>* %b) #0 {
917 ; CHECK-LABEL: scvtf_v16i16_v16f16:
918 ; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
919 ; CHECK-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
920 ; CHECK-NEXT: scvtf [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
921 ; CHECK-NEXT: st1h { [[RES]].h }, [[PG]], [x1]
923 %op1 = load <16 x i16>, <16 x i16>* %a
924 %res = sitofp <16 x i16> %op1 to <16 x half>
925 store <16 x half> %res, <16 x half>* %b
929 define void @scvtf_v32i16_v32f16(<32 x i16>* %a, <32 x half>* %b) #0 {
930 ; CHECK-LABEL: scvtf_v32i16_v32f16:
931 ; VBITS_GE_512: ptrue [[PG:p[0-9]+]].h, vl32
932 ; VBITS_GE_512-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
933 ; VBITS_GE_512-NEXT: scvtf [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
934 ; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG]], [x1]
935 ; VBITS_GE_512-NEXT: ret
937 ; Ensure sensible type legalisation.
938 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
939 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
940 ; VBITS_EQ_256-DAG: ld1h { [[LO:z[0-9]+]].h }, [[PG]]/z, [x0]
941 ; VBITS_EQ_256-DAG: ld1h { [[HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1]
942 ; VBITS_EQ_256-DAG: scvtf [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[LO]].h
943 ; VBITS_EQ_256-DAG: scvtf [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[HI]].h
944 ; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x1]
945 ; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x1, x[[NUMELTS]], lsl #1]
946 ; VBITS_EQ_256-NEXT: ret
947 %op1 = load <32 x i16>, <32 x i16>* %a
948 %res = sitofp <32 x i16> %op1 to <32 x half>
949 store <32 x half> %res, <32 x half>* %b
953 define void @scvtf_v64i16_v64f16(<64 x i16>* %a, <64 x half>* %b) #0 {
954 ; CHECK-LABEL: scvtf_v64i16_v64f16:
955 ; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].h, vl64
956 ; VBITS_GE_1024-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
957 ; VBITS_GE_1024-NEXT: scvtf [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
958 ; VBITS_GE_1024-NEXT: st1h { [[RES]].h }, [[PG]], [x1]
959 ; VBITS_GE_1024-NEXT: ret
960 %op1 = load <64 x i16>, <64 x i16>* %a
961 %res = sitofp <64 x i16> %op1 to <64 x half>
962 store <64 x half> %res, <64 x half>* %b
966 define void @scvtf_v128i16_v128f16(<128 x i16>* %a, <128 x half>* %b) #0 {
967 ; CHECK-LABEL: scvtf_v128i16_v128f16:
968 ; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].h, vl128
969 ; VBITS_GE_2048-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
970 ; VBITS_GE_2048-NEXT: scvtf [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
971 ; VBITS_GE_2048-NEXT: st1h { [[RES]].h }, [[PG]], [x1]
972 ; VBITS_GE_2048-NEXT: ret
973 %op1 = load <128 x i16>, <128 x i16>* %a
974 %res = sitofp <128 x i16> %op1 to <128 x half>
975 store <128 x half> %res, <128 x half>* %b
983 ; Don't use SVE for 64-bit vectors.
984 define <2 x float> @scvtf_v2i16_v2f32(<2 x i16> %op1) #0 {
985 ; CHECK-LABEL: scvtf_v2i16_v2f32:
986 ; CHECK: shl v0.2s, v0.2s, #16
987 ; CHECK-NEXT: sshr v0.2s, v0.2s, #16
988 ; CHECK-NEXT: scvtf v0.2s, v0.2s
990 %res = sitofp <2 x i16> %op1 to <2 x float>
994 ; Don't use SVE for 128-bit vectors.
995 define <4 x float> @scvtf_v4i16_v4f32(<4 x i16> %op1) #0 {
996 ; CHECK-LABEL: scvtf_v4i16_v4f32:
997 ; CHECK: scvtf v0.4s, v0.4s
999 %res = sitofp <4 x i16> %op1 to <4 x float>
1000 ret <4 x float> %res
1003 define void @scvtf_v8i16_v8f32(<8 x i16>* %a, <8 x float>* %b) #0 {
1004 ; CHECK-LABEL: scvtf_v8i16_v8f32:
1005 ; CHECK: ldr q[[OP:[0-9]+]], [x0]
1006 ; CHECK-NEXT: ptrue [[PG:p[0-9]+]].s, vl8
1007 ; CHECK-NEXT: sunpklo [[UPK:z[0-9]+]].s, z[[OP]].h
1008 ; CHECK-NEXT: scvtf [[RES:z[0-9]+]].s, [[PG]]/m, [[UPK]].s
1009 ; CHECK-NEXT: st1w { [[RES]].s }, [[PG]], [x1]
1011 %op1 = load <8 x i16>, <8 x i16>* %a
1012 %res = sitofp <8 x i16> %op1 to <8 x float>
1013 store <8 x float> %res, <8 x float>* %b
1017 define void @scvtf_v16i16_v16f32(<16 x i16>* %a, <16 x float>* %b) #0 {
1018 ; CHECK-LABEL: scvtf_v16i16_v16f32:
1019 ; VBITS_GE_512: ptrue [[PG1:p[0-9]+]].h, vl16
1020 ; VBITS_GE_512-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG1]]/z, [x0]
1021 ; VBITS_GE_512-NEXT: ptrue [[PG2:p[0-9]+]].s, vl16
1022 ; VBITS_GE_512-NEXT: sunpklo [[UPK:z[0-9]+]].s, [[OP]].h
1023 ; VBITS_GE_512-NEXT: scvtf [[RES:z[0-9]+]].s, [[PG2]]/m, [[UPK]].s
1024 ; VBITS_GE_512-NEXT: st1w { [[RES]].s }, [[PG1]], [x1]
1025 ; VBITS_GE_512-NEXT: ret
1027 ; Ensure sensible type legalisation - fixed type extract_subvector codegen is poor currently.
1028 ; VBITS_EQ_256-DAG: ptrue [[PG1:p[0-9]+]].h, vl16
1029 ; VBITS_EQ_256-DAG: ld1h { [[VEC:z[0-9]+]].h }, [[PG1]]/z, [x0]
1030 ; VBITS_EQ_256-DAG: mov x8, sp
1031 ; VBITS_EQ_256-DAG: st1h { [[VEC:z[0-9]+]].h }, [[PG1]], [x8]
1032 ; VBITS_EQ_256-DAG: ldp q[[LO:[0-9]+]], q[[HI:[0-9]+]], [sp]
1033 ; VBITS_EQ_256-DAG: ptrue [[PG2:p[0-9]+]].s, vl8
1034 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
1035 ; VBITS_EQ_256-DAG: sunpklo [[UPK_LO:z[0-9]+]].s, z[[LO]].h
1036 ; VBITS_EQ_256-DAG: sunpklo [[UPK_HI:z[0-9]+]].s, z[[HI]].h
1037 ; VBITS_EQ_256-DAG: scvtf [[RES_LO:z[0-9]+]].s, [[PG2]]/m, [[UPK_LO]].s
1038 ; VBITS_EQ_256-DAG: scvtf [[RES_HI:z[0-9]+]].s, [[PG2]]/m, [[UPK_HI]].s
1039 ; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG2]], [x1]
1040 ; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG2]], [x1, x[[NUMELTS]], lsl #2]
1041 %op1 = load <16 x i16>, <16 x i16>* %a
1042 %res = sitofp <16 x i16> %op1 to <16 x float>
1043 store <16 x float> %res, <16 x float>* %b
1047 define void @scvtf_v32i16_v32f32(<32 x i16>* %a, <32 x float>* %b) #0 {
1048 ; CHECK-LABEL: scvtf_v32i16_v32f32:
1049 ; VBITS_GE_1024: ptrue [[PG1:p[0-9]+]].h, vl32
1050 ; VBITS_GE_1024-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG1]]/z, [x0]
1051 ; VBITS_GE_1024-NEXT: ptrue [[PG2:p[0-9]+]].s, vl32
1052 ; VBITS_GE_1024-NEXT: sunpklo [[UPK:z[0-9]+]].s, [[OP]].h
1053 ; VBITS_GE_1024-NEXT: scvtf [[RES:z[0-9]+]].s, [[PG2]]/m, [[UPK]].s
1054 ; VBITS_GE_1024-NEXT: st1w { [[RES]].s }, [[PG1]], [x1]
1055 ; VBITS_GE_1024-NEXT: ret
1056 %op1 = load <32 x i16>, <32 x i16>* %a
1057 %res = sitofp <32 x i16> %op1 to <32 x float>
1058 store <32 x float> %res, <32 x float>* %b
1062 define void @scvtf_v64i16_v64f32(<64 x i16>* %a, <64 x float>* %b) #0 {
1063 ; CHECK-LABEL: scvtf_v64i16_v64f32:
1064 ; VBITS_GE_2048: ptrue [[PG1:p[0-9]+]].h, vl64
1065 ; VBITS_GE_2048-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG1]]/z, [x0]
1066 ; VBITS_GE_2048-NEXT: ptrue [[PG2:p[0-9]+]].s, vl64
1067 ; VBITS_GE_2048-NEXT: sunpklo [[UPK:z[0-9]+]].s, [[OP]].h
1068 ; VBITS_GE_2048-NEXT: scvtf [[RES:z[0-9]+]].s, [[PG2]]/m, [[UPK]].s
1069 ; VBITS_GE_2048-NEXT: st1w { [[RES]].s }, [[PG1]], [x1]
1070 ; VBITS_GE_2048-NEXT: ret
1071 %op1 = load <64 x i16>, <64 x i16>* %a
1072 %res = sitofp <64 x i16> %op1 to <64 x float>
1073 store <64 x float> %res, <64 x float>* %b
1081 ; v1i16 is preferred to be widened to v4i16, which pushes the output into SVE types, so use SVE
1082 define <1 x double> @scvtf_v1i16_v1f64(<1 x i16> %op1) #0 {
1083 ; CHECK-LABEL: scvtf_v1i16_v1f64:
1084 ; CHECK: ptrue [[PG:p[0-9]+]].d, vl4
1085 ; CHECK-NEXT: sunpklo [[UPK1:z[0-9]+]].s, z0.h
1086 ; CHECK-NEXT: sunpklo [[UPK2:z[0-9]+]].d, [[UPK1]].s
1087 ; CHECK-NEXT: scvtf z0.d, [[PG]]/m, [[UPK2]].d
1089 %res = sitofp <1 x i16> %op1 to <1 x double>
1090 ret <1 x double> %res
1093 ; Don't use SVE for 128-bit vectors.
1094 define <2 x double> @scvtf_v2i16_v2f64(<2 x i16> %op1) #0 {
1095 ; CHECK-LABEL: scvtf_v2i16_v2f64:
1096 ; CHECK: shl v0.2s, v0.2s, #16
1097 ; CHECK-NEXT: sshr v0.2s, v0.2s, #16
1098 ; CHECK-NEXT: sshll v0.2d, v0.2s, #0
1099 ; CHECK-NEXT: scvtf v0.2d, v0.2d
1101 %res = sitofp <2 x i16> %op1 to <2 x double>
1102 ret <2 x double> %res
1105 define void @scvtf_v4i16_v4f64(<4 x i16>* %a, <4 x double>* %b) #0 {
1106 ; CHECK-LABEL: scvtf_v4i16_v4f64:
1107 ; CHECK: ldr d[[OP:[0-9]+]], [x0]
1108 ; CHECK-NEXT: ptrue [[PG:p[0-9]+]].d, vl4
1109 ; CHECK-NEXT: sunpklo [[UPK1:z[0-9]+]].s, z[[OP]].h
1110 ; CHECK-NEXT: sunpklo [[UPK2:z[0-9]+]].d, [[UPK1]].s
1111 ; CHECK-NEXT: scvtf [[RES:z[0-9]+]].d, [[PG]]/m, [[UPK2]].d
1112 ; CHECK-NEXT: st1d { [[RES]].d }, [[PG]], [x1]
1114 %op1 = load <4 x i16>, <4 x i16>* %a
1115 %res = sitofp <4 x i16> %op1 to <4 x double>
1116 store <4 x double> %res, <4 x double>* %b
1120 define void @scvtf_v8i16_v8f64(<8 x i16>* %a, <8 x double>* %b) #0 {
1121 ; CHECK-LABEL: scvtf_v8i16_v8f64:
1122 ; VBITS_GE_512: ldr q[[OP:[0-9]+]], [x0]
1123 ; VBITS_GE_512-NEXT: ptrue [[PG:p[0-9]+]].d, vl8
1124 ; VBITS_GE_512-NEXT: sunpklo [[UPK1:z[0-9]+]].s, z[[OP]].h
1125 ; VBITS_GE_512-NEXT: sunpklo [[UPK2:z[0-9]+]].d, [[UPK1]].s
1126 ; VBITS_GE_512-NEXT: scvtf [[RES:z[0-9]+]].d, [[PG]]/m, [[UPK2]].d
1127 ; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG]], [x1]
1128 ; VBITS_GE_512-NEXT: ret
1130 ; Ensure sensible type legalisation.
1131 ; VBITS_EQ_256-DAG: ldr q[[OP:[0-9]+]], [x0]
1132 ; VBITS_EQ_256-DAG: ptrue [[PG2:p[0-9]+]].d, vl4
1133 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
1134 ; VBITS_EQ_256-DAG: ext v[[HI:[0-9]+]].16b, v[[LO:[0-9]+]].16b, v[[OP]].16b, #8
1135 ; VBITS_EQ_256-DAG: sunpklo [[UPK1_LO:z[0-9]+]].s, z[[LO]].h
1136 ; VBITS_EQ_256-DAG: sunpklo [[UPK1_HI:z[0-9]+]].s, z[[HI]].h
1137 ; VBITS_EQ_256-DAG: sunpklo [[UPK2_LO:z[0-9]+]].d, [[UPK1_LO]].s
1138 ; VBITS_EQ_256-DAG: sunpklo [[UPK2_HI:z[0-9]+]].d, [[UPK1_HI]].s
1139 ; VBITS_EQ_256-DAG: scvtf [[RES_LO:z[0-9]+]].d, [[PG2]]/m, [[UPK2_LO]].d
1140 ; VBITS_EQ_256-DAG: scvtf [[RES_HI:z[0-9]+]].d, [[PG2]]/m, [[UPK2_HI]].d
1141 ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG2]], [x1]
1142 ; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG2]], [x1, x[[NUMELTS]], lsl #3]
1143 ; VBITS_EQ_256-NEXT: ret
1144 %op1 = load <8 x i16>, <8 x i16>* %a
1145 %res = sitofp <8 x i16> %op1 to <8 x double>
1146 store <8 x double> %res, <8 x double>* %b
1150 define void @scvtf_v16i16_v16f64(<16 x i16>* %a, <16 x double>* %b) #0 {
1151 ; CHECK-LABEL: scvtf_v16i16_v16f64:
1152 ; VBITS_GE_1024: ptrue [[PG1:p[0-9]+]].h, vl16
1153 ; VBITS_GE_1024-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG1]]/z, [x0]
1154 ; VBITS_GE_1024-NEXT: ptrue [[PG2:p[0-9]+]].d, vl16
1155 ; VBITS_GE_1024-NEXT: sunpklo [[UPK1:z[0-9]+]].s, [[OP]].h
1156 ; VBITS_GE_1024-NEXT: sunpklo [[UPK2:z[0-9]+]].d, [[UPK1]].s
1157 ; VBITS_GE_1024-NEXT: scvtf [[RES:z[0-9]+]].d, [[PG2]]/m, [[UPK2]].d
1158 ; VBITS_GE_1024-NEXT: st1d { [[RES]].d }, [[PG1]], [x1]
1159 ; VBITS_GE_1024-NEXT: ret
1160 %op1 = load <16 x i16>, <16 x i16>* %a
1161 %res = sitofp <16 x i16> %op1 to <16 x double>
1162 store <16 x double> %res, <16 x double>* %b
1166 define void @scvtf_v32i16_v32f64(<32 x i16>* %a, <32 x double>* %b) #0 {
1167 ; CHECK-LABEL: scvtf_v32i16_v32f64:
1168 ; VBITS_GE_2048: ptrue [[PG1:p[0-9]+]].h, vl32
1169 ; VBITS_GE_2048-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG1]]/z, [x0]
1170 ; VBITS_GE_2048-NEXT: ptrue [[PG2:p[0-9]+]].d, vl32
1171 ; VBITS_GE_2048-NEXT: sunpklo [[UPK1:z[0-9]+]].s, [[OP]].h
1172 ; VBITS_GE_2048-NEXT: sunpklo [[UPK2:z[0-9]+]].d, [[UPK1]].s
1173 ; VBITS_GE_2048-NEXT: scvtf [[RES:z[0-9]+]].d, [[PG2]]/m, [[UPK2]].d
1174 ; VBITS_GE_2048-NEXT: st1d { [[RES]].d }, [[PG1]], [x1]
1175 ; VBITS_GE_2048-NEXT: ret
1176 %op1 = load <32 x i16>, <32 x i16>* %a
1177 %res = sitofp <32 x i16> %op1 to <32 x double>
1178 store <32 x double> %res, <32 x double>* %b
1186 ; Don't use SVE for 64-bit vectors.
1187 define <2 x half> @scvtf_v2i32_v2f16(<2 x i32> %op1) #0 {
1188 ; CHECK-LABEL: scvtf_v2i32_v2f16:
1189 ; CHECK: scvtf v0.4s, v0.4s
1190 ; CHECK-NEXT: fcvtn v0.4h, v0.4s
1192 %res = sitofp <2 x i32> %op1 to <2 x half>
1196 ; Don't use SVE for 128-bit vectors.
1197 define <4 x half> @scvtf_v4i32_v4f16(<4 x i32> %op1) #0 {
1198 ; CHECK-LABEL: scvtf_v4i32_v4f16:
1199 ; CHECK: scvtf v0.4s, v0.4s
1200 ; CHECK-NEXT: fcvtn v0.4h, v0.4s
1202 %res = sitofp <4 x i32> %op1 to <4 x half>
1206 define <8 x half> @scvtf_v8i32_v8f16(<8 x i32>* %a) #0 {
1207 ; CHECK-LABEL: scvtf_v8i32_v8f16:
1208 ; CHECK: ptrue [[PG1:p[0-9]+]].s, vl8
1209 ; CHECK-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG1]]/z, [x0]
1210 ; CHECK-NEXT: ptrue [[PG2:p[0-9]+]].s
1211 ; CHECK-NEXT: scvtf [[CVT:z[0-9]+]].h, [[PG2]]/m, [[OP]].s
1212 ; CHECK-NEXT: uzp1 z0.h, [[CVT]].h, [[CVT]].h
1214 %op1 = load <8 x i32>, <8 x i32>* %a
1215 %res = sitofp <8 x i32> %op1 to <8 x half>
1219 define void @scvtf_v16i32_v16f16(<16 x i32>* %a, <16 x half>* %b) #0 {
1220 ; CHECK-LABEL: scvtf_v16i32_v16f16:
1221 ; VBITS_GE_512: ptrue [[PG1:p[0-9]+]].s, vl16
1222 ; VBITS_GE_512-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG1]]/z, [x0]
1223 ; VBITS_GE_512-NEXT: ptrue [[PG2:p[0-9]+]].s
1224 ; VBITS_GE_512-NEXT: scvtf [[CVT:z[0-9]+]].h, [[PG2]]/m, [[OP]].s
1225 ; VBITS_GE_512-NEXT: uzp1 [[RES:z[0-9]+]].h, [[CVT]].h, [[CVT]].h
1226 ; VBITS_GE_512-NEXT: ptrue [[PG3:p[0-9]+]].h, vl16
1227 ; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG3]], [x1]
1228 ; VBITS_GE_512-NEXT: ret
1230 ; Ensure sensible type legalisation.
1231 ; VBITS_EQ_256-DAG: ptrue [[PG1:p[0-9]+]].s, vl8
1232 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
1233 ; VBITS_EQ_256-DAG: ld1w { [[LO:z[0-9]+]].s }, [[PG]]/z, [x0]
1234 ; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
1235 ; VBITS_EQ_256-DAG: ptrue [[PG2:p[0-9]+]].s
1236 ; VBITS_EQ_256-DAG: ptrue [[PG3:p[0-9]+]].h, vl8
1237 ; VBITS_EQ_256-DAG: scvtf [[CVT_LO:z[0-9]+]].h, [[PG2]]/m, [[LO]].s
1238 ; VBITS_EQ_256-DAG: scvtf [[CVT_HI:z[0-9]+]].h, [[PG2]]/m, [[HI]].s
1239 ; VBITS_EQ_256-DAG: uzp1 [[RES_LO:z[0-9]+]].h, [[CVT_LO]].h, [[CVT_LO]].h
1240 ; VBITS_EQ_256-DAG: uzp1 [[RES_HI:z[0-9]+]].h, [[CVT_HI]].h, [[CVT_HI]].h
1241 ; VBITS_EQ_256-DAG: splice [[RES:z[0-9]+]].h, [[PG3]], [[RES_LO]].h, [[RES_HI]].h
1242 ; VBITS_EQ_256-DAG: ptrue [[PG4:p[0-9]+]].h, vl16
1243 ; VBITS_EQ_256-NEXT: st1h { [[RES]].h }, [[PG4]], [x1]
1244 ; VBITS_EQ_256-NEXT: ret
1245 %op1 = load <16 x i32>, <16 x i32>* %a
1246 %res = sitofp <16 x i32> %op1 to <16 x half>
1247 store <16 x half> %res, <16 x half>* %b
1251 define void @scvtf_v32i32_v32f16(<32 x i32>* %a, <32 x half>* %b) #0 {
1252 ; CHECK-LABEL: scvtf_v32i32_v32f16:
1253 ; VBITS_GE_1024: ptrue [[PG1:p[0-9]+]].s, vl32
1254 ; VBITS_GE_1024-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG1]]/z, [x0]
1255 ; VBITS_GE_1024-NEXT: ptrue [[PG2:p[0-9]+]].s
1256 ; VBITS_GE_1024-NEXT: scvtf [[CVT:z[0-9]+]].h, [[PG2]]/m, [[OP]].s
1257 ; VBITS_GE_1024-NEXT: uzp1 [[RES:z[0-9]+]].h, [[CVT]].h, [[CVT]].h
1258 ; VBITS_GE_1024-NEXT: ptrue [[PG3:p[0-9]+]].h, vl32
1259 ; VBITS_GE_1024-NEXT: st1h { [[RES]].h }, [[PG3]], [x1]
1260 ; VBITS_GE_1024-NEXT: ret
1261 %op1 = load <32 x i32>, <32 x i32>* %a
1262 %res = sitofp <32 x i32> %op1 to <32 x half>
1263 store <32 x half> %res, <32 x half>* %b
1267 define void @scvtf_v64i32_v64f16(<64 x i32>* %a, <64 x half>* %b) #0 {
1268 ; CHECK-LABEL: scvtf_v64i32_v64f16:
1269 ; VBITS_GE_2048: ptrue [[PG1:p[0-9]+]].s, vl64
1270 ; VBITS_GE_2048-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG1]]/z, [x0]
1271 ; VBITS_GE_2048-NEXT: ptrue [[PG2:p[0-9]+]].s
1272 ; VBITS_GE_2048-NEXT: scvtf [[CVT:z[0-9]+]].h, [[PG2]]/m, [[OP]].s
1273 ; VBITS_GE_2048-NEXT: uzp1 [[RES:z[0-9]+]].h, [[CVT]].h, [[CVT]].h
1274 ; VBITS_GE_2048-NEXT: ptrue [[PG3:p[0-9]+]].h, vl64
1275 ; VBITS_GE_2048-NEXT: st1h { [[RES]].h }, [[PG3]], [x1]
1276 ; VBITS_GE_2048-NEXT: ret
1277 %op1 = load <64 x i32>, <64 x i32>* %a
1278 %res = sitofp <64 x i32> %op1 to <64 x half>
1279 store <64 x half> %res, <64 x half>* %b
1287 ; Don't use SVE for 64-bit vectors.
1288 define <2 x float> @scvtf_v2i32_v2f32(<2 x i32> %op1) #0 {
1289 ; CHECK-LABEL: scvtf_v2i32_v2f32:
1290 ; CHECK: scvtf v0.2s, v0.2s
1292 %res = sitofp <2 x i32> %op1 to <2 x float>
1293 ret <2 x float> %res
1296 ; Don't use SVE for 128-bit vectors.
1297 define <4 x float> @scvtf_v4i32_v4f32(<4 x i32> %op1) #0 {
1298 ; CHECK-LABEL: scvtf_v4i32_v4f32:
1299 ; CHECK: scvtf v0.4s, v0.4s
1301 %res = sitofp <4 x i32> %op1 to <4 x float>
1302 ret <4 x float> %res
1305 define void @scvtf_v8i32_v8f32(<8 x i32>* %a, <8 x float>* %b) #0 {
1306 ; CHECK-LABEL: scvtf_v8i32_v8f32:
1307 ; CHECK: ptrue [[PG:p[0-9]+]].s, vl8
1308 ; CHECK-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
1309 ; CHECK-NEXT: scvtf [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
1310 ; CHECK-NEXT: st1w { [[RES]].s }, [[PG]], [x1]
1312 %op1 = load <8 x i32>, <8 x i32>* %a
1313 %res = sitofp <8 x i32> %op1 to <8 x float>
1314 store <8 x float> %res, <8 x float>* %b
1318 define void @scvtf_v16i32_v16f32(<16 x i32>* %a, <16 x float>* %b) #0 {
1319 ; CHECK-LABEL: scvtf_v16i32_v16f32:
1320 ; VBITS_GE_512: ptrue [[PG:p[0-9]+]].s, vl16
1321 ; VBITS_GE_512-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
1322 ; VBITS_GE_512-NEXT: scvtf [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
1323 ; VBITS_GE_512-NEXT: st1w { [[RES]].s }, [[PG]], [x1]
1324 ; VBITS_GE_512-NEXT: ret
1326 ; Ensure sensible type legalisation.
1327 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
1328 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
1329 ; VBITS_EQ_256-DAG: ld1w { [[LO:z[0-9]+]].s }, [[PG]]/z, [x0]
1330 ; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
1331 ; VBITS_EQ_256-DAG: scvtf [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[LO]].s
1332 ; VBITS_EQ_256-DAG: scvtf [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[HI]].s
1333 ; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x1]
1334 ; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x1, x[[NUMELTS]], lsl #2]
1335 ; VBITS_EQ_256-NEXT: ret
1336 %op1 = load <16 x i32>, <16 x i32>* %a
1337 %res = sitofp <16 x i32> %op1 to <16 x float>
1338 store <16 x float> %res, <16 x float>* %b
1342 define void @scvtf_v32i32_v32f32(<32 x i32>* %a, <32 x float>* %b) #0 {
1343 ; CHECK-LABEL: scvtf_v32i32_v32f32:
1344 ; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].s, vl32
1345 ; VBITS_GE_1024-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
1346 ; VBITS_GE_1024-NEXT: scvtf [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
1347 ; VBITS_GE_1024-NEXT: st1w { [[RES]].s }, [[PG]], [x1]
1348 ; VBITS_GE_1024-NEXT: ret
1349 %op1 = load <32 x i32>, <32 x i32>* %a
1350 %res = sitofp <32 x i32> %op1 to <32 x float>
1351 store <32 x float> %res, <32 x float>* %b
1355 define void @scvtf_v64i32_v64f32(<64 x i32>* %a, <64 x float>* %b) #0 {
1356 ; CHECK-LABEL: scvtf_v64i32_v64f32:
1357 ; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].s, vl64
1358 ; VBITS_GE_2048-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
1359 ; VBITS_GE_2048-NEXT: scvtf [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
1360 ; VBITS_GE_2048-NEXT: st1w { [[RES]].s }, [[PG]], [x1]
1361 ; VBITS_GE_2048-NEXT: ret
1362 %op1 = load <64 x i32>, <64 x i32>* %a
1363 %res = sitofp <64 x i32> %op1 to <64 x float>
1364 store <64 x float> %res, <64 x float>* %b
1372 ; Don't use SVE for 64-bit vectors.
1373 define <1 x double> @scvtf_v1i32_v1f64(<1 x i32> %op1) #0 {
1374 ; CHECK-LABEL: scvtf_v1i32_v1f64:
1375 ; CHECK: sshll v0.2d, v0.2s, #0
1376 ; CHECK-NEXT: scvtf v0.2d, v0.2d
1378 %res = sitofp <1 x i32> %op1 to <1 x double>
1379 ret <1 x double> %res
1382 ; Don't use SVE for 128-bit vectors.
1383 define <2 x double> @scvtf_v2i32_v2f64(<2 x i32> %op1) #0 {
1384 ; CHECK-LABEL: scvtf_v2i32_v2f64:
1385 ; CHECK: sshll v0.2d, v0.2s, #0
1386 ; CHECK-NEXT: scvtf v0.2d, v0.2d
1388 %res = sitofp <2 x i32> %op1 to <2 x double>
1389 ret <2 x double> %res
1392 define void @scvtf_v4i32_v4f64(<4 x i32>* %a, <4 x double>* %b) #0 {
1393 ; CHECK-LABEL: scvtf_v4i32_v4f64:
1394 ; CHECK: ldr q[[OP:[0-9]+]], [x0]
1395 ; CHECK-NEXT: ptrue [[PG:p[0-9]+]].d, vl4
1396 ; CHECK-NEXT: sunpklo [[UPK:z[0-9]+]].d, z[[OP]].s
1397 ; CHECK-NEXT: scvtf [[RES:z[0-9]+]].d, [[PG]]/m, [[UPK]].d
1398 ; CHECK-NEXT: st1d { [[RES]].d }, [[PG]], [x1]
1400 %op1 = load <4 x i32>, <4 x i32>* %a
1401 %res = sitofp <4 x i32> %op1 to <4 x double>
1402 store <4 x double> %res, <4 x double>* %b
1406 define void @scvtf_v8i32_v8f64(<8 x i32>* %a, <8 x double>* %b) #0 {
1407 ; CHECK-LABEL: scvtf_v8i32_v8f64:
1408 ; VBITS_GE_512: ptrue [[PG1:p[0-9]+]].s, vl8
1409 ; VBITS_GE_512-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG1]]/z, [x0]
1410 ; VBITS_GE_512-NEXT: ptrue [[PG2:p[0-9]+]].d, vl8
1411 ; VBITS_GE_512-NEXT: sunpklo [[UPK:z[0-9]+]].d, [[OP]].s
1412 ; VBITS_GE_512-NEXT: scvtf [[RES:z[0-9]+]].d, [[PG2]]/m, [[UPK]].d
1413 ; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG1]], [x1]
1414 ; VBITS_GE_512-NEXT: ret
1416 ; Ensure sensible type legalisation - fixed type extract_subvector codegen is poor currently.
1417 ; VBITS_EQ_256-DAG: ptrue [[PG1:p[0-9]+]].s, vl8
1418 ; VBITS_EQ_256-DAG: ld1w { [[VEC:z[0-9]+]].s }, [[PG1]]/z, [x0]
1419 ; VBITS_EQ_256-DAG: mov x8, sp
1420 ; VBITS_EQ_256-DAG: st1w { [[VEC:z[0-9]+]].s }, [[PG1]], [x8]
1421 ; VBITS_EQ_256-DAG: ldp q[[LO:[0-9]+]], q[[HI:[0-9]+]], [sp]
1422 ; VBITS_EQ_256-DAG: ptrue [[PG2:p[0-9]+]].d, vl4
1423 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
1424 ; VBITS_EQ_256-DAG: sunpklo [[UPK_LO:z[0-9]+]].d, z[[LO]].s
1425 ; VBITS_EQ_256-DAG: sunpklo [[UPK_HI:z[0-9]+]].d, z[[HI]].s
1426 ; VBITS_EQ_256-DAG: scvtf [[RES_LO:z[0-9]+]].d, [[PG2]]/m, [[UPK_LO]].d
1427 ; VBITS_EQ_256-DAG: scvtf [[RES_HI:z[0-9]+]].d, [[PG2]]/m, [[UPK_HI]].d
1428 ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG2]], [x1]
1429 ; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG2]], [x1, x[[NUMELTS]], lsl #3]
1430 %op1 = load <8 x i32>, <8 x i32>* %a
1431 %res = sitofp <8 x i32> %op1 to <8 x double>
1432 store <8 x double> %res, <8 x double>* %b
1436 define void @scvtf_v16i32_v16f64(<16 x i32>* %a, <16 x double>* %b) #0 {
1437 ; CHECK-LABEL: scvtf_v16i32_v16f64:
1438 ; VBITS_GE_1024: ptrue [[PG1:p[0-9]+]].s, vl16
1439 ; VBITS_GE_1024-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG1]]/z, [x0]
1440 ; VBITS_GE_1024-NEXT: ptrue [[PG2:p[0-9]+]].d, vl16
1441 ; VBITS_GE_1024-NEXT: sunpklo [[UPK:z[0-9]+]].d, [[OP]].s
1442 ; VBITS_GE_1024-NEXT: scvtf [[RES:z[0-9]+]].d, [[PG2]]/m, [[UPK]].d
1443 ; VBITS_GE_1024-NEXT: st1d { [[RES]].d }, [[PG1]], [x1]
1444 ; VBITS_GE_1024-NEXT: ret
1445 %op1 = load <16 x i32>, <16 x i32>* %a
1446 %res = sitofp <16 x i32> %op1 to <16 x double>
1447 store <16 x double> %res, <16 x double>* %b
1451 define void @scvtf_v32i32_v32f64(<32 x i32>* %a, <32 x double>* %b) #0 {
1452 ; CHECK-LABEL: scvtf_v32i32_v32f64:
1453 ; VBITS_GE_2048: ptrue [[PG1:p[0-9]+]].s, vl32
1454 ; VBITS_GE_2048-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG1]]/z, [x0]
1455 ; VBITS_GE_2048-NEXT: ptrue [[PG2:p[0-9]+]].d, vl32
1456 ; VBITS_GE_2048-NEXT: sunpklo [[UPK:z[0-9]+]].d, [[OP]].s
1457 ; VBITS_GE_2048-NEXT: scvtf [[RES:z[0-9]+]].d, [[PG2]]/m, [[UPK]].d
1458 ; VBITS_GE_2048-NEXT: st1d { [[RES]].d }, [[PG1]], [x1]
1459 ; VBITS_GE_2048-NEXT: ret
1460 %op1 = load <32 x i32>, <32 x i32>* %a
1461 %res = sitofp <32 x i32> %op1 to <32 x double>
1462 store <32 x double> %res, <32 x double>* %b
1471 ; Don't use SVE for 64-bit vectors.
1472 define <1 x half> @scvtf_v1i64_v1f16(<1 x i64> %op1) #0 {
1473 ; CHECK-LABEL: scvtf_v1i64_v1f16:
1474 ; CHECK: fmov x8, d0
1475 ; CHECK-NEXT: scvtf h0, x8
1477 %res = sitofp <1 x i64> %op1 to <1 x half>
1481 ; v2f16 is not legal for NEON, so use SVE
1482 define <2 x half> @scvtf_v2i64_v2f16(<2 x i64> %op1) #0 {
1483 ; CHECK-LABEL: scvtf_v2i64_v2f16:
1484 ; CHECK: ptrue [[PG:p[0-9]+]].d
1485 ; CHECK-NEXT: scvtf [[CVT:z[0-9]+]].h, [[PG]]/m, z0.d
1486 ; CHECK-NEXT: uzp1 [[UZP:z[0-9]+]].s, [[CVT]].s, [[CVT]].s
1487 ; CHECK-NEXT: uzp1 z0.h, [[UZP]].h, [[UZP]].h
1489 %res = sitofp <2 x i64> %op1 to <2 x half>
1493 define <4 x half> @scvtf_v4i64_v4f16(<4 x i64>* %a) #0 {
1494 ; CHECK-LABEL: scvtf_v4i64_v4f16:
1495 ; CHECK: ptrue [[PG1:p[0-9]+]].d, vl4
1496 ; CHECK-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG1]]/z, [x0]
1497 ; CHECK-NEXT: ptrue [[PG2:p[0-9]+]].d
1498 ; CHECK-NEXT: scvtf [[CVT:z[0-9]+]].h, [[PG2]]/m, [[OP]].d
1499 ; CHECK-NEXT: uzp1 [[UZP:z[0-9]+]].s, [[CVT]].s, [[CVT]].s
1500 ; CHECK-NEXT: uzp1 z0.h, [[UZP]].h, [[UZP]].h
1502 %op1 = load <4 x i64>, <4 x i64>* %a
1503 %res = sitofp <4 x i64> %op1 to <4 x half>
1507 define <8 x half> @scvtf_v8i64_v8f16(<8 x i64>* %a) #0 {
1508 ; CHECK-LABEL: scvtf_v8i64_v8f16:
1509 ; VBITS_GE_512: ptrue [[PG1:p[0-9]+]].d, vl8
1510 ; VBITS_GE_512-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG1]]/z, [x0]
1511 ; VBITS_GE_512-NEXT: ptrue [[PG2:p[0-9]+]].d
1512 ; VBITS_GE_512-NEXT: scvtf [[CVT:z[0-9]+]].h, [[PG2]]/m, [[OP]].d
1513 ; VBITS_GE_512-NEXT: uzp1 [[UZP:z[0-9]+]].s, [[CVT]].s, [[CVT]].s
1514 ; VBITS_GE_512-NEXT: uzp1 z0.h, [[UZP]].h, [[UZP]].h
1515 ; VBITS_GE_512-NEXT: ret
1517 ; Ensure sensible type legalisation.
1518 ; VBITS_EQ_256-DAG: ptrue [[PG1:p[0-9]+]].d, vl4
1519 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
1520 ; VBITS_EQ_256-DAG: ld1d { [[LO:z[0-9]+]].d }, [[PG]]/z, [x0]
1521 ; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
1522 ; VBITS_EQ_256-DAG: ptrue [[PG2:p[0-9]+]].d
1523 ; VBITS_EQ_256-DAG: scvtf [[CVT_LO:z[0-9]+]].h, [[PG2]]/m, [[LO]].d
1524 ; VBITS_EQ_256-DAG: scvtf [[CVT_HI:z[0-9]+]].h, [[PG2]]/m, [[HI]].d
1525 ; VBITS_EQ_256-DAG: uzp1 [[UZP_LO:z[0-9]+]].s, [[CVT_LO]].s, [[CVT_LO]].s
1526 ; VBITS_EQ_256-DAG: uzp1 [[UZP_HI:z[0-9]+]].s, [[CVT_HI]].s, [[CVT_HI]].s
1527 ; VBITS_EQ_256-DAG: uzp1 z[[RES_LO:[0-9]+]].h, [[UZP_LO]].h, [[UZP_LO]].h
1528 ; VBITS_EQ_256-DAG: uzp1 z[[RES_HI:[0-9]+]].h, [[UZP_HI]].h, [[UZP_HI]].h
1529 ; VBITS_EQ_256-NEXT: mov v[[RES_LO]].d[1], v[[RES_HI]].d[0]
1530 ; VBITS_EQ_256-NEXT: ret
1531 %op1 = load <8 x i64>, <8 x i64>* %a
1532 %res = sitofp <8 x i64> %op1 to <8 x half>
1536 define void @scvtf_v16i64_v16f16(<16 x i64>* %a, <16 x half>* %b) #0 {
1537 ; CHECK-LABEL: scvtf_v16i64_v16f16:
1538 ; VBITS_GE_1024: ptrue [[PG1:p[0-9]+]].d, vl16
1539 ; VBITS_GE_1024-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG1]]/z, [x0]
1540 ; VBITS_GE_1024-NEXT: ptrue [[PG2:p[0-9]+]].d
1541 ; VBITS_GE_1024-NEXT: scvtf [[CVT:z[0-9]+]].h, [[PG2]]/m, [[OP]].d
1542 ; VBITS_GE_1024-NEXT: uzp1 [[UZP:z[0-9]+]].s, [[CVT]].s, [[CVT]].s
1543 ; VBITS_GE_1024-NEXT: uzp1 [[RES:z[0-9]+]].h, [[UZP]].h, [[UZP]].h
1544 ; VBITS_GE_1024-NEXT: ptrue [[PG3:p[0-9]+]].h, vl16
1545 ; VBITS_GE_1024-NEXT: st1h { [[RES]].h }, [[PG3]], [x1]
1546 ; VBITS_GE_1024-NEXT: ret
1547 %op1 = load <16 x i64>, <16 x i64>* %a
1548 %res = sitofp <16 x i64> %op1 to <16 x half>
1549 store <16 x half> %res, <16 x half>* %b
1553 define void @scvtf_v32i64_v32f16(<32 x i64>* %a, <32 x half>* %b) #0 {
1554 ; CHECK-LABEL: scvtf_v32i64_v32f16:
1555 ; VBITS_GE_2048: ptrue [[PG1:p[0-9]+]].d, vl32
1556 ; VBITS_GE_2048-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG1]]/z, [x0]
1557 ; VBITS_GE_2048-NEXT: ptrue [[PG2:p[0-9]+]].d
1558 ; VBITS_GE_2048-NEXT: scvtf [[CVT:z[0-9]+]].h, [[PG2]]/m, [[OP]].d
1559 ; VBITS_GE_2048-NEXT: uzp1 [[UZP:z[0-9]+]].s, [[CVT]].s, [[CVT]].s
1560 ; VBITS_GE_2048-NEXT: uzp1 [[RES:z[0-9]+]].h, [[UZP]].h, [[UZP]].h
1561 ; VBITS_GE_2048-NEXT: ptrue [[PG3:p[0-9]+]].h, vl32
1562 ; VBITS_GE_2048-NEXT: st1h { [[RES]].h }, [[PG3]], [x1]
1563 ; VBITS_GE_2048-NEXT: ret
1564 %op1 = load <32 x i64>, <32 x i64>* %a
1565 %res = sitofp <32 x i64> %op1 to <32 x half>
1566 store <32 x half> %res, <32 x half>* %b
1574 ; Don't use SVE for 64-bit vectors.
1575 define <1 x float> @scvtf_v1i64_v1f32(<1 x i64> %op1) #0 {
1576 ; CHECK-LABEL: scvtf_v1i64_v1f32:
1577 ; CHECK: scvtf v0.2d, v0.2d
1578 ; CHECK-NEXT: fcvtn v0.2s, v0.2d
1580 %res = sitofp <1 x i64> %op1 to <1 x float>
1581 ret <1 x float> %res
1584 ; Don't use SVE for 128-bit vectors.
1585 define <2 x float> @scvtf_v2i64_v2f32(<2 x i64> %op1) #0 {
1586 ; CHECK-LABEL: scvtf_v2i64_v2f32:
1587 ; CHECK: scvtf v0.2d, v0.2d
1588 ; CHECK-NEXT: fcvtn v0.2s, v0.2d
1590 %res = sitofp <2 x i64> %op1 to <2 x float>
1591 ret <2 x float> %res
; Smallest SVE case: <4 x i64> -> <4 x float>. Result returned by value,
; so uzp1 packs directly into z0 (v0), which is the NEON/SVE return register.
1594 define <4 x float> @scvtf_v4i64_v4f32(<4 x i64>* %a) #0 {
1595 ; CHECK-LABEL: scvtf_v4i64_v4f32:
1596 ; CHECK: ptrue [[PG1:p[0-9]+]].d, vl4
1597 ; CHECK-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG1]]/z, [x0]
1598 ; CHECK-NEXT: ptrue [[PG2:p[0-9]+]].d
1599 ; CHECK-NEXT: scvtf [[CVT:z[0-9]+]].s, [[PG2]]/m, [[OP]].d
1600 ; CHECK-NEXT: uzp1 z0.s, [[CVT]].s, [[CVT]].s
1602 %op1 = load <4 x i64>, <4 x i64>* %a
1603 %res = sitofp <4 x i64> %op1 to <4 x float>
1604 ret <4 x float> %res
; <8 x i64> -> <8 x float>: single full-width op at >=512 bits; at exactly
; 256 bits the input legalises to two halves that are converted, packed,
; and re-joined with splice.
1607 define void @scvtf_v8i64_v8f32(<8 x i64>* %a, <8 x float>* %b) #0 {
1608 ; CHECK-LABEL: scvtf_v8i64_v8f32:
1609 ; VBITS_GE_512: ptrue [[PG1:p[0-9]+]].d, vl8
1610 ; VBITS_GE_512-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG1]]/z, [x0]
1611 ; VBITS_GE_512-NEXT: ptrue [[PG2:p[0-9]+]].d
1612 ; VBITS_GE_512-NEXT: scvtf [[CVT:z[0-9]+]].s, [[PG2]]/m, [[OP]].d
1613 ; VBITS_GE_512-NEXT: uzp1 [[RES:z[0-9]+]].s, [[CVT]].s, [[CVT]].s
1614 ; VBITS_GE_512-NEXT: ptrue [[PG3:p[0-9]+]].s, vl8
1615 ; VBITS_GE_512-NEXT: st1w { [[RES]].s }, [[PG3]], [x1]
1616 ; VBITS_GE_512-NEXT: ret
1618 ; Ensure sensible type legalisation.
1619 ; VBITS_EQ_256-DAG: ptrue [[PG1:p[0-9]+]].d, vl4
1620 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
; FIX(review): the two loads below previously referenced [[PG]], which is
; never defined in this function and would bind to a stale definition from an
; earlier test. Use [[PG1]] defined two lines above (matches scvtf_v8i64_v8f64).
1621 ; VBITS_EQ_256-DAG: ld1d { [[LO:z[0-9]+]].d }, [[PG1]]/z, [x0]
1622 ; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG1]]/z, [x0, x[[NUMELTS]], lsl #3]
1623 ; VBITS_EQ_256-DAG: ptrue [[PG2:p[0-9]+]].d
1624 ; VBITS_EQ_256-DAG: ptrue [[PG3:p[0-9]+]].s, vl4
1625 ; VBITS_EQ_256-DAG: scvtf [[CVT_LO:z[0-9]+]].s, [[PG2]]/m, [[LO]].d
1626 ; VBITS_EQ_256-DAG: scvtf [[CVT_HI:z[0-9]+]].s, [[PG2]]/m, [[HI]].d
1627 ; VBITS_EQ_256-DAG: uzp1 [[RES_LO:z[0-9]+]].s, [[CVT_LO]].s, [[CVT_LO]].s
1628 ; VBITS_EQ_256-DAG: uzp1 [[RES_HI:z[0-9]+]].s, [[CVT_HI]].s, [[CVT_HI]].s
1629 ; VBITS_EQ_256-DAG: splice [[RES:z[0-9]+]].s, [[PG3]], [[RES_LO]].s, [[RES_HI]].s
1630 ; VBITS_EQ_256-DAG: ptrue [[PG4:p[0-9]+]].s, vl8
1631 ; VBITS_EQ_256-NEXT: st1w { [[RES]].s }, [[PG4]], [x1]
1632 ; VBITS_EQ_256-NEXT: ret
1633 %op1 = load <8 x i64>, <8 x i64>* %a
1634 %res = sitofp <8 x i64> %op1 to <8 x float>
1635 store <8 x float> %res, <8 x float>* %b
; <16 x i64> -> <16 x float>: single full-width predicated convert at
; >=1024 bits; result compacted with one uzp1 and stored via vl16 store.
1639 define void @scvtf_v16i64_v16f32(<16 x i64>* %a, <16 x float>* %b) #0 {
1640 ; CHECK-LABEL: scvtf_v16i64_v16f32:
1641 ; VBITS_GE_1024: ptrue [[PG1:p[0-9]+]].d, vl16
1642 ; VBITS_GE_1024-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG1]]/z, [x0]
1643 ; VBITS_GE_1024-NEXT: ptrue [[PG2:p[0-9]+]].d
1644 ; VBITS_GE_1024-NEXT: scvtf [[CVT:z[0-9]+]].s, [[PG2]]/m, [[OP]].d
1645 ; VBITS_GE_1024-NEXT: uzp1 [[RES:z[0-9]+]].s, [[CVT]].s, [[CVT]].s
1646 ; VBITS_GE_1024-NEXT: ptrue [[PG3:p[0-9]+]].s, vl16
1647 ; VBITS_GE_1024-NEXT: st1w { [[RES]].s }, [[PG3]], [x1]
1648 ; VBITS_GE_1024-NEXT: ret
1649 %op1 = load <16 x i64>, <16 x i64>* %a
1650 %res = sitofp <16 x i64> %op1 to <16 x float>
1651 store <16 x float> %res, <16 x float>* %b
; <32 x i64> -> <32 x float>: same shape as the v16 case, but requires a
; 2048-bit vector so the whole input fits in one register (vl32 of .d).
1655 define void @scvtf_v32i64_v32f32(<32 x i64>* %a, <32 x float>* %b) #0 {
1656 ; CHECK-LABEL: scvtf_v32i64_v32f32:
1657 ; VBITS_GE_2048: ptrue [[PG1:p[0-9]+]].d, vl32
1658 ; VBITS_GE_2048-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG1]]/z, [x0]
1659 ; VBITS_GE_2048-NEXT: ptrue [[PG2:p[0-9]+]].d
1660 ; VBITS_GE_2048-NEXT: scvtf [[CVT:z[0-9]+]].s, [[PG2]]/m, [[OP]].d
1661 ; VBITS_GE_2048-NEXT: uzp1 [[RES:z[0-9]+]].s, [[CVT]].s, [[CVT]].s
1662 ; VBITS_GE_2048-NEXT: ptrue [[PG3:p[0-9]+]].s, vl32
1663 ; VBITS_GE_2048-NEXT: st1w { [[RES]].s }, [[PG3]], [x1]
1664 ; VBITS_GE_2048-NEXT: ret
1665 %op1 = load <32 x i64>, <32 x i64>* %a
1666 %res = sitofp <32 x i64> %op1 to <32 x float>
1667 store <32 x float> %res, <32 x float>* %b
1675 ; Don't use SVE for 64-bit vectors.
; Scalar path: the single i64 lane is moved to a GPR and converted with
; the scalar scvtf.
1676 define <1 x double> @scvtf_v1i64_v1f64(<1 x i64> %op1) #0 {
1677 ; CHECK-LABEL: scvtf_v1i64_v1f64:
1678 ; CHECK: fmov x8, d0
1679 ; CHECK-NEXT: scvtf d0, x8
1681 %res = sitofp <1 x i64> %op1 to <1 x double>
1682 ret <1 x double> %res
1685 ; Don't use SVE for 128-bit vectors.
; i64 and f64 lanes are the same width, so a single NEON scvtf suffices.
1686 define <2 x double> @scvtf_v2i64_v2f64(<2 x i64> %op1) #0 {
1687 ; CHECK-LABEL: scvtf_v2i64_v2f64:
1688 ; CHECK: scvtf v0.2d, v0.2d
1690 %res = sitofp <2 x i64> %op1 to <2 x double>
1691 ret <2 x double> %res
; Same-width convert (i64 -> f64): no uzp1 packing needed, and one
; predicate register serves the load, convert, and store.
1694 define void @scvtf_v4i64_v4f64(<4 x i64>* %a, <4 x double>* %b) #0 {
1695 ; CHECK-LABEL: scvtf_v4i64_v4f64:
1696 ; CHECK: ptrue [[PG:p[0-9]+]].d, vl4
1697 ; CHECK-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
1698 ; CHECK-NEXT: scvtf [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
1699 ; CHECK-NEXT: st1d { [[RES]].d }, [[PG]], [x1]
1701 %op1 = load <4 x i64>, <4 x i64>* %a
1702 %res = sitofp <4 x i64> %op1 to <4 x double>
1703 store <4 x double> %res, <4 x double>* %b
; <8 x i64> -> <8 x double>: full-width at >=512 bits; at exactly 256 bits
; the vector splits into two vl4 halves, each converted and stored
; independently (no splice needed since widths match).
1707 define void @scvtf_v8i64_v8f64(<8 x i64>* %a, <8 x double>* %b) #0 {
1708 ; CHECK-LABEL: scvtf_v8i64_v8f64:
1709 ; VBITS_GE_512: ptrue [[PG:p[0-9]+]].d, vl8
1710 ; VBITS_GE_512-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
1711 ; VBITS_GE_512-NEXT: scvtf [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
1712 ; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG]], [x1]
1713 ; VBITS_GE_512-NEXT: ret
1715 ; Ensure sensible type legalisation.
1716 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
1717 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
1718 ; VBITS_EQ_256-DAG: ld1d { [[LO:z[0-9]+]].d }, [[PG]]/z, [x0]
1719 ; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
1720 ; VBITS_EQ_256-DAG: scvtf [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[LO]].d
1721 ; VBITS_EQ_256-DAG: scvtf [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[HI]].d
1722 ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x1]
1723 ; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x1, x[[NUMELTS]], lsl #3]
1724 ; VBITS_EQ_256-NEXT: ret
1725 %op1 = load <8 x i64>, <8 x i64>* %a
1726 %res = sitofp <8 x i64> %op1 to <8 x double>
1727 store <8 x double> %res, <8 x double>* %b
; <16 x i64> -> <16 x double>: same-width convert, single vl16 op at
; >=1024 bits.
1731 define void @scvtf_v16i64_v16f64(<16 x i64>* %a, <16 x double>* %b) #0 {
1732 ; CHECK-LABEL: scvtf_v16i64_v16f64:
1733 ; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].d, vl16
1734 ; VBITS_GE_1024-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
1735 ; VBITS_GE_1024-NEXT: scvtf [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
1736 ; VBITS_GE_1024-NEXT: st1d { [[RES]].d }, [[PG]], [x1]
1737 ; VBITS_GE_1024-NEXT: ret
1738 %op1 = load <16 x i64>, <16 x i64>* %a
1739 %res = sitofp <16 x i64> %op1 to <16 x double>
1740 store <16 x double> %res, <16 x double>* %b
; <32 x i64> -> <32 x double>: same-width convert, single vl32 op at
; >=2048 bits.
1744 define void @scvtf_v32i64_v32f64(<32 x i64>* %a, <32 x double>* %b) #0 {
1745 ; CHECK-LABEL: scvtf_v32i64_v32f64:
1746 ; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].d, vl32
1747 ; VBITS_GE_2048-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
1748 ; VBITS_GE_2048-NEXT: scvtf [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
1749 ; VBITS_GE_2048-NEXT: st1d { [[RES]].d }, [[PG]], [x1]
1750 ; VBITS_GE_2048-NEXT: ret
1751 %op1 = load <32 x i64>, <32 x i64>* %a
1752 %res = sitofp <32 x i64> %op1 to <32 x double>
1753 store <32 x double> %res, <32 x double>* %b
; Every function above carries #0, so all of them are compiled with SVE enabled.
1757 attributes #0 = { "target-features"="+sve" }