llvm/test/CodeGen/AArch64/sve-fixed-length-fp-rounding.ll

   1 ; RUN: llc -aarch64-sve-vector-bits-min=128  -asm-verbose=0 < %s | FileCheck %s -check-prefix=NO_SVE
   2 ; RUN: llc -aarch64-sve-vector-bits-min=256  -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_EQ_256
   3 ; RUN: llc -aarch64-sve-vector-bits-min=384  -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK
   4 ; RUN: llc -aarch64-sve-vector-bits-min=512  -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
   5 ; RUN: llc -aarch64-sve-vector-bits-min=640  -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
   6 ; RUN: llc -aarch64-sve-vector-bits-min=768  -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
   7 ; RUN: llc -aarch64-sve-vector-bits-min=896  -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
   8 ; RUN: llc -aarch64-sve-vector-bits-min=1024 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
   9 ; RUN: llc -aarch64-sve-vector-bits-min=1152 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
  10 ; RUN: llc -aarch64-sve-vector-bits-min=1280 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
  11 ; RUN: llc -aarch64-sve-vector-bits-min=1408 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
  12 ; RUN: llc -aarch64-sve-vector-bits-min=1536 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
  13 ; RUN: llc -aarch64-sve-vector-bits-min=1664 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
  14 ; RUN: llc -aarch64-sve-vector-bits-min=1792 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
  15 ; RUN: llc -aarch64-sve-vector-bits-min=1920 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
  16 ; RUN: llc -aarch64-sve-vector-bits-min=2048 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024,VBITS_GE_2048
  17
  18 target triple = "aarch64-unknown-linux-gnu"
  19
  20 ; Don't use SVE when its registers are no bigger than NEON.
  21 ; NO_SVE-NOT: ptrue
  22
  23 ;
  24 ; CEIL -> FRINTP
  25 ;
  26
  27 ; Don't use SVE for 64-bit vectors.
  28 define <4 x half> @frintp_v4f16(<4 x half> %op) #0 {
  29 ; CHECK-LABEL: frintp_v4f16:
  30 ; CHECK: frintp v0.4h, v0.4h
  31 ; CHECK-NEXT: ret
  32   %res = call <4 x half> @llvm.ceil.v4f16(<4 x half> %op)
  33   ret <4 x half> %res
  34 }
  35
  36 ; Don't use SVE for 128-bit vectors.
  37 define <8 x half> @frintp_v8f16(<8 x half> %op) #0 {
  38 ; CHECK-LABEL: frintp_v8f16:
  39 ; CHECK: frintp v0.8h, v0.8h
  40 ; CHECK-NEXT: ret
  41   %res = call <8 x half> @llvm.ceil.v8f16(<8 x half> %op)
  42   ret <8 x half> %res
  43 }
  44
  45 define void @frintp_v16f16(<16 x half>* %a) #0 {
  46 ; CHECK-LABEL: frintp_v16f16:
  47 ; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
  48 ; CHECK-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
  49 ; CHECK-NEXT: frintp [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
  50 ; CHECK-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
  51 ; CHECK-NEXT: ret
  52   %op = load <16 x half>, <16 x half>* %a
  53   %res = call <16 x half> @llvm.ceil.v16f16(<16 x half> %op)
  54   store <16 x half> %res, <16 x half>* %a
  55   ret void
  56 }
  57
  58 define void @frintp_v32f16(<32 x half>* %a) #0 {
  59 ; CHECK-LABEL: frintp_v32f16:
  60 ; VBITS_GE_512: ptrue [[PG:p[0-9]+]].h, vl32
  61 ; VBITS_GE_512-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
  62 ; VBITS_GE_512-NEXT: frintp [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
  63 ; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
  64 ; VBITS_GE_512-NEXT: ret
  65
  66 ; Ensure sensible type legalisation.
  67 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
  68 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
  69 ; VBITS_EQ_256-DAG: ld1h { [[OP_LO:z[0-9]+]].h }, [[PG]]/z, [x0]
  70 ; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1]
  71 ; VBITS_EQ_256-DAG: frintp [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP_LO]].h
  72 ; VBITS_EQ_256-DAG: frintp [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP_HI]].h
  73 ; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0]
  74 ; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1]
  75 ; VBITS_EQ_256-NEXT: ret
  76   %op = load <32 x half>, <32 x half>* %a
  77   %res = call <32 x half> @llvm.ceil.v32f16(<32 x half> %op)
  78   store <32 x half> %res, <32 x half>* %a
  79   ret void
  80 }
  81
  82 define void @frintp_v64f16(<64 x half>* %a) #0 {
  83 ; CHECK-LABEL: frintp_v64f16:
  84 ; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].h, vl64
  85 ; VBITS_GE_1024-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
  86 ; VBITS_GE_1024-NEXT: frintp [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
  87 ; VBITS_GE_1024-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
  88 ; VBITS_GE_1024-NEXT: ret
  89   %op = load <64 x half>, <64 x half>* %a
  90   %res = call <64 x half> @llvm.ceil.v64f16(<64 x half> %op)
  91   store <64 x half> %res, <64 x half>* %a
  92   ret void
  93 }
  94
  95 define void @frintp_v128f16(<128 x half>* %a) #0 {
  96 ; CHECK-LABEL: frintp_v128f16:
  97 ; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].h, vl128
  98 ; VBITS_GE_2048-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
  99 ; VBITS_GE_2048-NEXT: frintp [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
 100 ; VBITS_GE_2048-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
 101 ; VBITS_GE_2048-NEXT: ret
 102   %op = load <128 x half>, <128 x half>* %a
 103   %res = call <128 x half> @llvm.ceil.v128f16(<128 x half> %op)
 104   store <128 x half> %res, <128 x half>* %a
 105   ret void
 106 }
 107
 108 ; Don't use SVE for 64-bit vectors.
 109 define <2 x float> @frintp_v2f32(<2 x float> %op) #0 {
 110 ; CHECK-LABEL: frintp_v2f32:
 111 ; CHECK: frintp v0.2s, v0.2s
 112 ; CHECK-NEXT: ret
 113   %res = call <2 x float> @llvm.ceil.v2f32(<2 x float> %op)
 114   ret <2 x float> %res
 115 }
 116
 117 ; Don't use SVE for 128-bit vectors.
 118 define <4 x float> @frintp_v4f32(<4 x float> %op) #0 {
 119 ; CHECK-LABEL: frintp_v4f32:
 120 ; CHECK: frintp v0.4s, v0.4s
 121 ; CHECK-NEXT: ret
 122   %res = call <4 x float> @llvm.ceil.v4f32(<4 x float> %op)
 123   ret <4 x float> %res
 124 }
 125
 126 define void @frintp_v8f32(<8 x float>* %a) #0 {
 127 ; CHECK-LABEL: frintp_v8f32:
 128 ; CHECK: ptrue [[PG:p[0-9]+]].s, vl8
 129 ; CHECK-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
 130 ; CHECK-NEXT: frintp [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
 131 ; CHECK-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
 132 ; CHECK-NEXT: ret
 133   %op = load <8 x float>, <8 x float>* %a
 134   %res = call <8 x float> @llvm.ceil.v8f32(<8 x float> %op)
 135   store <8 x float> %res, <8 x float>* %a
 136   ret void
 137 }
 138
 139 define void @frintp_v16f32(<16 x float>* %a) #0 {
 140 ; CHECK-LABEL: frintp_v16f32:
 141 ; VBITS_GE_512: ptrue [[PG:p[0-9]+]].s, vl16
 142 ; VBITS_GE_512-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
 143 ; VBITS_GE_512-NEXT: frintp [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
 144 ; VBITS_GE_512-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
 145 ; VBITS_GE_512-NEXT: ret
 146
 147 ; Ensure sensible type legalisation.
 148 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
 149 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
 150 ; VBITS_EQ_256-DAG: ld1w { [[OP_LO:z[0-9]+]].s }, [[PG]]/z, [x0]
 151 ; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
 152 ; VBITS_EQ_256-DAG: frintp [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP_LO]].s
 153 ; VBITS_EQ_256-DAG: frintp [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP_HI]].s
 154 ; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0]
 155 ; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2]
 156 ; VBITS_EQ_256-NEXT: ret
 157   %op = load <16 x float>, <16 x float>* %a
 158   %res = call <16 x float> @llvm.ceil.v16f32(<16 x float> %op)
 159   store <16 x float> %res, <16 x float>* %a
 160   ret void
 161 }
 162
 163 define void @frintp_v32f32(<32 x float>* %a) #0 {
 164 ; CHECK-LABEL: frintp_v32f32:
 165 ; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].s, vl32
 166 ; VBITS_GE_1024-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
 167 ; VBITS_GE_1024-NEXT: frintp [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
 168 ; VBITS_GE_1024-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
 169 ; VBITS_GE_1024-NEXT: ret
 170   %op = load <32 x float>, <32 x float>* %a
 171   %res = call <32 x float> @llvm.ceil.v32f32(<32 x float> %op)
 172   store <32 x float> %res, <32 x float>* %a
 173   ret void
 174 }
 175
 176 define void @frintp_v64f32(<64 x float>* %a) #0 {
 177 ; CHECK-LABEL: frintp_v64f32:
 178 ; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].s, vl64
 179 ; VBITS_GE_2048-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
 180 ; VBITS_GE_2048-NEXT: frintp [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
 181 ; VBITS_GE_2048-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
 182 ; VBITS_GE_2048-NEXT: ret
 183   %op = load <64 x float>, <64 x float>* %a
 184   %res = call <64 x float> @llvm.ceil.v64f32(<64 x float> %op)
 185   store <64 x float> %res, <64 x float>* %a
 186   ret void
 187 }
 188
 189 ; Don't use SVE for 64-bit vectors.
 190 define <1 x double> @frintp_v1f64(<1 x double> %op) #0 {
 191 ; CHECK-LABEL: frintp_v1f64:
 192 ; CHECK: frintp d0, d0
 193 ; CHECK-NEXT: ret
 194   %res = call <1 x double> @llvm.ceil.v1f64(<1 x double> %op)
 195   ret <1 x double> %res
 196 }
 197
 198 ; Don't use SVE for 128-bit vectors.
 199 define <2 x double> @frintp_v2f64(<2 x double> %op) #0 {
 200 ; CHECK-LABEL: frintp_v2f64:
 201 ; CHECK: frintp v0.2d, v0.2d
 202 ; CHECK-NEXT: ret
 203   %res = call <2 x double> @llvm.ceil.v2f64(<2 x double> %op)
 204   ret <2 x double> %res
 205 }
 206
 207 define void @frintp_v4f64(<4 x double>* %a) #0 {
 208 ; CHECK-LABEL: frintp_v4f64:
 209 ; CHECK: ptrue [[PG:p[0-9]+]].d, vl4
 210 ; CHECK-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
 211 ; CHECK-NEXT: frintp [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
 212 ; CHECK-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
 213 ; CHECK-NEXT: ret
 214   %op = load <4 x double>, <4 x double>* %a
 215   %res = call <4 x double> @llvm.ceil.v4f64(<4 x double> %op)
 216   store <4 x double> %res, <4 x double>* %a
 217   ret void
 218 }
 219
 220 define void @frintp_v8f64(<8 x double>* %a) #0 {
 221 ; CHECK-LABEL: frintp_v8f64:
 222 ; VBITS_GE_512: ptrue [[PG:p[0-9]+]].d, vl8
 223 ; VBITS_GE_512-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
 224 ; VBITS_GE_512-NEXT: frintp [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
 225 ; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
 226 ; VBITS_GE_512-NEXT: ret
 227
 228 ; Ensure sensible type legalisation.
 229 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
 230 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
 231 ; VBITS_EQ_256-DAG: ld1d { [[OP_LO:z[0-9]+]].d }, [[PG]]/z, [x0]
 232 ; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
 233 ; VBITS_EQ_256-DAG: frintp [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP_LO]].d
 234 ; VBITS_EQ_256-DAG: frintp [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP_HI]].d
 235 ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0]
 236 ; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3]
 237 ; VBITS_EQ_256-NEXT: ret
 238   %op = load <8 x double>, <8 x double>* %a
 239   %res = call <8 x double> @llvm.ceil.v8f64(<8 x double> %op)
 240   store <8 x double> %res, <8 x double>* %a
 241   ret void
 242 }
 243
 244 define void @frintp_v16f64(<16 x double>* %a) #0 {
 245 ; CHECK-LABEL: frintp_v16f64:
 246 ; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].d, vl16
 247 ; VBITS_GE_1024-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
 248 ; VBITS_GE_1024-NEXT: frintp [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
 249 ; VBITS_GE_1024-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
 250 ; VBITS_GE_1024-NEXT: ret
 251   %op = load <16 x double>, <16 x double>* %a
 252   %res = call <16 x double> @llvm.ceil.v16f64(<16 x double> %op)
 253   store <16 x double> %res, <16 x double>* %a
 254   ret void
 255 }
 256
 257 define void @frintp_v32f64(<32 x double>* %a) #0 {
 258 ; CHECK-LABEL: frintp_v32f64:
 259 ; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].d, vl32
 260 ; VBITS_GE_2048-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
 261 ; VBITS_GE_2048-NEXT: frintp [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
 262 ; VBITS_GE_2048-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
 263 ; VBITS_GE_2048-NEXT: ret
 264   %op = load <32 x double>, <32 x double>* %a
 265   %res = call <32 x double> @llvm.ceil.v32f64(<32 x double> %op)
 266   store <32 x double> %res, <32 x double>* %a
 267   ret void
 268 }
 269
 270 ;
 271 ; FLOOR -> FRINTM
 272 ;
 273
 274 ; Don't use SVE for 64-bit vectors.
 275 define <4 x half> @frintm_v4f16(<4 x half> %op) #0 {
 276 ; CHECK-LABEL: frintm_v4f16:
 277 ; CHECK: frintm v0.4h, v0.4h
 278 ; CHECK-NEXT: ret
 279   %res = call <4 x half> @llvm.floor.v4f16(<4 x half> %op)
 280   ret <4 x half> %res
 281 }
 282
 283 ; Don't use SVE for 128-bit vectors.
 284 define <8 x half> @frintm_v8f16(<8 x half> %op) #0 {
 285 ; CHECK-LABEL: frintm_v8f16:
 286 ; CHECK: frintm v0.8h, v0.8h
 287 ; CHECK-NEXT: ret
 288   %res = call <8 x half> @llvm.floor.v8f16(<8 x half> %op)
 289   ret <8 x half> %res
 290 }
 291
 292 define void @frintm_v16f16(<16 x half>* %a) #0 {
 293 ; CHECK-LABEL: frintm_v16f16:
 294 ; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
 295 ; CHECK-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
 296 ; CHECK-NEXT: frintm [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
 297 ; CHECK-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
 298 ; CHECK-NEXT: ret
 299   %op = load <16 x half>, <16 x half>* %a
 300   %res = call <16 x half> @llvm.floor.v16f16(<16 x half> %op)
 301   store <16 x half> %res, <16 x half>* %a
 302   ret void
 303 }
 304
 305 define void @frintm_v32f16(<32 x half>* %a) #0 {
 306 ; CHECK-LABEL: frintm_v32f16:
 307 ; VBITS_GE_512: ptrue [[PG:p[0-9]+]].h, vl32
 308 ; VBITS_GE_512-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
 309 ; VBITS_GE_512-NEXT: frintm [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
 310 ; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
 311 ; VBITS_GE_512-NEXT: ret
 312
 313 ; Ensure sensible type legalisation.
 314 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
 315 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
 316 ; VBITS_EQ_256-DAG: ld1h { [[OP_LO:z[0-9]+]].h }, [[PG]]/z, [x0]
 317 ; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1]
 318 ; VBITS_EQ_256-DAG: frintm [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP_LO]].h
 319 ; VBITS_EQ_256-DAG: frintm [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP_HI]].h
 320 ; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0]
 321 ; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1]
 322 ; VBITS_EQ_256-NEXT: ret
 323   %op = load <32 x half>, <32 x half>* %a
 324   %res = call <32 x half> @llvm.floor.v32f16(<32 x half> %op)
 325   store <32 x half> %res, <32 x half>* %a
 326   ret void
 327 }
 328
 329 define void @frintm_v64f16(<64 x half>* %a) #0 {
 330 ; CHECK-LABEL: frintm_v64f16:
 331 ; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].h, vl64
 332 ; VBITS_GE_1024-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
 333 ; VBITS_GE_1024-NEXT: frintm [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
 334 ; VBITS_GE_1024-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
 335 ; VBITS_GE_1024-NEXT: ret
 336   %op = load <64 x half>, <64 x half>* %a
 337   %res = call <64 x half> @llvm.floor.v64f16(<64 x half> %op)
 338   store <64 x half> %res, <64 x half>* %a
 339   ret void
 340 }
 341
 342 define void @frintm_v128f16(<128 x half>* %a) #0 {
 343 ; CHECK-LABEL: frintm_v128f16:
 344 ; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].h, vl128
 345 ; VBITS_GE_2048-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
 346 ; VBITS_GE_2048-NEXT: frintm [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
 347 ; VBITS_GE_2048-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
 348 ; VBITS_GE_2048-NEXT: ret
 349   %op = load <128 x half>, <128 x half>* %a
 350   %res = call <128 x half> @llvm.floor.v128f16(<128 x half> %op)
 351   store <128 x half> %res, <128 x half>* %a
 352   ret void
 353 }
 354
 355 ; Don't use SVE for 64-bit vectors.
 356 define <2 x float> @frintm_v2f32(<2 x float> %op) #0 {
 357 ; CHECK-LABEL: frintm_v2f32:
 358 ; CHECK: frintm v0.2s, v0.2s
 359 ; CHECK-NEXT: ret
 360   %res = call <2 x float> @llvm.floor.v2f32(<2 x float> %op)
 361   ret <2 x float> %res
 362 }
 363
 364 ; Don't use SVE for 128-bit vectors.
 365 define <4 x float> @frintm_v4f32(<4 x float> %op) #0 {
 366 ; CHECK-LABEL: frintm_v4f32:
 367 ; CHECK: frintm v0.4s, v0.4s
 368 ; CHECK-NEXT: ret
 369   %res = call <4 x float> @llvm.floor.v4f32(<4 x float> %op)
 370   ret <4 x float> %res
 371 }
 372
 373 define void @frintm_v8f32(<8 x float>* %a) #0 {
 374 ; CHECK-LABEL: frintm_v8f32:
 375 ; CHECK: ptrue [[PG:p[0-9]+]].s, vl8
 376 ; CHECK-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
 377 ; CHECK-NEXT: frintm [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
 378 ; CHECK-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
 379 ; CHECK-NEXT: ret
 380   %op = load <8 x float>, <8 x float>* %a
 381   %res = call <8 x float> @llvm.floor.v8f32(<8 x float> %op)
 382   store <8 x float> %res, <8 x float>* %a
 383   ret void
 384 }
 385
 386 define void @frintm_v16f32(<16 x float>* %a) #0 {
 387 ; CHECK-LABEL: frintm_v16f32:
 388 ; VBITS_GE_512: ptrue [[PG:p[0-9]+]].s, vl16
 389 ; VBITS_GE_512-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
 390 ; VBITS_GE_512-NEXT: frintm [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
 391 ; VBITS_GE_512-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
 392 ; VBITS_GE_512-NEXT: ret
 393
 394 ; Ensure sensible type legalisation.
 395 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
 396 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
 397 ; VBITS_EQ_256-DAG: ld1w { [[OP_LO:z[0-9]+]].s }, [[PG]]/z, [x0]
 398 ; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
 399 ; VBITS_EQ_256-DAG: frintm [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP_LO]].s
 400 ; VBITS_EQ_256-DAG: frintm [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP_HI]].s
 401 ; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0]
 402 ; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2]
 403 ; VBITS_EQ_256-NEXT: ret
 404   %op = load <16 x float>, <16 x float>* %a
 405   %res = call <16 x float> @llvm.floor.v16f32(<16 x float> %op)
 406   store <16 x float> %res, <16 x float>* %a
 407   ret void
 408 }
 409
 410 define void @frintm_v32f32(<32 x float>* %a) #0 {
 411 ; CHECK-LABEL: frintm_v32f32:
 412 ; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].s, vl32
 413 ; VBITS_GE_1024-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
 414 ; VBITS_GE_1024-NEXT: frintm [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
 415 ; VBITS_GE_1024-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
 416 ; VBITS_GE_1024-NEXT: ret
 417   %op = load <32 x float>, <32 x float>* %a
 418   %res = call <32 x float> @llvm.floor.v32f32(<32 x float> %op)
 419   store <32 x float> %res, <32 x float>* %a
 420   ret void
 421 }
 422
 423 define void @frintm_v64f32(<64 x float>* %a) #0 {
 424 ; CHECK-LABEL: frintm_v64f32:
 425 ; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].s, vl64
 426 ; VBITS_GE_2048-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
 427 ; VBITS_GE_2048-NEXT: frintm [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
 428 ; VBITS_GE_2048-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
 429 ; VBITS_GE_2048-NEXT: ret
 430   %op = load <64 x float>, <64 x float>* %a
 431   %res = call <64 x float> @llvm.floor.v64f32(<64 x float> %op)
 432   store <64 x float> %res, <64 x float>* %a
 433   ret void
 434 }
 435
 436 ; Don't use SVE for 64-bit vectors.
 437 define <1 x double> @frintm_v1f64(<1 x double> %op) #0 {
 438 ; CHECK-LABEL: frintm_v1f64:
 439 ; CHECK: frintm d0, d0
 440 ; CHECK-NEXT: ret
 441   %res = call <1 x double> @llvm.floor.v1f64(<1 x double> %op)
 442   ret <1 x double> %res
 443 }
 444
 445 ; Don't use SVE for 128-bit vectors.
 446 define <2 x double> @frintm_v2f64(<2 x double> %op) #0 {
 447 ; CHECK-LABEL: frintm_v2f64:
 448 ; CHECK: frintm v0.2d, v0.2d
 449 ; CHECK-NEXT: ret
 450   %res = call <2 x double> @llvm.floor.v2f64(<2 x double> %op)
 451   ret <2 x double> %res
 452 }
 453
 454 define void @frintm_v4f64(<4 x double>* %a) #0 {
 455 ; CHECK-LABEL: frintm_v4f64:
 456 ; CHECK: ptrue [[PG:p[0-9]+]].d, vl4
 457 ; CHECK-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
 458 ; CHECK-NEXT: frintm [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
 459 ; CHECK-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
 460 ; CHECK-NEXT: ret
 461   %op = load <4 x double>, <4 x double>* %a
 462   %res = call <4 x double> @llvm.floor.v4f64(<4 x double> %op)
 463   store <4 x double> %res, <4 x double>* %a
 464   ret void
 465 }
 466
 467 define void @frintm_v8f64(<8 x double>* %a) #0 {
 468 ; CHECK-LABEL: frintm_v8f64:
 469 ; VBITS_GE_512: ptrue [[PG:p[0-9]+]].d, vl8
 470 ; VBITS_GE_512-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
 471 ; VBITS_GE_512-NEXT: frintm [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
 472 ; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
 473 ; VBITS_GE_512-NEXT: ret
 474
 475 ; Ensure sensible type legalisation.
 476 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
 477 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
 478 ; VBITS_EQ_256-DAG: ld1d { [[OP_LO:z[0-9]+]].d }, [[PG]]/z, [x0]
 479 ; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
 480 ; VBITS_EQ_256-DAG: frintm [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP_LO]].d
 481 ; VBITS_EQ_256-DAG: frintm [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP_HI]].d
 482 ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0]
 483 ; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3]
 484 ; VBITS_EQ_256-NEXT: ret
 485   %op = load <8 x double>, <8 x double>* %a
 486   %res = call <8 x double> @llvm.floor.v8f64(<8 x double> %op)
 487   store <8 x double> %res, <8 x double>* %a
 488   ret void
 489 }
 490
 491 define void @frintm_v16f64(<16 x double>* %a) #0 {
 492 ; CHECK-LABEL: frintm_v16f64:
 493 ; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].d, vl16
 494 ; VBITS_GE_1024-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
 495 ; VBITS_GE_1024-NEXT: frintm [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
 496 ; VBITS_GE_1024-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
 497 ; VBITS_GE_1024-NEXT: ret
 498   %op = load <16 x double>, <16 x double>* %a
 499   %res = call <16 x double> @llvm.floor.v16f64(<16 x double> %op)
 500   store <16 x double> %res, <16 x double>* %a
 501   ret void
 502 }
 503
 504 define void @frintm_v32f64(<32 x double>* %a) #0 {
 505 ; CHECK-LABEL: frintm_v32f64:
 506 ; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].d, vl32
 507 ; VBITS_GE_2048-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
 508 ; VBITS_GE_2048-NEXT: frintm [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
 509 ; VBITS_GE_2048-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
 510 ; VBITS_GE_2048-NEXT: ret
 511   %op = load <32 x double>, <32 x double>* %a
 512   %res = call <32 x double> @llvm.floor.v32f64(<32 x double> %op)
 513   store <32 x double> %res, <32 x double>* %a
 514   ret void
 515 }
 516
 517 ;
 518 ; FNEARBYINT -> FRINTI
 519 ;
 520
 521 ; Don't use SVE for 64-bit vectors.
 522 define <4 x half> @frinti_v4f16(<4 x half> %op) #0 {
 523 ; CHECK-LABEL: frinti_v4f16:
 524 ; CHECK: frinti v0.4h, v0.4h
 525 ; CHECK-NEXT: ret
 526   %res = call <4 x half> @llvm.nearbyint.v4f16(<4 x half> %op)
 527   ret <4 x half> %res
 528 }
 529
 530 ; Don't use SVE for 128-bit vectors.
 531 define <8 x half> @frinti_v8f16(<8 x half> %op) #0 {
 532 ; CHECK-LABEL: frinti_v8f16:
 533 ; CHECK: frinti v0.8h, v0.8h
 534 ; CHECK-NEXT: ret
 535   %res = call <8 x half> @llvm.nearbyint.v8f16(<8 x half> %op)
 536   ret <8 x half> %res
 537 }
 538
 539 define void @frinti_v16f16(<16 x half>* %a) #0 {
 540 ; CHECK-LABEL: frinti_v16f16:
 541 ; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
 542 ; CHECK-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
 543 ; CHECK-NEXT: frinti [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
 544 ; CHECK-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
 545 ; CHECK-NEXT: ret
 546   %op = load <16 x half>, <16 x half>* %a
 547   %res = call <16 x half> @llvm.nearbyint.v16f16(<16 x half> %op)
 548   store <16 x half> %res, <16 x half>* %a
 549   ret void
 550 }
 551
 552 define void @frinti_v32f16(<32 x half>* %a) #0 {
 553 ; CHECK-LABEL: frinti_v32f16:
 554 ; VBITS_GE_512: ptrue [[PG:p[0-9]+]].h, vl32
 555 ; VBITS_GE_512-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
 556 ; VBITS_GE_512-NEXT: frinti [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
 557 ; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
 558 ; VBITS_GE_512-NEXT: ret
 559
 560 ; Ensure sensible type legalisation.
 561 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
 562 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
 563 ; VBITS_EQ_256-DAG: ld1h { [[OP_LO:z[0-9]+]].h }, [[PG]]/z, [x0]
 564 ; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1]
 565 ; VBITS_EQ_256-DAG: frinti [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP_LO]].h
 566 ; VBITS_EQ_256-DAG: frinti [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP_HI]].h
 567 ; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0]
 568 ; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1]
 569 ; VBITS_EQ_256-NEXT: ret
 570   %op = load <32 x half>, <32 x half>* %a
 571   %res = call <32 x half> @llvm.nearbyint.v32f16(<32 x half> %op)
 572   store <32 x half> %res, <32 x half>* %a
 573   ret void
 574 }
 575
 576 define void @frinti_v64f16(<64 x half>* %a) #0 {
 577 ; CHECK-LABEL: frinti_v64f16:
 578 ; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].h, vl64
 579 ; VBITS_GE_1024-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
 580 ; VBITS_GE_1024-NEXT: frinti [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
 581 ; VBITS_GE_1024-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
 582 ; VBITS_GE_1024-NEXT: ret
 583   %op = load <64 x half>, <64 x half>* %a
 584   %res = call <64 x half> @llvm.nearbyint.v64f16(<64 x half> %op)
 585   store <64 x half> %res, <64 x half>* %a
 586   ret void
 587 }
 588
 589 define void @frinti_v128f16(<128 x half>* %a) #0 {
 590 ; CHECK-LABEL: frinti_v128f16:
 591 ; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].h, vl128
 592 ; VBITS_GE_2048-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
 593 ; VBITS_GE_2048-NEXT: frinti [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
 594 ; VBITS_GE_2048-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
 595 ; VBITS_GE_2048-NEXT: ret
 596   %op = load <128 x half>, <128 x half>* %a
 597   %res = call <128 x half> @llvm.nearbyint.v128f16(<128 x half> %op)
 598   store <128 x half> %res, <128 x half>* %a
 599   ret void
 600 }
 601
 602 ; Don't use SVE for 64-bit vectors.
 603 define <2 x float> @frinti_v2f32(<2 x float> %op) #0 {
 604 ; CHECK-LABEL: frinti_v2f32:
 605 ; CHECK: frinti v0.2s, v0.2s
 606 ; CHECK-NEXT: ret
 607   %res = call <2 x float> @llvm.nearbyint.v2f32(<2 x float> %op)
 608   ret <2 x float> %res
 609 }
 610
 611 ; Don't use SVE for 128-bit vectors.
 612 define <4 x float> @frinti_v4f32(<4 x float> %op) #0 {
 613 ; CHECK-LABEL: frinti_v4f32:
 614 ; CHECK: frinti v0.4s, v0.4s
 615 ; CHECK-NEXT: ret
 616   %res = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %op)
 617   ret <4 x float> %res
 618 }
 619
 620 define void @frinti_v8f32(<8 x float>* %a) #0 {
 621 ; CHECK-LABEL: frinti_v8f32:
 622 ; CHECK: ptrue [[PG:p[0-9]+]].s, vl8
 623 ; CHECK-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
 624 ; CHECK-NEXT: frinti [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
 625 ; CHECK-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
 626 ; CHECK-NEXT: ret
 627   %op = load <8 x float>, <8 x float>* %a
 628   %res = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %op)
 629   store <8 x float> %res, <8 x float>* %a
 630   ret void
 631 }
 632
 633 define void @frinti_v16f32(<16 x float>* %a) #0 {
 634 ; CHECK-LABEL: frinti_v16f32:
 635 ; VBITS_GE_512: ptrue [[PG:p[0-9]+]].s, vl16
 636 ; VBITS_GE_512-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
 637 ; VBITS_GE_512-NEXT: frinti [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
 638 ; VBITS_GE_512-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
 639 ; VBITS_GE_512-NEXT: ret
 640
 641 ; Ensure sensible type legalisation.
 642 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
 643 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
 644 ; VBITS_EQ_256-DAG: ld1w { [[OP_LO:z[0-9]+]].s }, [[PG]]/z, [x0]
 645 ; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
 646 ; VBITS_EQ_256-DAG: frinti [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP_LO]].s
 647 ; VBITS_EQ_256-DAG: frinti [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP_HI]].s
 648 ; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0]
 649 ; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2]
 650 ; VBITS_EQ_256-NEXT: ret
 651   %op = load <16 x float>, <16 x float>* %a
 652   %res = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> %op)
 653   store <16 x float> %res, <16 x float>* %a
 654   ret void
 655 }
 656
 657 define void @frinti_v32f32(<32 x float>* %a) #0 {
 658 ; CHECK-LABEL: frinti_v32f32:
 659 ; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].s, vl32
 660 ; VBITS_GE_1024-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
 661 ; VBITS_GE_1024-NEXT: frinti [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
 662 ; VBITS_GE_1024-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
 663 ; VBITS_GE_1024-NEXT: ret
 664   %op = load <32 x float>, <32 x float>* %a
 665   %res = call <32 x float> @llvm.nearbyint.v32f32(<32 x float> %op)
 666   store <32 x float> %res, <32 x float>* %a
 667   ret void
 668 }
 669
 670 define void @frinti_v64f32(<64 x float>* %a) #0 {
 671 ; CHECK-LABEL: frinti_v64f32:
 672 ; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].s, vl64
 673 ; VBITS_GE_2048-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
 674 ; VBITS_GE_2048-NEXT: frinti [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
 675 ; VBITS_GE_2048-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
 676 ; VBITS_GE_2048-NEXT: ret
 677   %op = load <64 x float>, <64 x float>* %a
 678   %res = call <64 x float> @llvm.nearbyint.v64f32(<64 x float> %op)
 679   store <64 x float> %res, <64 x float>* %a
 680   ret void
 681 }
 682
 683 ; Don't use SVE for 64-bit vectors.
 684 define <1 x double> @frinti_v1f64(<1 x double> %op) #0 {
 685 ; CHECK-LABEL: frinti_v1f64:
 686 ; CHECK: frinti d0, d0
 687 ; CHECK-NEXT: ret
 688   %res = call <1 x double> @llvm.nearbyint.v1f64(<1 x double> %op)
 689   ret <1 x double> %res
 690 }
 691
 692 ; Don't use SVE for 128-bit vectors.
 693 define <2 x double> @frinti_v2f64(<2 x double> %op) #0 {
 694 ; CHECK-LABEL: frinti_v2f64:
 695 ; CHECK: frinti v0.2d, v0.2d
 696 ; CHECK-NEXT: ret
 697   %res = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %op)
 698   ret <2 x double> %res
 699 }
 700
 701 define void @frinti_v4f64(<4 x double>* %a) #0 {
 702 ; CHECK-LABEL: frinti_v4f64:
 703 ; CHECK: ptrue [[PG:p[0-9]+]].d, vl4
 704 ; CHECK-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
 705 ; CHECK-NEXT: frinti [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
 706 ; CHECK-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
 707 ; CHECK-NEXT: ret
 708   %op = load <4 x double>, <4 x double>* %a
 709   %res = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %op)
 710   store <4 x double> %res, <4 x double>* %a
 711   ret void
 712 }
 713
 714 define void @frinti_v8f64(<8 x double>* %a) #0 {
 715 ; CHECK-LABEL: frinti_v8f64:
 716 ; VBITS_GE_512: ptrue [[PG:p[0-9]+]].d, vl8
 717 ; VBITS_GE_512-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
 718 ; VBITS_GE_512-NEXT: frinti [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
 719 ; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
 720 ; VBITS_GE_512-NEXT: ret
 721
 722 ; Ensure sensible type legalisation.
 723 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
 724 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
 725 ; VBITS_EQ_256-DAG: ld1d { [[OP_LO:z[0-9]+]].d }, [[PG]]/z, [x0]
 726 ; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
 727 ; VBITS_EQ_256-DAG: frinti [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP_LO]].d
 728 ; VBITS_EQ_256-DAG: frinti [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP_HI]].d
 729 ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0]
 730 ; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3]
 731 ; VBITS_EQ_256-NEXT: ret
 732   %op = load <8 x double>, <8 x double>* %a
 733   %res = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> %op)
 734   store <8 x double> %res, <8 x double>* %a
 735   ret void
 736 }
 737
 738 define void @frinti_v16f64(<16 x double>* %a) #0 {
 739 ; CHECK-LABEL: frinti_v16f64:
 740 ; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].d, vl16
 741 ; VBITS_GE_1024-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
 742 ; VBITS_GE_1024-NEXT: frinti [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
 743 ; VBITS_GE_1024-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
 744 ; VBITS_GE_1024-NEXT: ret
 745   %op = load <16 x double>, <16 x double>* %a
 746   %res = call <16 x double> @llvm.nearbyint.v16f64(<16 x double> %op)
 747   store <16 x double> %res, <16 x double>* %a
 748   ret void
 749 }
 750
 751 define void @frinti_v32f64(<32 x double>* %a) #0 {
 752 ; CHECK-LABEL: frinti_v32f64:
 753 ; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].d, vl32
 754 ; VBITS_GE_2048-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
 755 ; VBITS_GE_2048-NEXT: frinti [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
 756 ; VBITS_GE_2048-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
 757 ; VBITS_GE_2048-NEXT: ret
 758   %op = load <32 x double>, <32 x double>* %a
 759   %res = call <32 x double> @llvm.nearbyint.v32f64(<32 x double> %op)
 760   store <32 x double> %res, <32 x double>* %a
 761   ret void
 762 }
 763
 764 ;
 765 ; RINT -> FRINTX
 766 ;
 767
 768 ; Don't use SVE for 64-bit vectors.
 769 define <4 x half> @frintx_v4f16(<4 x half> %op) #0 {
 770 ; CHECK-LABEL: frintx_v4f16:
 771 ; CHECK: frintx v0.4h, v0.4h
 772 ; CHECK-NEXT: ret
 773   %res = call <4 x half> @llvm.rint.v4f16(<4 x half> %op)
 774   ret <4 x half> %res
 775 }
 776
 777 ; Don't use SVE for 128-bit vectors.
 778 define <8 x half> @frintx_v8f16(<8 x half> %op) #0 {
 779 ; CHECK-LABEL: frintx_v8f16:
 780 ; CHECK: frintx v0.8h, v0.8h
 781 ; CHECK-NEXT: ret
 782   %res = call <8 x half> @llvm.rint.v8f16(<8 x half> %op)
 783   ret <8 x half> %res
 784 }
 785
 786 define void @frintx_v16f16(<16 x half>* %a) #0 {
 787 ; CHECK-LABEL: frintx_v16f16:
 788 ; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
 789 ; CHECK-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
 790 ; CHECK-NEXT: frintx [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
 791 ; CHECK-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
 792 ; CHECK-NEXT: ret
 793   %op = load <16 x half>, <16 x half>* %a
 794   %res = call <16 x half> @llvm.rint.v16f16(<16 x half> %op)
 795   store <16 x half> %res, <16 x half>* %a
 796   ret void
 797 }
 798
 799 define void @frintx_v32f16(<32 x half>* %a) #0 {
 800 ; CHECK-LABEL: frintx_v32f16:
 801 ; VBITS_GE_512: ptrue [[PG:p[0-9]+]].h, vl32
 802 ; VBITS_GE_512-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
 803 ; VBITS_GE_512-NEXT: frintx [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
 804 ; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
 805 ; VBITS_GE_512-NEXT: ret
 806
 807 ; Ensure sensible type legalisation.
 808 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
 809 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
 810 ; VBITS_EQ_256-DAG: ld1h { [[OP_LO:z[0-9]+]].h }, [[PG]]/z, [x0]
 811 ; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1]
 812 ; VBITS_EQ_256-DAG: frintx [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP_LO]].h
 813 ; VBITS_EQ_256-DAG: frintx [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP_HI]].h
 814 ; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0]
 815 ; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1]
 816 ; VBITS_EQ_256-NEXT: ret
 817   %op = load <32 x half>, <32 x half>* %a
 818   %res = call <32 x half> @llvm.rint.v32f16(<32 x half> %op)
 819   store <32 x half> %res, <32 x half>* %a
 820   ret void
 821 }
 822
 823 define void @frintx_v64f16(<64 x half>* %a) #0 {
 824 ; CHECK-LABEL: frintx_v64f16:
 825 ; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].h, vl64
 826 ; VBITS_GE_1024-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
 827 ; VBITS_GE_1024-NEXT: frintx [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
 828 ; VBITS_GE_1024-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
 829 ; VBITS_GE_1024-NEXT: ret
 830   %op = load <64 x half>, <64 x half>* %a
 831   %res = call <64 x half> @llvm.rint.v64f16(<64 x half> %op)
 832   store <64 x half> %res, <64 x half>* %a
 833   ret void
 834 }
 835
 836 define void @frintx_v128f16(<128 x half>* %a) #0 {
 837 ; CHECK-LABEL: frintx_v128f16:
 838 ; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].h, vl128
 839 ; VBITS_GE_2048-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
 840 ; VBITS_GE_2048-NEXT: frintx [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
 841 ; VBITS_GE_2048-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
 842 ; VBITS_GE_2048-NEXT: ret
 843   %op = load <128 x half>, <128 x half>* %a
 844   %res = call <128 x half> @llvm.rint.v128f16(<128 x half> %op)
 845   store <128 x half> %res, <128 x half>* %a
 846   ret void
 847 }
 848
 849 ; Don't use SVE for 64-bit vectors.
 850 define <2 x float> @frintx_v2f32(<2 x float> %op) #0 {
 851 ; CHECK-LABEL: frintx_v2f32:
 852 ; CHECK: frintx v0.2s, v0.2s
 853 ; CHECK-NEXT: ret
 854   %res = call <2 x float> @llvm.rint.v2f32(<2 x float> %op)
 855   ret <2 x float> %res
 856 }
 857
 858 ; Don't use SVE for 128-bit vectors.
 859 define <4 x float> @frintx_v4f32(<4 x float> %op) #0 {
 860 ; CHECK-LABEL: frintx_v4f32:
 861 ; CHECK: frintx v0.4s, v0.4s
 862 ; CHECK-NEXT: ret
 863   %res = call <4 x float> @llvm.rint.v4f32(<4 x float> %op)
 864   ret <4 x float> %res
 865 }
 866
 867 define void @frintx_v8f32(<8 x float>* %a) #0 {
 868 ; CHECK-LABEL: frintx_v8f32:
 869 ; CHECK: ptrue [[PG:p[0-9]+]].s, vl8
 870 ; CHECK-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
 871 ; CHECK-NEXT: frintx [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
 872 ; CHECK-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
 873 ; CHECK-NEXT: ret
 874   %op = load <8 x float>, <8 x float>* %a
 875   %res = call <8 x float> @llvm.rint.v8f32(<8 x float> %op)
 876   store <8 x float> %res, <8 x float>* %a
 877   ret void
 878 }
 879
 880 define void @frintx_v16f32(<16 x float>* %a) #0 {
 881 ; CHECK-LABEL: frintx_v16f32:
 882 ; VBITS_GE_512: ptrue [[PG:p[0-9]+]].s, vl16
 883 ; VBITS_GE_512-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
 884 ; VBITS_GE_512-NEXT: frintx [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
 885 ; VBITS_GE_512-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
 886 ; VBITS_GE_512-NEXT: ret
 887
 888 ; Ensure sensible type legalisation.
 889 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
 890 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
 891 ; VBITS_EQ_256-DAG: ld1w { [[OP_LO:z[0-9]+]].s }, [[PG]]/z, [x0]
 892 ; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
 893 ; VBITS_EQ_256-DAG: frintx [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP_LO]].s
 894 ; VBITS_EQ_256-DAG: frintx [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP_HI]].s
 895 ; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0]
 896 ; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2]
 897 ; VBITS_EQ_256-NEXT: ret
 898   %op = load <16 x float>, <16 x float>* %a
 899   %res = call <16 x float> @llvm.rint.v16f32(<16 x float> %op)
 900   store <16 x float> %res, <16 x float>* %a
 901   ret void
 902 }
 903
 904 define void @frintx_v32f32(<32 x float>* %a) #0 {
 905 ; CHECK-LABEL: frintx_v32f32:
 906 ; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].s, vl32
 907 ; VBITS_GE_1024-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
 908 ; VBITS_GE_1024-NEXT: frintx [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
 909 ; VBITS_GE_1024-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
 910 ; VBITS_GE_1024-NEXT: ret
 911   %op = load <32 x float>, <32 x float>* %a
 912   %res = call <32 x float> @llvm.rint.v32f32(<32 x float> %op)
 913   store <32 x float> %res, <32 x float>* %a
 914   ret void
 915 }
 916
 917 define void @frintx_v64f32(<64 x float>* %a) #0 {
 918 ; CHECK-LABEL: frintx_v64f32:
 919 ; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].s, vl64
 920 ; VBITS_GE_2048-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
 921 ; VBITS_GE_2048-NEXT: frintx [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
 922 ; VBITS_GE_2048-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
 923 ; VBITS_GE_2048-NEXT: ret
 924   %op = load <64 x float>, <64 x float>* %a
 925   %res = call <64 x float> @llvm.rint.v64f32(<64 x float> %op)
 926   store <64 x float> %res, <64 x float>* %a
 927   ret void
 928 }
 929
 930 ; Don't use SVE for 64-bit vectors.
 931 define <1 x double> @frintx_v1f64(<1 x double> %op) #0 {
 932 ; CHECK-LABEL: frintx_v1f64:
 933 ; CHECK: frintx d0, d0
 934 ; CHECK-NEXT: ret
 935   %res = call <1 x double> @llvm.rint.v1f64(<1 x double> %op)
 936   ret <1 x double> %res
 937 }
 938
 939 ; Don't use SVE for 128-bit vectors.
 940 define <2 x double> @frintx_v2f64(<2 x double> %op) #0 {
 941 ; CHECK-LABEL: frintx_v2f64:
 942 ; CHECK: frintx v0.2d, v0.2d
 943 ; CHECK-NEXT: ret
 944   %res = call <2 x double> @llvm.rint.v2f64(<2 x double> %op)
 945   ret <2 x double> %res
 946 }
 947
 948 define void @frintx_v4f64(<4 x double>* %a) #0 {
 949 ; CHECK-LABEL: frintx_v4f64:
 950 ; CHECK: ptrue [[PG:p[0-9]+]].d, vl4
 951 ; CHECK-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
 952 ; CHECK-NEXT: frintx [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
 953 ; CHECK-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
 954 ; CHECK-NEXT: ret
 955   %op = load <4 x double>, <4 x double>* %a
 956   %res = call <4 x double> @llvm.rint.v4f64(<4 x double> %op)
 957   store <4 x double> %res, <4 x double>* %a
 958   ret void
 959 }
 960
 961 define void @frintx_v8f64(<8 x double>* %a) #0 {
 962 ; CHECK-LABEL: frintx_v8f64:
 963 ; VBITS_GE_512: ptrue [[PG:p[0-9]+]].d, vl8
 964 ; VBITS_GE_512-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
 965 ; VBITS_GE_512-NEXT: frintx [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
 966 ; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
 967 ; VBITS_GE_512-NEXT: ret
 968
 969 ; Ensure sensible type legalisation.
 970 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
 971 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
 972 ; VBITS_EQ_256-DAG: ld1d { [[OP_LO:z[0-9]+]].d }, [[PG]]/z, [x0]
 973 ; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
 974 ; VBITS_EQ_256-DAG: frintx [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP_LO]].d
 975 ; VBITS_EQ_256-DAG: frintx [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP_HI]].d
 976 ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0]
 977 ; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3]
 978 ; VBITS_EQ_256-NEXT: ret
 979   %op = load <8 x double>, <8 x double>* %a
 980   %res = call <8 x double> @llvm.rint.v8f64(<8 x double> %op)
 981   store <8 x double> %res, <8 x double>* %a
 982   ret void
 983 }
 984
 985 define void @frintx_v16f64(<16 x double>* %a) #0 {
 986 ; CHECK-LABEL: frintx_v16f64:
 987 ; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].d, vl16
 988 ; VBITS_GE_1024-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
 989 ; VBITS_GE_1024-NEXT: frintx [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
 990 ; VBITS_GE_1024-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
 991 ; VBITS_GE_1024-NEXT: ret
 992   %op = load <16 x double>, <16 x double>* %a
 993   %res = call <16 x double> @llvm.rint.v16f64(<16 x double> %op)
 994   store <16 x double> %res, <16 x double>* %a
 995   ret void
 996 }
 997
 998 define void @frintx_v32f64(<32 x double>* %a) #0 {
 999 ; CHECK-LABEL: frintx_v32f64:
1000 ; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].d, vl32
1001 ; VBITS_GE_2048-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
1002 ; VBITS_GE_2048-NEXT: frintx [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
1003 ; VBITS_GE_2048-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
1004 ; VBITS_GE_2048-NEXT: ret
1005   %op = load <32 x double>, <32 x double>* %a
1006   %res = call <32 x double> @llvm.rint.v32f64(<32 x double> %op)
1007   store <32 x double> %res, <32 x double>* %a
1008   ret void
1009 }
1010
1011 ;
1012 ; ROUND -> FRINTA
1013 ;
1014
1015 ; Don't use SVE for 64-bit vectors.
1016 define <4 x half> @frinta_v4f16(<4 x half> %op) #0 {
1017 ; CHECK-LABEL: frinta_v4f16:
1018 ; CHECK: frinta v0.4h, v0.4h
1019 ; CHECK-NEXT: ret
1020   %res = call <4 x half> @llvm.round.v4f16(<4 x half> %op)
1021   ret <4 x half> %res
1022 }
1023
1024 ; Don't use SVE for 128-bit vectors.
1025 define <8 x half> @frinta_v8f16(<8 x half> %op) #0 {
1026 ; CHECK-LABEL: frinta_v8f16:
1027 ; CHECK: frinta v0.8h, v0.8h
1028 ; CHECK-NEXT: ret
1029   %res = call <8 x half> @llvm.round.v8f16(<8 x half> %op)
1030   ret <8 x half> %res
1031 }
1032
1033 define void @frinta_v16f16(<16 x half>* %a) #0 {
1034 ; CHECK-LABEL: frinta_v16f16:
1035 ; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
1036 ; CHECK-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
1037 ; CHECK-NEXT: frinta [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
1038 ; CHECK-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
1039 ; CHECK-NEXT: ret
1040   %op = load <16 x half>, <16 x half>* %a
1041   %res = call <16 x half> @llvm.round.v16f16(<16 x half> %op)
1042   store <16 x half> %res, <16 x half>* %a
1043   ret void
1044 }
1045
1046 define void @frinta_v32f16(<32 x half>* %a) #0 {
1047 ; CHECK-LABEL: frinta_v32f16:
1048 ; VBITS_GE_512: ptrue [[PG:p[0-9]+]].h, vl32
1049 ; VBITS_GE_512-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
1050 ; VBITS_GE_512-NEXT: frinta [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
1051 ; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
1052 ; VBITS_GE_512-NEXT: ret
1053
1054 ; Ensure sensible type legalisation.
1055 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
1056 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
1057 ; VBITS_EQ_256-DAG: ld1h { [[OP_LO:z[0-9]+]].h }, [[PG]]/z, [x0]
1058 ; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1]
1059 ; VBITS_EQ_256-DAG: frinta [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP_LO]].h
1060 ; VBITS_EQ_256-DAG: frinta [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP_HI]].h
1061 ; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0]
1062 ; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1]
1063 ; VBITS_EQ_256-NEXT: ret
1064   %op = load <32 x half>, <32 x half>* %a
1065   %res = call <32 x half> @llvm.round.v32f16(<32 x half> %op)
1066   store <32 x half> %res, <32 x half>* %a
1067   ret void
1068 }
1069
1070 define void @frinta_v64f16(<64 x half>* %a) #0 {
1071 ; CHECK-LABEL: frinta_v64f16:
1072 ; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].h, vl64
1073 ; VBITS_GE_1024-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
1074 ; VBITS_GE_1024-NEXT: frinta [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
1075 ; VBITS_GE_1024-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
1076 ; VBITS_GE_1024-NEXT: ret
1077   %op = load <64 x half>, <64 x half>* %a
1078   %res = call <64 x half> @llvm.round.v64f16(<64 x half> %op)
1079   store <64 x half> %res, <64 x half>* %a
1080   ret void
1081 }
1082
1083 define void @frinta_v128f16(<128 x half>* %a) #0 {
1084 ; CHECK-LABEL: frinta_v128f16:
1085 ; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].h, vl128
1086 ; VBITS_GE_2048-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
1087 ; VBITS_GE_2048-NEXT: frinta [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
1088 ; VBITS_GE_2048-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
1089 ; VBITS_GE_2048-NEXT: ret
1090   %op = load <128 x half>, <128 x half>* %a
1091   %res = call <128 x half> @llvm.round.v128f16(<128 x half> %op)
1092   store <128 x half> %res, <128 x half>* %a
1093   ret void
1094 }
1095
1096 ; Don't use SVE for 64-bit vectors.
1097 define <2 x float> @frinta_v2f32(<2 x float> %op) #0 {
1098 ; CHECK-LABEL: frinta_v2f32:
1099 ; CHECK: frinta v0.2s, v0.2s
1100 ; CHECK-NEXT: ret
1101   %res = call <2 x float> @llvm.round.v2f32(<2 x float> %op)
1102   ret <2 x float> %res
1103 }
1104
1105 ; Don't use SVE for 128-bit vectors.
1106 define <4 x float> @frinta_v4f32(<4 x float> %op) #0 {
1107 ; CHECK-LABEL: frinta_v4f32:
1108 ; CHECK: frinta v0.4s, v0.4s
1109 ; CHECK-NEXT: ret
1110   %res = call <4 x float> @llvm.round.v4f32(<4 x float> %op)
1111   ret <4 x float> %res
1112 }
1113
1114 define void @frinta_v8f32(<8 x float>* %a) #0 {
1115 ; CHECK-LABEL: frinta_v8f32:
1116 ; CHECK: ptrue [[PG:p[0-9]+]].s, vl8
1117 ; CHECK-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
1118 ; CHECK-NEXT: frinta [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
1119 ; CHECK-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
1120 ; CHECK-NEXT: ret
1121   %op = load <8 x float>, <8 x float>* %a
1122   %res = call <8 x float> @llvm.round.v8f32(<8 x float> %op)
1123   store <8 x float> %res, <8 x float>* %a
1124   ret void
1125 }
1126
1127 define void @frinta_v16f32(<16 x float>* %a) #0 {
1128 ; CHECK-LABEL: frinta_v16f32:
1129 ; VBITS_GE_512: ptrue [[PG:p[0-9]+]].s, vl16
1130 ; VBITS_GE_512-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
1131 ; VBITS_GE_512-NEXT: frinta [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
1132 ; VBITS_GE_512-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
1133 ; VBITS_GE_512-NEXT: ret
1134
1135 ; Ensure sensible type legalisation.
1136 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
1137 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
1138 ; VBITS_EQ_256-DAG: ld1w { [[OP_LO:z[0-9]+]].s }, [[PG]]/z, [x0]
1139 ; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
1140 ; VBITS_EQ_256-DAG: frinta [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP_LO]].s
1141 ; VBITS_EQ_256-DAG: frinta [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP_HI]].s
1142 ; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0]
1143 ; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2]
1144 ; VBITS_EQ_256-NEXT: ret
1145   %op = load <16 x float>, <16 x float>* %a
1146   %res = call <16 x float> @llvm.round.v16f32(<16 x float> %op)
1147   store <16 x float> %res, <16 x float>* %a
1148   ret void
1149 }
1150
1151 define void @frinta_v32f32(<32 x float>* %a) #0 {
1152 ; CHECK-LABEL: frinta_v32f32:
1153 ; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].s, vl32
1154 ; VBITS_GE_1024-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
1155 ; VBITS_GE_1024-NEXT: frinta [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
1156 ; VBITS_GE_1024-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
1157 ; VBITS_GE_1024-NEXT: ret
1158   %op = load <32 x float>, <32 x float>* %a
1159   %res = call <32 x float> @llvm.round.v32f32(<32 x float> %op)
1160   store <32 x float> %res, <32 x float>* %a
1161   ret void
1162 }
1163
1164 define void @frinta_v64f32(<64 x float>* %a) #0 {
1165 ; CHECK-LABEL: frinta_v64f32:
1166 ; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].s, vl64
1167 ; VBITS_GE_2048-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
1168 ; VBITS_GE_2048-NEXT: frinta [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
1169 ; VBITS_GE_2048-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
1170 ; VBITS_GE_2048-NEXT: ret
1171   %op = load <64 x float>, <64 x float>* %a
1172   %res = call <64 x float> @llvm.round.v64f32(<64 x float> %op)
1173   store <64 x float> %res, <64 x float>* %a
1174   ret void
1175 }
1176
1177 ; Don't use SVE for 64-bit vectors.
1178 define <1 x double> @frinta_v1f64(<1 x double> %op) #0 {
1179 ; CHECK-LABEL: frinta_v1f64:
1180 ; CHECK: frinta d0, d0
1181 ; CHECK-NEXT: ret
1182   %res = call <1 x double> @llvm.round.v1f64(<1 x double> %op)
1183   ret <1 x double> %res
1184 }
1185
1186 ; Don't use SVE for 128-bit vectors.
1187 define <2 x double> @frinta_v2f64(<2 x double> %op) #0 {
1188 ; CHECK-LABEL: frinta_v2f64:
1189 ; CHECK: frinta v0.2d, v0.2d
1190 ; CHECK-NEXT: ret
1191   %res = call <2 x double> @llvm.round.v2f64(<2 x double> %op)
1192   ret <2 x double> %res
1193 }
1194
1195 define void @frinta_v4f64(<4 x double>* %a) #0 {
1196 ; CHECK-LABEL: frinta_v4f64:
1197 ; CHECK: ptrue [[PG:p[0-9]+]].d, vl4
1198 ; CHECK-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
1199 ; CHECK-NEXT: frinta [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
1200 ; CHECK-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
1201 ; CHECK-NEXT: ret
1202   %op = load <4 x double>, <4 x double>* %a
1203   %res = call <4 x double> @llvm.round.v4f64(<4 x double> %op)
1204   store <4 x double> %res, <4 x double>* %a
1205   ret void
1206 }
1207
1208 define void @frinta_v8f64(<8 x double>* %a) #0 {
1209 ; CHECK-LABEL: frinta_v8f64:
1210 ; VBITS_GE_512: ptrue [[PG:p[0-9]+]].d, vl8
1211 ; VBITS_GE_512-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
1212 ; VBITS_GE_512-NEXT: frinta [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
1213 ; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
1214 ; VBITS_GE_512-NEXT: ret
1215
1216 ; Ensure sensible type legalisation.
1217 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
1218 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
1219 ; VBITS_EQ_256-DAG: ld1d { [[OP_LO:z[0-9]+]].d }, [[PG]]/z, [x0]
1220 ; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
1221 ; VBITS_EQ_256-DAG: frinta [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP_LO]].d
1222 ; VBITS_EQ_256-DAG: frinta [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP_HI]].d
1223 ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0]
1224 ; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3]
1225 ; VBITS_EQ_256-NEXT: ret
1226   %op = load <8 x double>, <8 x double>* %a
1227   %res = call <8 x double> @llvm.round.v8f64(<8 x double> %op)
1228   store <8 x double> %res, <8 x double>* %a
1229   ret void
1230 }
1231
1232 define void @frinta_v16f64(<16 x double>* %a) #0 {
1233 ; CHECK-LABEL: frinta_v16f64:
1234 ; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].d, vl16
1235 ; VBITS_GE_1024-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
1236 ; VBITS_GE_1024-NEXT: frinta [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
1237 ; VBITS_GE_1024-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
1238 ; VBITS_GE_1024-NEXT: ret
1239   %op = load <16 x double>, <16 x double>* %a
1240   %res = call <16 x double> @llvm.round.v16f64(<16 x double> %op)
1241   store <16 x double> %res, <16 x double>* %a
1242   ret void
1243 }
1244
1245 define void @frinta_v32f64(<32 x double>* %a) #0 {
1246 ; CHECK-LABEL: frinta_v32f64:
1247 ; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].d, vl32
1248 ; VBITS_GE_2048-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
1249 ; VBITS_GE_2048-NEXT: frinta [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
1250 ; VBITS_GE_2048-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
1251 ; VBITS_GE_2048-NEXT: ret
1252   %op = load <32 x double>, <32 x double>* %a
1253   %res = call <32 x double> @llvm.round.v32f64(<32 x double> %op)
1254   store <32 x double> %res, <32 x double>* %a
1255   ret void
1256 }
1257
1258 ;
1259 ; ROUNDEVEN -> FRINTN
1260 ;
1261
1262 ; Don't use SVE for 64-bit vectors.
1263 define <4 x half> @frintn_v4f16(<4 x half> %op) #0 {
1264 ; CHECK-LABEL: frintn_v4f16:
1265 ; CHECK: frintn v0.4h, v0.4h
1266 ; CHECK-NEXT: ret
1267   %res = call <4 x half> @llvm.roundeven.v4f16(<4 x half> %op)
1268   ret <4 x half> %res
1269 }
1270
1271 ; Don't use SVE for 128-bit vectors.
1272 define <8 x half> @frintn_v8f16(<8 x half> %op) #0 {
1273 ; CHECK-LABEL: frintn_v8f16:
1274 ; CHECK: frintn v0.8h, v0.8h
1275 ; CHECK-NEXT: ret
1276   %res = call <8 x half> @llvm.roundeven.v8f16(<8 x half> %op)
1277   ret <8 x half> %res
1278 }
1279
1280 define void @frintn_v16f16(<16 x half>* %a) #0 {
1281 ; CHECK-LABEL: frintn_v16f16:
1282 ; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
1283 ; CHECK-DAG: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
1284 ; CHECK-NEXT: frintn [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
1285 ; CHECK-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
1286 ; CHECK-NEXT: ret
1287   %op = load <16 x half>, <16 x half>* %a
1288   %res = call <16 x half> @llvm.roundeven.v16f16(<16 x half> %op)
1289   store <16 x half> %res, <16 x half>* %a
1290   ret void
1291 }
1292
1293 define void @frintn_v32f16(<32 x half>* %a) #0 {
1294 ; CHECK-LABEL: frintn_v32f16:
1295 ; VBITS_GE_512: ptrue [[PG:p[0-9]+]].h, vl32
1296 ; VBITS_GE_512-DAG: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
1297 ; VBITS_GE_512-NEXT: frintn [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
1298 ; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
1299 ; VBITS_GE_512-NEXT: ret
1300
1301 ; Ensure sensible type legalisation.
1302 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
1303 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
1304 ; VBITS_EQ_256-DAG: ld1h { [[OP_LO:z[0-9]+]].h }, [[PG]]/z, [x0]
1305 ; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1]
1306 ; VBITS_EQ_256-DAG: frintn [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP_LO]].h
1307 ; VBITS_EQ_256-DAG: frintn [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP_HI]].h
1308 ; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0]
1309 ; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1]
1310 ; VBITS_EQ_256-NEXT: ret
1311   %op = load <32 x half>, <32 x half>* %a
1312   %res = call <32 x half> @llvm.roundeven.v32f16(<32 x half> %op)
1313   store <32 x half> %res, <32 x half>* %a
1314   ret void
1315 }
1316
1317 define void @frintn_v64f16(<64 x half>* %a) #0 {
1318 ; CHECK-LABEL: frintn_v64f16:
1319 ; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].h, vl64
1320 ; VBITS_GE_1024-DAG: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
1321 ; VBITS_GE_1024-NEXT: frintn [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
1322 ; VBITS_GE_1024-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
1323 ; VBITS_GE_1024-NEXT: ret
1324   %op = load <64 x half>, <64 x half>* %a
1325   %res = call <64 x half> @llvm.roundeven.v64f16(<64 x half> %op)
1326   store <64 x half> %res, <64 x half>* %a
1327   ret void
1328 }
1329
1330 define void @frintn_v128f16(<128 x half>* %a) #0 {
1331 ; CHECK-LABEL: frintn_v128f16:
1332 ; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].h, vl128
1333 ; VBITS_GE_2048-DAG: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
1334 ; VBITS_GE_2048-NEXT: frintn [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
1335 ; VBITS_GE_2048-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
1336 ; VBITS_GE_2048-NEXT: ret
1337   %op = load <128 x half>, <128 x half>* %a
1338   %res = call <128 x half> @llvm.roundeven.v128f16(<128 x half> %op)
1339   store <128 x half> %res, <128 x half>* %a
1340   ret void
1341 }
1342
1343 ; Don't use SVE for 64-bit vectors.
1344 define <2 x float> @frintn_v2f32(<2 x float> %op) #0 {
1345 ; CHECK-LABEL: frintn_v2f32:
1346 ; CHECK: frintn v0.2s, v0.2s
1347 ; CHECK-NEXT: ret
1348   %res = call <2 x float> @llvm.roundeven.v2f32(<2 x float> %op)
1349   ret <2 x float> %res
1350 }
1351
1352 ; Don't use SVE for 128-bit vectors.
1353 define <4 x float> @frintn_v4f32(<4 x float> %op) #0 {
1354 ; CHECK-LABEL: frintn_v4f32:
1355 ; CHECK: frintn v0.4s, v0.4s
1356 ; CHECK-NEXT: ret
1357   %res = call <4 x float> @llvm.roundeven.v4f32(<4 x float> %op)
1358   ret <4 x float> %res
1359 }
1360
1361 define void @frintn_v8f32(<8 x float>* %a) #0 {
1362 ; CHECK-LABEL: frintn_v8f32:
1363 ; CHECK: ptrue [[PG:p[0-9]+]].s, vl8
1364 ; CHECK-DAG: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
1365 ; CHECK-NEXT: frintn [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
1366 ; CHECK-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
1367 ; CHECK-NEXT: ret
1368   %op = load <8 x float>, <8 x float>* %a
1369   %res = call <8 x float> @llvm.roundeven.v8f32(<8 x float> %op)
1370   store <8 x float> %res, <8 x float>* %a
1371   ret void
1372 }
1373
1374 define void @frintn_v16f32(<16 x float>* %a) #0 {
1375 ; CHECK-LABEL: frintn_v16f32:
1376 ; VBITS_GE_512: ptrue [[PG:p[0-9]+]].s, vl16
1377 ; VBITS_GE_512-DAG: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
1378 ; VBITS_GE_512-NEXT: frintn [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
1379 ; VBITS_GE_512-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
1380 ; VBITS_GE_512-NEXT: ret
1381
1382 ; Ensure sensible type legalisation.
1383 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
1384 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
1385 ; VBITS_EQ_256-DAG: ld1w { [[OP_LO:z[0-9]+]].s }, [[PG]]/z, [x0]
1386 ; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
1387 ; VBITS_EQ_256-DAG: frintn [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP_LO]].s
1388 ; VBITS_EQ_256-DAG: frintn [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP_HI]].s
1389 ; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0]
1390 ; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2]
1391 ; VBITS_EQ_256-NEXT: ret
1392   %op = load <16 x float>, <16 x float>* %a
1393   %res = call <16 x float> @llvm.roundeven.v16f32(<16 x float> %op)
1394   store <16 x float> %res, <16 x float>* %a
1395   ret void
1396 }
1397
1398 define void @frintn_v32f32(<32 x float>* %a) #0 {
1399 ; CHECK-LABEL: frintn_v32f32:
1400 ; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].s, vl32
1401 ; VBITS_GE_1024-DAG: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
1402 ; VBITS_GE_1024-NEXT: frintn [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
1403 ; VBITS_GE_1024-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
1404 ; VBITS_GE_1024-NEXT: ret
1405   %op = load <32 x float>, <32 x float>* %a
1406   %res = call <32 x float> @llvm.roundeven.v32f32(<32 x float> %op)
1407   store <32 x float> %res, <32 x float>* %a
1408   ret void
1409 }
1410
1411 define void @frintn_v64f32(<64 x float>* %a) #0 {
1412 ; CHECK-LABEL: frintn_v64f32:
1413 ; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].s, vl64
1414 ; VBITS_GE_2048-DAG: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
1415 ; VBITS_GE_2048-NEXT: frintn [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
1416 ; VBITS_GE_2048-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
1417 ; VBITS_GE_2048-NEXT: ret
1418   %op = load <64 x float>, <64 x float>* %a
1419   %res = call <64 x float> @llvm.roundeven.v64f32(<64 x float> %op)
1420   store <64 x float> %res, <64 x float>* %a
1421   ret void
1422 }
1423
1424 ; Don't use SVE for 64-bit vectors.
1425 define <1 x double> @frintn_v1f64(<1 x double> %op) #0 {
1426 ; CHECK-LABEL: frintn_v1f64:
1427 ; CHECK: frintn d0, d0
1428 ; CHECK-NEXT: ret
1429   %res = call <1 x double> @llvm.roundeven.v1f64(<1 x double> %op)
1430   ret <1 x double> %res
1431 }
1432
1433 ; Don't use SVE for 128-bit vectors.
1434 define <2 x double> @frintn_v2f64(<2 x double> %op) #0 {
1435 ; CHECK-LABEL: frintn_v2f64:
1436 ; CHECK: frintn v0.2d, v0.2d
1437 ; CHECK-NEXT: ret
1438   %res = call <2 x double> @llvm.roundeven.v2f64(<2 x double> %op)
1439   ret <2 x double> %res
1440 }
1441
1442 define void @frintn_v4f64(<4 x double>* %a) #0 {
1443 ; CHECK-LABEL: frintn_v4f64:
1444 ; CHECK: ptrue [[PG:p[0-9]+]].d, vl4
1445 ; CHECK-DAG: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
1446 ; CHECK-NEXT: frintn [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
1447 ; CHECK-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
1448 ; CHECK-NEXT: ret
1449   %op = load <4 x double>, <4 x double>* %a
1450   %res = call <4 x double> @llvm.roundeven.v4f64(<4 x double> %op)
1451   store <4 x double> %res, <4 x double>* %a
1452   ret void
1453 }
1454
1455 define void @frintn_v8f64(<8 x double>* %a) #0 {
1456 ; CHECK-LABEL: frintn_v8f64:
1457 ; VBITS_GE_512: ptrue [[PG:p[0-9]+]].d, vl8
1458 ; VBITS_GE_512-DAG: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
1459 ; VBITS_GE_512-NEXT: frintn [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
1460 ; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
1461 ; VBITS_GE_512-NEXT: ret
1462
1463 ; Ensure sensible type legalisation.
1464 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
1465 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
1466 ; VBITS_EQ_256-DAG: ld1d { [[OP_LO:z[0-9]+]].d }, [[PG]]/z, [x0]
1467 ; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
1468 ; VBITS_EQ_256-DAG: frintn [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP_LO]].d
1469 ; VBITS_EQ_256-DAG: frintn [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP_HI]].d
1470 ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0]
1471 ; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3]
1472 ; VBITS_EQ_256-NEXT: ret
1473   %op = load <8 x double>, <8 x double>* %a
1474   %res = call <8 x double> @llvm.roundeven.v8f64(<8 x double> %op)
1475   store <8 x double> %res, <8 x double>* %a
1476   ret void
1477 }
1478
1479 define void @frintn_v16f64(<16 x double>* %a) #0 {
1480 ; CHECK-LABEL: frintn_v16f64:
1481 ; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].d, vl16
1482 ; VBITS_GE_1024-DAG: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
1483 ; VBITS_GE_1024-NEXT: frintn [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
1484 ; VBITS_GE_1024-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
1485 ; VBITS_GE_1024-NEXT: ret
1486   %op = load <16 x double>, <16 x double>* %a
1487   %res = call <16 x double> @llvm.roundeven.v16f64(<16 x double> %op)
1488   store <16 x double> %res, <16 x double>* %a
1489   ret void
1490 }
1491
1492 define void @frintn_v32f64(<32 x double>* %a) #0 {
1493 ; CHECK-LABEL: frintn_v32f64:
1494 ; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].d, vl32
1495 ; VBITS_GE_2048-DAG: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
1496 ; VBITS_GE_2048-NEXT: frintn [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
1497 ; VBITS_GE_2048-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
1498 ; VBITS_GE_2048-NEXT: ret
1499   %op = load <32 x double>, <32 x double>* %a
1500   %res = call <32 x double> @llvm.roundeven.v32f64(<32 x double> %op)
1501   store <32 x double> %res, <32 x double>* %a
1502   ret void
1503 }
1504
1505 ;
1506 ; TRUNC -> FRINTZ
1507 ;
1508
1509 ; Don't use SVE for 64-bit vectors.
1510 define <4 x half> @frintz_v4f16(<4 x half> %op) #0 {
1511 ; CHECK-LABEL: frintz_v4f16:
1512 ; CHECK: frintz v0.4h, v0.4h
1513 ; CHECK-NEXT: ret
1514   %res = call <4 x half> @llvm.trunc.v4f16(<4 x half> %op)
1515   ret <4 x half> %res
1516 }
1517
1518 ; Don't use SVE for 128-bit vectors.
1519 define <8 x half> @frintz_v8f16(<8 x half> %op) #0 {
1520 ; CHECK-LABEL: frintz_v8f16:
1521 ; CHECK: frintz v0.8h, v0.8h
1522 ; CHECK-NEXT: ret
1523   %res = call <8 x half> @llvm.trunc.v8f16(<8 x half> %op)
1524   ret <8 x half> %res
1525 }
1526
1527 define void @frintz_v16f16(<16 x half>* %a) #0 {
1528 ; CHECK-LABEL: frintz_v16f16:
1529 ; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
1530 ; CHECK-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
1531 ; CHECK-NEXT: frintz [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
1532 ; CHECK-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
1533 ; CHECK-NEXT: ret
1534   %op = load <16 x half>, <16 x half>* %a
1535   %res = call <16 x half> @llvm.trunc.v16f16(<16 x half> %op)
1536   store <16 x half> %res, <16 x half>* %a
1537   ret void
1538 }
1539
1540 define void @frintz_v32f16(<32 x half>* %a) #0 {
1541 ; CHECK-LABEL: frintz_v32f16:
1542 ; VBITS_GE_512: ptrue [[PG:p[0-9]+]].h, vl32
1543 ; VBITS_GE_512-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
1544 ; VBITS_GE_512-NEXT: frintz [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
1545 ; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
1546 ; VBITS_GE_512-NEXT: ret
1547
1548 ; Ensure sensible type legalisation.
1549 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
1550 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
1551 ; VBITS_EQ_256-DAG: ld1h { [[OP_LO:z[0-9]+]].h }, [[PG]]/z, [x0]
1552 ; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1]
1553 ; VBITS_EQ_256-DAG: frintz [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP_LO]].h
1554 ; VBITS_EQ_256-DAG: frintz [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP_HI]].h
1555 ; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0]
1556 ; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1]
1557 ; VBITS_EQ_256-NEXT: ret
1558   %op = load <32 x half>, <32 x half>* %a
1559   %res = call <32 x half> @llvm.trunc.v32f16(<32 x half> %op)
1560   store <32 x half> %res, <32 x half>* %a
1561   ret void
1562 }
1563
1564 define void @frintz_v64f16(<64 x half>* %a) #0 {
1565 ; CHECK-LABEL: frintz_v64f16:
1566 ; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].h, vl64
1567 ; VBITS_GE_1024-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
1568 ; VBITS_GE_1024-NEXT: frintz [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
1569 ; VBITS_GE_1024-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
1570 ; VBITS_GE_1024-NEXT: ret
1571   %op = load <64 x half>, <64 x half>* %a
1572   %res = call <64 x half> @llvm.trunc.v64f16(<64 x half> %op)
1573   store <64 x half> %res, <64 x half>* %a
1574   ret void
1575 }
1576
1577 define void @frintz_v128f16(<128 x half>* %a) #0 {
1578 ; CHECK-LABEL: frintz_v128f16:
1579 ; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].h, vl128
1580 ; VBITS_GE_2048-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
1581 ; VBITS_GE_2048-NEXT: frintz [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
1582 ; VBITS_GE_2048-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
1583 ; VBITS_GE_2048-NEXT: ret
1584   %op = load <128 x half>, <128 x half>* %a
1585   %res = call <128 x half> @llvm.trunc.v128f16(<128 x half> %op)
1586   store <128 x half> %res, <128 x half>* %a
1587   ret void
1588 }
1589
1590 ; Don't use SVE for 64-bit vectors.
1591 define <2 x float> @frintz_v2f32(<2 x float> %op) #0 {
1592 ; CHECK-LABEL: frintz_v2f32:
1593 ; CHECK: frintz v0.2s, v0.2s
1594 ; CHECK-NEXT: ret
1595   %res = call <2 x float> @llvm.trunc.v2f32(<2 x float> %op)
1596   ret <2 x float> %res
1597 }
1598
1599 ; Don't use SVE for 128-bit vectors.
1600 define <4 x float> @frintz_v4f32(<4 x float> %op) #0 {
1601 ; CHECK-LABEL: frintz_v4f32:
1602 ; CHECK: frintz v0.4s, v0.4s
1603 ; CHECK-NEXT: ret
1604   %res = call <4 x float> @llvm.trunc.v4f32(<4 x float> %op)
1605   ret <4 x float> %res
1606 }
1607
1608 define void @frintz_v8f32(<8 x float>* %a) #0 {
1609 ; CHECK-LABEL: frintz_v8f32:
1610 ; CHECK: ptrue [[PG:p[0-9]+]].s, vl8
1611 ; CHECK-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
1612 ; CHECK-NEXT: frintz [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
1613 ; CHECK-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
1614 ; CHECK-NEXT: ret
1615   %op = load <8 x float>, <8 x float>* %a
1616   %res = call <8 x float> @llvm.trunc.v8f32(<8 x float> %op)
1617   store <8 x float> %res, <8 x float>* %a
1618   ret void
1619 }
1620
1621 define void @frintz_v16f32(<16 x float>* %a) #0 {
1622 ; CHECK-LABEL: frintz_v16f32:
1623 ; VBITS_GE_512: ptrue [[PG:p[0-9]+]].s, vl16
1624 ; VBITS_GE_512-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
1625 ; VBITS_GE_512-NEXT: frintz [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
1626 ; VBITS_GE_512-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
1627 ; VBITS_GE_512-NEXT: ret
1628
1629 ; Ensure sensible type legalisation.
1630 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
1631 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
1632 ; VBITS_EQ_256-DAG: ld1w { [[OP_LO:z[0-9]+]].s }, [[PG]]/z, [x0]
1633 ; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
1634 ; VBITS_EQ_256-DAG: frintz [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP_LO]].s
1635 ; VBITS_EQ_256-DAG: frintz [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP_HI]].s
1636 ; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0]
1637 ; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2]
1638 ; VBITS_EQ_256-NEXT: ret
1639   %op = load <16 x float>, <16 x float>* %a
1640   %res = call <16 x float> @llvm.trunc.v16f32(<16 x float> %op)
1641   store <16 x float> %res, <16 x float>* %a
1642   ret void
1643 }
1644
1645 define void @frintz_v32f32(<32 x float>* %a) #0 {
1646 ; CHECK-LABEL: frintz_v32f32:
1647 ; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].s, vl32
1648 ; VBITS_GE_1024-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
1649 ; VBITS_GE_1024-NEXT: frintz [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
1650 ; VBITS_GE_1024-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
1651 ; VBITS_GE_1024-NEXT: ret
1652   %op = load <32 x float>, <32 x float>* %a
1653   %res = call <32 x float> @llvm.trunc.v32f32(<32 x float> %op)
1654   store <32 x float> %res, <32 x float>* %a
1655   ret void
1656 }
1657
1658 define void @frintz_v64f32(<64 x float>* %a) #0 {
1659 ; CHECK-LABEL: frintz_v64f32:
1660 ; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].s, vl64
1661 ; VBITS_GE_2048-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
1662 ; VBITS_GE_2048-NEXT: frintz [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
1663 ; VBITS_GE_2048-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
1664 ; VBITS_GE_2048-NEXT: ret
1665   %op = load <64 x float>, <64 x float>* %a
1666   %res = call <64 x float> @llvm.trunc.v64f32(<64 x float> %op)
1667   store <64 x float> %res, <64 x float>* %a
1668   ret void
1669 }
1670
1671 ; Don't use SVE for 64-bit vectors.
1672 define <1 x double> @frintz_v1f64(<1 x double> %op) #0 {
1673 ; CHECK-LABEL: frintz_v1f64:
1674 ; CHECK: frintz d0, d0
1675 ; CHECK-NEXT: ret
1676   %res = call <1 x double> @llvm.trunc.v1f64(<1 x double> %op)
1677   ret <1 x double> %res
1678 }
1679
1680 ; Don't use SVE for 128-bit vectors.
1681 define <2 x double> @frintz_v2f64(<2 x double> %op) #0 {
1682 ; CHECK-LABEL: frintz_v2f64:
1683 ; CHECK: frintz v0.2d, v0.2d
1684 ; CHECK-NEXT: ret
1685   %res = call <2 x double> @llvm.trunc.v2f64(<2 x double> %op)
1686   ret <2 x double> %res
1687 }
1688
1689 define void @frintz_v4f64(<4 x double>* %a) #0 {
1690 ; CHECK-LABEL: frintz_v4f64:
1691 ; CHECK: ptrue [[PG:p[0-9]+]].d, vl4
1692 ; CHECK-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
1693 ; CHECK-NEXT: frintz [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
1694 ; CHECK-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
1695 ; CHECK-NEXT: ret
1696   %op = load <4 x double>, <4 x double>* %a
1697   %res = call <4 x double> @llvm.trunc.v4f64(<4 x double> %op)
1698   store <4 x double> %res, <4 x double>* %a
1699   ret void
1700 }
1701
1702 define void @frintz_v8f64(<8 x double>* %a) #0 {
1703 ; CHECK-LABEL: frintz_v8f64:
1704 ; VBITS_GE_512: ptrue [[PG:p[0-9]+]].d, vl8
1705 ; VBITS_GE_512-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
1706 ; VBITS_GE_512-NEXT: frintz [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
1707 ; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
1708 ; VBITS_GE_512-NEXT: ret
1709
1710 ; Ensure sensible type legalisation.
1711 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
1712 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
1713 ; VBITS_EQ_256-DAG: ld1d { [[OP_LO:z[0-9]+]].d }, [[PG]]/z, [x0]
1714 ; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
1715 ; VBITS_EQ_256-DAG: frintz [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP_LO]].d
1716 ; VBITS_EQ_256-DAG: frintz [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP_HI]].d
1717 ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0]
1718 ; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3]
1719 ; VBITS_EQ_256-NEXT: ret
1720   %op = load <8 x double>, <8 x double>* %a
1721   %res = call <8 x double> @llvm.trunc.v8f64(<8 x double> %op)
1722   store <8 x double> %res, <8 x double>* %a
1723   ret void
1724 }
1725
1726 define void @frintz_v16f64(<16 x double>* %a) #0 {
1727 ; CHECK-LABEL: frintz_v16f64:
1728 ; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].d, vl16
1729 ; VBITS_GE_1024-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
1730 ; VBITS_GE_1024-NEXT: frintz [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
1731 ; VBITS_GE_1024-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
1732 ; VBITS_GE_1024-NEXT: ret
1733   %op = load <16 x double>, <16 x double>* %a
1734   %res = call <16 x double> @llvm.trunc.v16f64(<16 x double> %op)
1735   store <16 x double> %res, <16 x double>* %a
1736   ret void
1737 }
1738
1739 define void @frintz_v32f64(<32 x double>* %a) #0 {
1740 ; CHECK-LABEL: frintz_v32f64:
1741 ; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].d, vl32
1742 ; VBITS_GE_2048-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
1743 ; VBITS_GE_2048-NEXT: frintz [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
1744 ; VBITS_GE_2048-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
1745 ; VBITS_GE_2048-NEXT: ret
1746   %op = load <32 x double>, <32 x double>* %a
1747   %res = call <32 x double> @llvm.trunc.v32f64(<32 x double> %op)
1748   store <32 x double> %res, <32 x double>* %a
1749   ret void
1750 }
1751
1752 attributes #0 = { "target-features"="+sve" }
1753
1754 declare <4 x half> @llvm.ceil.v4f16(<4 x half>)
1755 declare <8 x half> @llvm.ceil.v8f16(<8 x half>)
1756 declare <16 x half> @llvm.ceil.v16f16(<16 x half>)
1757 declare <32 x half> @llvm.ceil.v32f16(<32 x half>)
1758 declare <64 x half> @llvm.ceil.v64f16(<64 x half>)
1759 declare <128 x half> @llvm.ceil.v128f16(<128 x half>)
1760 declare <2 x float> @llvm.ceil.v2f32(<2 x float>)
1761 declare <4 x float> @llvm.ceil.v4f32(<4 x float>)
1762 declare <8 x float> @llvm.ceil.v8f32(<8 x float>)
1763 declare <16 x float> @llvm.ceil.v16f32(<16 x float>)
1764 declare <32 x float> @llvm.ceil.v32f32(<32 x float>)
1765 declare <64 x float> @llvm.ceil.v64f32(<64 x float>)
1766 declare <1 x double> @llvm.ceil.v1f64(<1 x double>)
1767 declare <2 x double> @llvm.ceil.v2f64(<2 x double>)
1768 declare <4 x double> @llvm.ceil.v4f64(<4 x double>)
1769 declare <8 x double> @llvm.ceil.v8f64(<8 x double>)
1770 declare <16 x double> @llvm.ceil.v16f64(<16 x double>)
1771 declare <32 x double> @llvm.ceil.v32f64(<32 x double>)
1772
1773 declare <4 x half> @llvm.floor.v4f16(<4 x half>)
1774 declare <8 x half> @llvm.floor.v8f16(<8 x half>)
1775 declare <16 x half> @llvm.floor.v16f16(<16 x half>)
1776 declare <32 x half> @llvm.floor.v32f16(<32 x half>)
1777 declare <64 x half> @llvm.floor.v64f16(<64 x half>)
1778 declare <128 x half> @llvm.floor.v128f16(<128 x half>)
1779 declare <2 x float> @llvm.floor.v2f32(<2 x float>)
1780 declare <4 x float> @llvm.floor.v4f32(<4 x float>)
1781 declare <8 x float> @llvm.floor.v8f32(<8 x float>)
1782 declare <16 x float> @llvm.floor.v16f32(<16 x float>)
1783 declare <32 x float> @llvm.floor.v32f32(<32 x float>)
1784 declare <64 x float> @llvm.floor.v64f32(<64 x float>)
1785 declare <1 x double> @llvm.floor.v1f64(<1 x double>)
1786 declare <2 x double> @llvm.floor.v2f64(<2 x double>)
1787 declare <4 x double> @llvm.floor.v4f64(<4 x double>)
1788 declare <8 x double> @llvm.floor.v8f64(<8 x double>)
1789 declare <16 x double> @llvm.floor.v16f64(<16 x double>)
1790 declare <32 x double> @llvm.floor.v32f64(<32 x double>)
1791
1792 declare <4 x half> @llvm.nearbyint.v4f16(<4 x half>)
1793 declare <8 x half> @llvm.nearbyint.v8f16(<8 x half>)
1794 declare <16 x half> @llvm.nearbyint.v16f16(<16 x half>)
1795 declare <32 x half> @llvm.nearbyint.v32f16(<32 x half>)
1796 declare <64 x half> @llvm.nearbyint.v64f16(<64 x half>)
1797 declare <128 x half> @llvm.nearbyint.v128f16(<128 x half>)
1798 declare <2 x float> @llvm.nearbyint.v2f32(<2 x float>)
1799 declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>)
1800 declare <8 x float> @llvm.nearbyint.v8f32(<8 x float>)
1801 declare <16 x float> @llvm.nearbyint.v16f32(<16 x float>)
1802 declare <32 x float> @llvm.nearbyint.v32f32(<32 x float>)
1803 declare <64 x float> @llvm.nearbyint.v64f32(<64 x float>)
1804 declare <1 x double> @llvm.nearbyint.v1f64(<1 x double>)
1805 declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>)
1806 declare <4 x double> @llvm.nearbyint.v4f64(<4 x double>)
1807 declare <8 x double> @llvm.nearbyint.v8f64(<8 x double>)
1808 declare <16 x double> @llvm.nearbyint.v16f64(<16 x double>)
1809 declare <32 x double> @llvm.nearbyint.v32f64(<32 x double>)
1810
1811 declare <4 x half> @llvm.rint.v4f16(<4 x half>)
1812 declare <8 x half> @llvm.rint.v8f16(<8 x half>)
1813 declare <16 x half> @llvm.rint.v16f16(<16 x half>)
1814 declare <32 x half> @llvm.rint.v32f16(<32 x half>)
1815 declare <64 x half> @llvm.rint.v64f16(<64 x half>)
1816 declare <128 x half> @llvm.rint.v128f16(<128 x half>)
1817 declare <2 x float> @llvm.rint.v2f32(<2 x float>)
1818 declare <4 x float> @llvm.rint.v4f32(<4 x float>)
1819 declare <8 x float> @llvm.rint.v8f32(<8 x float>)
1820 declare <16 x float> @llvm.rint.v16f32(<16 x float>)
1821 declare <32 x float> @llvm.rint.v32f32(<32 x float>)
1822 declare <64 x float> @llvm.rint.v64f32(<64 x float>)
1823 declare <1 x double> @llvm.rint.v1f64(<1 x double>)
1824 declare <2 x double> @llvm.rint.v2f64(<2 x double>)
1825 declare <4 x double> @llvm.rint.v4f64(<4 x double>)
1826 declare <8 x double> @llvm.rint.v8f64(<8 x double>)
1827 declare <16 x double> @llvm.rint.v16f64(<16 x double>)
1828 declare <32 x double> @llvm.rint.v32f64(<32 x double>)
1829
1830 declare <4 x half> @llvm.round.v4f16(<4 x half>)
1831 declare <8 x half> @llvm.round.v8f16(<8 x half>)
1832 declare <16 x half> @llvm.round.v16f16(<16 x half>)
1833 declare <32 x half> @llvm.round.v32f16(<32 x half>)
1834 declare <64 x half> @llvm.round.v64f16(<64 x half>)
1835 declare <128 x half> @llvm.round.v128f16(<128 x half>)
1836 declare <2 x float> @llvm.round.v2f32(<2 x float>)
1837 declare <4 x float> @llvm.round.v4f32(<4 x float>)
1838 declare <8 x float> @llvm.round.v8f32(<8 x float>)
1839 declare <16 x float> @llvm.round.v16f32(<16 x float>)
1840 declare <32 x float> @llvm.round.v32f32(<32 x float>)
1841 declare <64 x float> @llvm.round.v64f32(<64 x float>)
1842 declare <1 x double> @llvm.round.v1f64(<1 x double>)
1843 declare <2 x double> @llvm.round.v2f64(<2 x double>)
1844 declare <4 x double> @llvm.round.v4f64(<4 x double>)
1845 declare <8 x double> @llvm.round.v8f64(<8 x double>)
1846 declare <16 x double> @llvm.round.v16f64(<16 x double>)
1847 declare <32 x double> @llvm.round.v32f64(<32 x double>)
1848
1849 declare <4 x half> @llvm.roundeven.v4f16(<4 x half>)
1850 declare <8 x half> @llvm.roundeven.v8f16(<8 x half>)
1851 declare <16 x half> @llvm.roundeven.v16f16(<16 x half>)
1852 declare <32 x half> @llvm.roundeven.v32f16(<32 x half>)
1853 declare <64 x half> @llvm.roundeven.v64f16(<64 x half>)
1854 declare <128 x half> @llvm.roundeven.v128f16(<128 x half>)
1855 declare <2 x float> @llvm.roundeven.v2f32(<2 x float>)
1856 declare <4 x float> @llvm.roundeven.v4f32(<4 x float>)
1857 declare <8 x float> @llvm.roundeven.v8f32(<8 x float>)
1858 declare <16 x float> @llvm.roundeven.v16f32(<16 x float>)
1859 declare <32 x float> @llvm.roundeven.v32f32(<32 x float>)
1860 declare <64 x float> @llvm.roundeven.v64f32(<64 x float>)
1861 declare <1 x double> @llvm.roundeven.v1f64(<1 x double>)
1862 declare <2 x double> @llvm.roundeven.v2f64(<2 x double>)
1863 declare <4 x double> @llvm.roundeven.v4f64(<4 x double>)
1864 declare <8 x double> @llvm.roundeven.v8f64(<8 x double>)
1865 declare <16 x double> @llvm.roundeven.v16f64(<16 x double>)
1866 declare <32 x double> @llvm.roundeven.v32f64(<32 x double>)
1867
1868 declare <4 x half> @llvm.trunc.v4f16(<4 x half>)
1869 declare <8 x half> @llvm.trunc.v8f16(<8 x half>)
1870 declare <16 x half> @llvm.trunc.v16f16(<16 x half>)
1871 declare <32 x half> @llvm.trunc.v32f16(<32 x half>)
1872 declare <64 x half> @llvm.trunc.v64f16(<64 x half>)
1873 declare <128 x half> @llvm.trunc.v128f16(<128 x half>)
1874 declare <2 x float> @llvm.trunc.v2f32(<2 x float>)
1875 declare <4 x float> @llvm.trunc.v4f32(<4 x float>)
1876 declare <8 x float> @llvm.trunc.v8f32(<8 x float>)
1877 declare <16 x float> @llvm.trunc.v16f32(<16 x float>)
1878 declare <32 x float> @llvm.trunc.v32f32(<32 x float>)
1879 declare <64 x float> @llvm.trunc.v64f32(<64 x float>)
1880 declare <1 x double> @llvm.trunc.v1f64(<1 x double>)
1881 declare <2 x double> @llvm.trunc.v2f64(<2 x double>)
1882 declare <4 x double> @llvm.trunc.v4f64(<4 x double>)
1883 declare <8 x double> @llvm.trunc.v8f64(<8 x double>)
1884 declare <16 x double> @llvm.trunc.v16f64(<16 x double>)
1885 declare <32 x double> @llvm.trunc.v32f64(<32 x double>)