; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-eabi -aarch64-neon-syntax=apple -aarch64-enable-simd-scalar=true -asm-verbose=false -disable-adv-copy-opt=true | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-NOOPT
; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-eabi -aarch64-neon-syntax=apple -aarch64-enable-simd-scalar=true -asm-verbose=false -disable-adv-copy-opt=false | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-OPT
; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-eabi -aarch64-neon-syntax=generic -aarch64-enable-simd-scalar=true -asm-verbose=false -disable-adv-copy-opt=true | FileCheck %s -check-prefix=GENERIC -check-prefix=GENERIC-NOOPT
; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-eabi -aarch64-neon-syntax=generic -aarch64-enable-simd-scalar=true -asm-verbose=false -disable-adv-copy-opt=false | FileCheck %s -check-prefix=GENERIC -check-prefix=GENERIC-OPT
; bar: adds %a and %b as <2 x i64> vectors, then takes lane 0 of that sum and
; lane 0 of %b, computes their scalar sum and difference, and returns a vector
; with the sum in lane 0 and the difference in lane 1.
;
; The directives below are written twice: once for the Apple NEON syntax runs
; and once for the generic NEON syntax runs. Within each half, the NOOPT
; directives apply to the run with -disable-adv-copy-opt=true and the OPT
; directives to the run with -disable-adv-copy-opt=false.
define <2 x i64> @bar(<2 x i64> %a, <2 x i64> %b) nounwind readnone {
; CHECK-LABEL: bar:
; CHECK: add.2d v[[REG:[0-9]+]], v0, v1
; CHECK: add d[[REG3:[0-9]+]], d[[REG]], d1
; CHECK: sub d[[REG2:[0-9]+]], d[[REG]], d1
; Without advanced copy optimization, we end up with cross-register-bank
; copies that cannot be coalesced.
; CHECK-NOOPT: fmov [[COPY_REG3:x[0-9]+]], d[[REG3]]
; With advanced copy optimization, we end up with just one copy
; to insert the computed high part into the V register.
; CHECK-OPT-NOT: fmov
; CHECK: fmov [[COPY_REG2:x[0-9]+]], d[[REG2]]
; CHECK-NOOPT: fmov d0, [[COPY_REG3]]
; CHECK-OPT-NOT: fmov
; CHECK: mov.d v0[1], [[COPY_REG2]]
;
; Same expectations, spelled in the generic NEON syntax.
; GENERIC-LABEL: bar:
; GENERIC: add v[[REG:[0-9]+]].2d, v0.2d, v1.2d
; GENERIC: add d[[REG3:[0-9]+]], d[[REG]], d1
; GENERIC: sub d[[REG2:[0-9]+]], d[[REG]], d1
; GENERIC-NOOPT: fmov [[COPY_REG3:x[0-9]+]], d[[REG3]]
; GENERIC-OPT-NOT: fmov
; GENERIC: fmov [[COPY_REG2:x[0-9]+]], d[[REG2]]
; GENERIC-NOOPT: fmov d0, [[COPY_REG3]]
; GENERIC-OPT-NOT: fmov
; GENERIC: mov v0.d[1], [[COPY_REG2]]
  %add = add <2 x i64> %a, %b
  %vgetq_lane = extractelement <2 x i64> %add, i32 0
  %vgetq_lane2 = extractelement <2 x i64> %b, i32 0
  %add3 = add i64 %vgetq_lane, %vgetq_lane2
  %sub = sub i64 %vgetq_lane, %vgetq_lane2
  %vecinit = insertelement <2 x i64> undef, i64 %add3, i32 0
  %vecinit8 = insertelement <2 x i64> %vecinit, i64 %sub, i32 1
  ret <2 x i64> %vecinit8
}
; subdd_su64: subtracts lane 0 of %a from lane 0 of %b as a scalar i64 and
; returns the result bit-cast to double. The expected output performs the
; subtract directly on D registers.
define double @subdd_su64(<2 x i64> %a, <2 x i64> %b) nounwind readnone {
; CHECK-LABEL: subdd_su64:
; CHECK: sub d0, d1, d0
;
; GENERIC-LABEL: subdd_su64:
; GENERIC: sub d0, d1, d0
  %vecext = extractelement <2 x i64> %a, i32 0
  %vecext1 = extractelement <2 x i64> %b, i32 0
  %sub.i = sub nsw i64 %vecext1, %vecext
  %retval = bitcast i64 %sub.i to double
  ret double %retval
}
; vaddd_su64: adds lane 0 of %b and lane 0 of %a as scalar i64 values and
; returns the result bit-cast to double. The expected output performs the add
; directly on D registers.
define double @vaddd_su64(<2 x i64> %a, <2 x i64> %b) nounwind readnone {
; CHECK-LABEL: vaddd_su64:
; CHECK: add d0, d1, d0
;
; GENERIC-LABEL: vaddd_su64:
; GENERIC: add d0, d1, d0
  %vecext = extractelement <2 x i64> %a, i32 0
  %vecext1 = extractelement <2 x i64> %b, i32 0
  %add.i = add nsw i64 %vecext1, %vecext
  %retval = bitcast i64 %add.i to double
  ret double %retval
}
; add_sub_su64: adds lane 0 of %b and lane 0 of %a, negates the sum (0 - sum),
; and returns the result bit-cast to double.
; Note carried over from the original test: the sub machine instruction does
; not access the dsub register.
define double @add_sub_su64(<2 x i64> %a, <2 x i64> %b) nounwind readnone {
; CHECK-LABEL: add_sub_su64:
; CHECK: add d0, d1, d0
; CHECK: sub d0, {{d[0-9]+}}, d0
;
; GENERIC-LABEL: add_sub_su64:
; GENERIC: add d0, d1, d0
; GENERIC: sub d0, {{d[0-9]+}}, d0
  %vecext = extractelement <2 x i64> %a, i32 0
  %vecext1 = extractelement <2 x i64> %b, i32 0
  %add.i = add i64 %vecext1, %vecext
  %sub.i = sub i64 0, %add.i
  %retval = bitcast i64 %sub.i to double
  ret double %retval
}
; and_su64: bitwise AND of lane 0 of %b with lane 0 of %a as scalar i64
; values, returned bit-cast to double. The expected output uses the 8-byte
; vector form of the and instruction.
define double @and_su64(<2 x i64> %a, <2 x i64> %b) nounwind readnone {
; CHECK-LABEL: and_su64:
; CHECK: and.8b v0, v1, v0
;
; GENERIC-LABEL: and_su64:
; GENERIC: and v0.8b, v1.8b, v0.8b
  %vecext = extractelement <2 x i64> %a, i32 0
  %vecext1 = extractelement <2 x i64> %b, i32 0
  %and.i = and i64 %vecext1, %vecext
  %retval = bitcast i64 %and.i to double
  ret double %retval
}
; orr_su64: bitwise OR of lane 0 of %b with lane 0 of %a as scalar i64 values,
; returned bit-cast to double. The expected output uses the 8-byte vector form
; of the orr instruction.
define double @orr_su64(<2 x i64> %a, <2 x i64> %b) nounwind readnone {
; CHECK-LABEL: orr_su64:
; CHECK: orr.8b v0, v1, v0
;
; GENERIC-LABEL: orr_su64:
; GENERIC: orr v0.8b, v1.8b, v0.8b
  %vecext = extractelement <2 x i64> %a, i32 0
  %vecext1 = extractelement <2 x i64> %b, i32 0
  %or.i = or i64 %vecext1, %vecext
  %retval = bitcast i64 %or.i to double
  ret double %retval
}
; xorr_su64: bitwise XOR of lane 0 of %b with lane 0 of %a as scalar i64
; values, returned bit-cast to double. The expected output uses the 8-byte
; vector form of the eor instruction.
define double @xorr_su64(<2 x i64> %a, <2 x i64> %b) nounwind readnone {
; CHECK-LABEL: xorr_su64:
; CHECK: eor.8b v0, v1, v0
;
; GENERIC-LABEL: xorr_su64:
; GENERIC: eor v0.8b, v1.8b, v0.8b
  %vecext = extractelement <2 x i64> %a, i32 0
  %vecext1 = extractelement <2 x i64> %b, i32 0
  %xor.i = xor i64 %vecext1, %vecext
  %retval = bitcast i64 %xor.i to double
  ret double %retval
}