1 //===----------------------Hexagon builtin routine ------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // An optimized version of a memcpy which is equivalent to the following loop:
11 // volatile unsigned *dest;
14 // for (i = 0; i < num_words; ++i) *dest++ = *src++;
17 // The corresponding C prototype for this function would be
18 // void hexagon_memcpy_forward_vp4cp4n2(volatile unsigned *dest,
19 // const unsigned *src,
20 // unsigned num_words);
22 // *** Both dest and src must be aligned to 32-bit boundaries. ***
23 // The code does not perform any runtime checks for this, and will fail
24 // in bad ways if this requirement is not met.
26 // The "forward" in the name refers to the fact that the function copies
27 // the words going forward in memory. It is incorrect to use this function
28 // for cases where the original code copied words in any other order.
30 // *** This function is only for use by the compiler. ***
31 // The only intended use is for the LLVM compiler to generate calls to
32 // this function, when a mem-copy loop, like the one above, is detected.
41 .globl hexagon_memcpy_forward_vp4cp4n2
43 .type hexagon_memcpy_forward_vp4cp4n2,@function
// hexagon_memcpy_forward_vp4cp4n2: copy 32-bit words in ascending address
// order.
//
// The work is organised in three phases:
//   1. prolog - the words that remain in the current page,
//   2. main   - whole pages of 1024 words each,
//   3. tail   - the words left over after the last whole page.
// The branches to .Lskipprolog and .Lskipmain bypass phases 1 and 2.
//
// Register usage, as far as the instructions below show it:
//   r0     - destination pointer; written through memw(r0++#4), which stores
//            one word and then advances r0 by 4 bytes.
//   r1     - an address whose position inside its page drives the prolog;
//            presumably the source pointer of the C prototype - TODO confirm.
//   r2     - number of words still to be copied.
//   r3, r4 - scratch: word/page/block counts, prefetch info, word in flight.
//   p0     - predicate tested by the early-exit branches.
//   r31    - target of the conditional return (jumpr).
44 hexagon_memcpy_forward_vp4cp4n2:
46 // Compute r3 to be the number of words remaining in the current page.
47 // At the same time, compute r4 to be the number of 32-byte blocks
48 // remaining in the page (for prefetch).
54 // The word count before end-of-page is in the 12 lowest bits of r3.
55 // (If the address in r1 was already page-aligned, the bits are 0.)
// Take the 10-bit field starting at bit 2, i.e. bits [11:2] of r3: the byte
// distance divided by 4 (bytes -> words), modulo 1024.
56 r3 = extractu(r3, #10, #2)
// Take the 7-bit field starting at bit 5, i.e. bits [11:5] of r3: the byte
// distance divided by 32, modulo 128.
// NOTE(review): this matches the "32-byte blocks" comment above only if r3 is
// still the byte distance here, i.e. this instruction shares a packet with
// the previous extractu and reads the old r3 - confirm the packet grouping.
57 r4 = extractu(r3, #7, #5)
// OR the block count in r4 into the constant 0x202000; the result is what the
// comments further down call "prefetch info".
// NOTE(review): presumably an l2fetch control word (stride = 32, width = 32,
// low byte = number of lines) - verify against the Hexagon reference manual.
64 r4 = or(r4, ##2105344) // 2105344 = 0x202000
// Bypass the prolog when p0 is true. ".new" means p0 is produced by a compare
// in the same packet; ":nt" hints that the branch is usually not taken.
// NOTE(review): presumably p0 = (r3 == 0), i.e. no words to copy before the
// page boundary - confirm against the compare that sets p0.
66 if (p0.new) jump:nt .Lskipprolog
71 r2 = sub(r2, r3) // r2 = number of words left after the prolog.
81 // Let r3 = number of whole pages left (page = 1024 words).
// Compare-and-branch on the r3 value produced in this same packet (r3.new):
// when no whole page is left, bypass the page loop.
83 if (cmp.eq(r3.new, #0)) jump:nt .Lskipmain
// Keep only the low 10 bits of the word count: the words that do not fill a
// whole page.
87 r2 = extractu(r2, #10, #0) // r2 = r2 & 1023
// 0x202080 = 0x202000 | 0x80: the same constant as used before the prolog,
// with a block count of 128 = 4096 bytes / 32 bytes, i.e. one full page.
88 r3 = ##2105472 // r3 = 0x202080 (prefetch info)
90 // Iterate over pages.
93 // Prefetch each individual page.
// Tail: rebuild the prefetch info for the leftover words. r2 counts words, so
// r2 >> 3 is the number of complete 8-word (32-byte) blocks.
105 r3 = ##2105344 // r3 = 0x202000 (prefetch info)
106 r4 = lsr(r2, #3) // r4 = number of 32-byte blocks remaining.
// Conditional early return through r31 when p0 is true.
// NOTE(review): presumably p0 = (r2 == 0), i.e. nothing is left to copy -
// confirm against the compare that sets p0.
108 if (p0.new) jumpr:nt r31
// Store the word that was placed in r4 earlier in this same packet (.new) to
// the address in r0, then advance r0 by 4 bytes.
119 memw(r0++#4) = r4.new
// Record the size of the function symbol: current location minus entry label.
124 .size hexagon_memcpy_forward_vp4cp4n2, . - hexagon_memcpy_forward_vp4cp4n2