Merge branch '138-toggle-free-look-with-hotkey' into 'main/atys-live'
[ryzomcore.git] / nel / src / misc / fast_mem.cpp
blobc957195b3356930a228a9cd81c58b8371c64b79c
// NeL - MMORPG Framework <http://dev.ryzom.com/projects/nel/>
// Copyright (C) 2010  Winch Gate Property Limited
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as
// published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program.  If not, see <http://www.gnu.org/licenses/>.
17 #include "stdmisc.h"
19 #include "nel/misc/fast_mem.h"
20 #include "nel/misc/system_info.h"
22 #ifdef DEBUG_NEW
23 #define new DEBUG_NEW
24 #endif
26 namespace NLMISC
29 #if defined(NL_OS_WINDOWS) && !defined(NL_NO_ASM)
32 // ***************************************************************************
33 void *CFastMem::memcpySSE(void *dest, const void *src, size_t nbytes)
35 _asm
37 mov esi, src
38 mov edi, dest
39 mov ebx, nbytes
41 // edx takes number of bytes%64
42 mov edx, ebx
43 and edx, 63
45 // ebx takes number of bytes/64
46 shr ebx, 6
47 jz byteCopy
50 loop4k: // flush 4k into temporary buffer
51 push esi
52 mov ecx, ebx
53 // copy per block of 64 bytes. Must not override 64*64= 4096 bytes.
54 cmp ecx, 64
55 jle skipMiniMize
56 mov ecx, 64
57 skipMiniMize:
58 // eax takes the number of 64bytes packet for this block.
59 mov eax, ecx
61 loopMemToL1:
62 prefetchnta 64[ESI] // Prefetch next loop, non-temporal
63 prefetchnta 96[ESI]
65 movq mm1, 0[ESI] // Read in source data
66 movq mm2, 8[ESI]
67 movq mm3, 16[ESI]
68 movq mm4, 24[ESI]
69 movq mm5, 32[ESI]
70 movq mm6, 40[ESI]
71 movq mm7, 48[ESI]
72 movq mm0, 56[ESI]
74 add esi, 64
75 dec ecx
76 jnz loopMemToL1
78 pop esi // Now copy from L1 to system memory
79 mov ecx, eax
81 loopL1ToMem:
82 movq mm1, 0[ESI] // Read in source data from L1
83 movq mm2, 8[ESI]
84 movq mm3, 16[ESI]
85 movq mm4, 24[ESI]
86 movq mm5, 32[ESI]
87 movq mm6, 40[ESI]
88 movq mm7, 48[ESI]
89 movq mm0, 56[ESI]
91 movntq 0[EDI], mm1 // Non-temporal stores
92 movntq 8[EDI], mm2
93 movntq 16[EDI], mm3
94 movntq 24[EDI], mm4
95 movntq 32[EDI], mm5
96 movntq 40[EDI], mm6
97 movntq 48[EDI], mm7
98 movntq 56[EDI], mm0
100 add esi, 64
101 add edi, 64
102 dec ecx
103 jnz loopL1ToMem
105 // Do next 4k block
106 sub ebx, eax
107 jnz loop4k
109 emms
111 byteCopy:
112 // Do last bytes with std cpy
113 mov ecx, edx
114 rep movsb
116 return dest;
119 // ***************************************************************************
120 void CFastMem::precacheSSE(const void *src, uint nbytes)
122 _asm
124 mov esi, src
125 mov ecx, nbytes
126 // 64 bytes per pass
127 shr ecx, 6
128 jz endLabel
130 loopMemToL1:
131 prefetchnta 64[ESI] // Prefetch next loop, non-temporal
132 prefetchnta 96[ESI]
134 movq mm1, 0[ESI] // Read in source data
135 movq mm2, 8[ESI]
136 movq mm3, 16[ESI]
137 movq mm4, 24[ESI]
138 movq mm5, 32[ESI]
139 movq mm6, 40[ESI]
140 movq mm7, 48[ESI]
141 movq mm0, 56[ESI]
143 add esi, 64
144 dec ecx
145 jnz loopMemToL1
147 emms
149 endLabel:
153 // ***************************************************************************
154 void CFastMem::precacheMMX(const void *src, uint nbytes)
156 _asm
158 mov esi, src
159 mov ecx, nbytes
160 // 64 bytes per pass
161 shr ecx, 6
162 jz endLabel
164 loopMemToL1:
165 movq mm1, 0[ESI] // Read in source data
166 movq mm2, 8[ESI]
167 movq mm3, 16[ESI]
168 movq mm4, 24[ESI]
169 movq mm5, 32[ESI]
170 movq mm6, 40[ESI]
171 movq mm7, 48[ESI]
172 movq mm0, 56[ESI]
174 add esi, 64
175 dec ecx
176 jnz loopMemToL1
178 emms
180 endLabel:
185 // ***************************************************************************
186 void CFastMem::precache(const void *src, uint nbytes)
188 if(NLMISC::CSystemInfo::hasSSE())
189 precacheSSE(src, nbytes);
190 else if(NLMISC::CSystemInfo::hasMMX())
191 precacheMMX(src, nbytes);
195 #else
198 // ***************************************************************************
199 void *CFastMem::memcpySSE(void *dst, const void *src, size_t nbytes)
201 // Use std memcpy.
202 return memcpy(dst, src, nbytes);
204 void CFastMem::precacheSSE(const void *src, uint nbytes)
206 // no-op.
208 void CFastMem::precacheMMX(const void *src, uint nbytes)
210 // no-op.
212 void CFastMem::precache(const void *src, uint nbytes)
214 // no-op.
217 #endif
/// Signature shared by ::memcpy and CFastMem::memcpySSE.
typedef void *(*memcpyPtr)(void *dst, const void *src, size_t nbytes);

/** Choose the memcpy implementation to use for this process.
 *
 *	When the inline assembly code is compiled in (NL_OS_WINDOWS defined and
 *	NL_NO_ASM not defined) and CSystemInfo::hasSSE() is true, returns
 *	CFastMem::memcpySSE. In every other case returns the C library ::memcpy.
 *
 *	\return a non-null pointer to the selected copy routine
 */
static memcpyPtr findBestmemcpy ()
{
#if defined(NL_OS_WINDOWS) && !defined(NL_NO_ASM)
	if (CSystemInfo::hasSSE ())
		return CFastMem::memcpySSE;
#endif // NL_OS_WINDOWS && !NL_NO_ASM

	// No assembly version available, or the CPU has no SSE.
	return ::memcpy;
}
233 void *(*CFastMem::memcpy)(void *dts, const void *src, size_t nbytes) = findBestmemcpy ();
235 } // NLMISC