/*
 * This file is part of the GROMACS molecular simulation package.
 *
 * Copyright (c) 2015,2017,2018,2019, by the GROMACS development team, led by
 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 * and including many others, as listed in the AUTHORS file in the
 * top-level source directory and at http://www.gromacs.org.
 *
 * GROMACS is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * GROMACS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GROMACS; if not, see
 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * If you want to redistribute modifications to GROMACS, please
 * consider that scientific software is very special. Version
 * control is crucial - bugs must be traceable. We will be happy to
 * consider code for inclusion in the official distribution, but
 * derived work must not be called official GROMACS. Details are found
 * in the README & COPYING files - if they are missing, get the
 * official version at http://www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org.
 */
/*! \internal \file
 * \brief
 * Implements AlignedAllocator.
 *
 * \author Erik Lindahl <erik.lindahl@gmail.com>
 * \author Mark Abraham <mark.j.abraham@gmail.com>
 * \ingroup module_utility
 */
#include "gmxpre.h"

#include "alignedallocator.h"

#include "config.h"

#include <cstdlib>

#if HAVE_MM_MALLOC_H
#    include <mm_malloc.h>
#elif HAVE_MALLOC_H
#    include <malloc.h>
#elif HAVE_XMMINTRIN_H
#    include <xmmintrin.h>
#endif

#if HAVE_UNISTD_H
#    include <unistd.h>
#endif

#if GMX_NATIVE_WINDOWS
#    include <windows.h> // only for the page size query purposes
#endif

#include "gromacs/utility/gmxassert.h"

namespace gmx
{

namespace internal
{
/*! \brief Allocate aligned memory in a fully portable way
 *
 * \param bytes      Amount of memory (bytes) to allocate. The routine will return
 *                   nullptr if the allocation fails. However, note that asking for
 *                   zero bytes will return a pointer that is non-null and properly
 *                   aligned (but obviously you cannot use it, since you promised
 *                   not to access data beyond the 0 bytes you asked for).
 *
 * \param alignment  Alignment specification in bytes, must be a power of 2.
 *
 * \return Nonzero pointer if the allocation worked, otherwise nullptr.
 *
 * This routine should only be called from alignedMalloc(), which also does
 * the checking for valid values. This particular function is used for platforms
 * where we have no control of the alignment of memory returned by the system.
 * Instead, we increase the amount of memory requested internally such that we
 * both can create a pointer inside this memory that fulfills the memory
 * alignment requested, and that we have room to store the original pointer
 * just before this area.
 *
 * \note This is an internal routine that should only be called from
 *       gmx::alignedMalloc(). Just like system-provided routines, it provides
 *       memory that is aligned - but not padded.
 */
gmx_unused
void* alignedMallocGeneric(std::size_t bytes, std::size_t alignment)
{
    // The amount of extra memory (beyond what the user asked for) we need is:
    // - sizeof(void *), to store the original pointer
    // - alignment, to make sure we have an aligned pointer in the area
    void* pMalloc = malloc(bytes + sizeof(void*) + alignment);
    if (pMalloc == nullptr)
    {
        return nullptr;
    }
    // Convert pMalloc to size_t (so we work with raw bytes), add the space we
    // need to save the original pointer, and (alignment-1) bytes, and then mask
    // out the lowest bits.
    std::size_t mask     = ~static_cast<std::size_t>(alignment - 1);
    void*       pAligned = reinterpret_cast<void*>(
            (reinterpret_cast<std::size_t>(pMalloc) + sizeof(void*) + alignment - 1) & mask);

    // Store original pointer. Since we allocated at least sizeof(void *) extra
    // space this is always a valid memory location.
    reinterpret_cast<void**>(pAligned)[-1] = pMalloc;

    return pAligned;
}
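
// Worked example (added for illustration; the addresses are hypothetical):
// with alignment = 64 on a 64-bit platform, a raw malloc() result of 0x1003
// gives
//
//     (0x1003 + sizeof(void*) + 63) & ~0x3F  ==  (0x1003 + 8 + 63) & ~0x3F  ==  0x1040
//
// so pAligned is 64-byte aligned and lies at least sizeof(void*) bytes beyond
// pMalloc, leaving room to stash the original pointer at pAligned[-1].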
/*! \brief Free aligned memory
 *
 * \param p  Memory pointer previously returned from
 *           gmx::internal::alignedMallocGeneric().
 *
 * Since this routine relies on the original pointer being stored just before
 * the memory area p points to, bad things will happen if you call this routine
 * with a pointer obtained any other way, or if you call the system free()
 * with a pointer obtained from gmx::internal::alignedMallocGeneric().
 *
 * \note This is an internal routine that should only be called from
 *       gmx::alignedFree().
 */
gmx_unused
void alignedFreeGeneric(void* p)
{
    if (p)
    {
        // Pick up the pointer stored just below p, and use that to call free()
        free(reinterpret_cast<void**>(p)[-1]);
    }
}
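
// Usage sketch (added for illustration only; callers normally reach these
// internal routines through the allocation policies further down, never
// directly):
//
//     void* p = alignedMallocGeneric(1000, 128);
//     GMX_RELEASE_ASSERT(reinterpret_cast<std::size_t>(p) % 128 == 0,
//                        "Generic allocation must honour the requested alignment");
//     alignedFreeGeneric(p);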
} // namespace internal

//! Implement malloc of \c bytes of memory, aligned to \c alignment.
void* mallocImpl(std::size_t bytes, std::size_t alignment)
{
    void* p;

#if HAVE__MM_MALLOC
    p = _mm_malloc(bytes, alignment);
#elif HAVE_POSIX_MEMALIGN
    if (posix_memalign(&p, alignment, bytes) != 0)
    {
        p = nullptr;
    }
#elif HAVE_MEMALIGN
    p = memalign(alignment, bytes);
#elif HAVE__ALIGNED_MALLOC
    p = _aligned_malloc(bytes, alignment);
#else
    p = internal::alignedMallocGeneric(bytes, alignment);
#endif

    return p;
}
//! Free aligned memory allocated with mallocImpl().
void freeImpl(void* p)
{
    if (p)
    {
#if HAVE__MM_MALLOC
        _mm_free(p);
#elif HAVE_POSIX_MEMALIGN || HAVE_MEMALIGN
        free(p);
#elif HAVE__ALIGNED_MALLOC
        _aligned_free(p);
#else
        internal::alignedFreeGeneric(p);
#endif
    }
}
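
// Note added for clarity (not part of the original comments): whichever branch
// the preprocessor selects, mallocImpl() and freeImpl() always end up using a
// matching pair, e.g. _mm_malloc()/_mm_free() or _aligned_malloc()/_aligned_free().
// Releasing such memory with plain free() would be undefined behaviour on some
// platforms, which is why the policies below route both allocation and release
// through these two helpers:
//
//     void* p = mallocImpl(1024, 128);
//     freeImpl(p); // must not be plain free(p)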
// === AlignedAllocationPolicy

std::size_t AlignedAllocationPolicy::alignment()
{
    // For now we always use 128-byte alignment:
    // 1) IBM Power already has cache lines of 128 bytes, and needs it.
    // 2) x86 has 64-byte cache lines, but since a future AVX-1024 (rumored?)
    //    would need 1024/8 = 128 byte SIMD alignment, it is safer to use that
    //    already now.
    // 3) The old Pentium4 used 256-byte cache prefetching (but 64-byte lines).
    //    However, it's not worth worrying about performance for P4...
    // 4) ARM & Sparc have 64-byte lines, but will be just fine with
    //    128-byte alignment (nobody knows what the future brings).
    //
    // So, for now we're semi-lazy and just align to 128 bytes!
    //
    // TODO: LINCS code is copying this assumption independently (for now)
    return 128;
}
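
// Added note for scale (not in the original comment): the widest SIMD in
// current x86 hardware, AVX-512, only needs 512/8 = 64-byte alignment, so the
// 128-byte choice above already covers both it and a full Power cache line.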
void* AlignedAllocationPolicy::malloc(std::size_t bytes)
{
    // Pad the allocation with another alignment's worth of bytes at the end,
    // to avoid false sharing with whatever is allocated next.
    auto size = alignment();
    bytes += size;

    return mallocImpl(bytes, size);
}
void AlignedAllocationPolicy::free(void* p)
{
    freeImpl(p);
}
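
// Usage sketch (added for illustration; in practice this policy is consumed
// through an allocator class template declared in the header, so direct calls
// like these only demonstrate the contract):
//
//     void* p = AlignedAllocationPolicy::malloc(3 * sizeof(float));
//     GMX_ASSERT(reinterpret_cast<std::size_t>(p) % AlignedAllocationPolicy::alignment() == 0,
//                "Policy must return memory aligned to alignment()");
//     AlignedAllocationPolicy::free(p);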

// === PageAlignedAllocationPolicy

//! Return a page size, from a sysconf/WinAPI query if available, or a default guess (4096 bytes).
//! \todo Move this function into sysinfo.cpp where other OS-specific code/includes live
static std::size_t getPageSize()
{
    long pageSize;
#if GMX_NATIVE_WINDOWS
    SYSTEM_INFO si;
    GetNativeSystemInfo(&si);
    pageSize = si.dwPageSize;
#elif defined(_SC_PAGESIZE)
    /* Note that sysconf returns -1 on its error conditions, which we
       don't really need to check, nor can really handle at
       initialization time. */
    pageSize = sysconf(_SC_PAGESIZE);
#elif defined(_SC_PAGE_SIZE)
    pageSize = sysconf(_SC_PAGE_SIZE);
#else
    pageSize = -1;
#endif
    return ((pageSize == -1) ? 4096 // A useful guess
                             : static_cast<std::size_t>(pageSize));
}
/* Implements the "construct on first use" idiom to avoid the static
 * initialization order fiasco where a possible static page-aligned
 * container would be initialized before the alignment variable was.
 *
 * Note that thread-safety of the initialization is guaranteed by the
 * C++11 language standard.
 *
 * The size_t has no destructor, so there is no deinitialization
 * issue. See https://isocpp.org/wiki/faq/ctors for discussion of
 * alternatives and trade-offs. */
std::size_t PageAlignedAllocationPolicy::alignment()
{
    static size_t thePageSize = getPageSize();
    return thePageSize;
}
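
// Counter-example sketch (added for illustration; g_pageSize is hypothetical):
// a namespace-scope alternative such as
//
//     static const std::size_t g_pageSize = getPageSize();
//
// would be exposed to the static initialization order fiasco, because a static
// page-aligned container in another translation unit could call alignment()
// before g_pageSize was initialized. The function-local static above is instead
// initialized on first use, and C++11 guarantees that this initialization is
// thread-safe.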
void* PageAlignedAllocationPolicy::malloc(std::size_t bytes)
{
    return mallocImpl(bytes, alignment());
}
void PageAlignedAllocationPolicy::free(void* p)
{
    freeImpl(p);
}

} // namespace gmx