memcpy: hide some memory latencies
[nova-simd.git] / benchmarks / malloc_aligned.hpp
bloba9e447de35470109b0442a1349e73f218098171a
1 // functions for aligned memory allocation
2 // Copyright (C) 2009 Tim Blechmann
3 //
4 // This program is free software; you can redistribute it and/or modify
5 // it under the terms of the GNU General Public License as published by
6 // the Free Software Foundation; either version 2 of the License, or
7 // (at your option) any later version.
8 //
9 // This program is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 // GNU General Public License for more details.
14 // You should have received a copy of the GNU General Public License
15 // along with this program; see the file COPYING. If not, write to
16 // the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17 // Boston, MA 02111-1307, USA.
19 #ifndef UTILITIES_MALLOC_ALIGNED_HPP
20 #define UTILITIES_MALLOC_ALIGNED_HPP
#include <cstddef>
#include <cstdlib>
#include <cstring>
#include <new>

#include <boost/noncopyable.hpp>

#ifdef __SSE2__
#include <xmmintrin.h>
#elif defined(HAVE_TBB)
#include <tbb/cache_aligned_allocator.h>
#endif /* HAVE_TBB */
34 namespace nova
37 #if _POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600
38 /* we have posix_memalign */
/* memory alignment constraints:
 *
 * - 16 byte for SSE operations
 * - the cache line size of modern x86 cpus is 64 bytes (pentium-m, pentium 4, core, k8)
 */
const int malloc_memory_alignment = 64;

/** allocate nbytes of memory via posix_memalign, aligned to malloc_memory_alignment bytes
 *
 *  \param nbytes  size of the requested chunk in bytes
 *  \return pointer to the chunk, or 0 if posix_memalign reports an error
 * */
inline void* malloc_aligned(std::size_t nbytes)
{
    void * chunk;
    if (posix_memalign(&chunk, malloc_memory_alignment, nbytes) != 0)
        return 0;
    return chunk;
}
/** release a chunk of memory by handing it to free()
 *
 *  \param ptr  pointer to release
 * */
inline void free_aligned(void *ptr)
{
    std::free(ptr);
}
62 #elif defined(__APPLE__)
/* alignment (in bytes) of the chunks returned by malloc_aligned */
const int malloc_memory_alignment = 64;

/** allocate nbytes of memory, aligned to malloc_memory_alignment bytes
 *
 *  apple's plain malloc only returns 16-byte aligned chunks, which is less than the
 *  advertised malloc_memory_alignment. posix_memalign honours the full alignment and
 *  its chunks are released with free(), exactly like chunks obtained from malloc.
 *
 *  \param nbytes  size of the requested chunk in bytes
 *  \return pointer to the chunk, or 0 if posix_memalign reports an error
 * */
inline void* malloc_aligned(std::size_t nbytes)
{
    void * ret = 0;
    if (posix_memalign(&ret, malloc_memory_alignment, nbytes) != 0)
        return 0;
    return ret;
}
/** release a chunk of memory by handing it to free()
 *
 *  \param ptr  pointer to release
 * */
inline void free_aligned(void *ptr)
{
    std::free(ptr);
}
78 #elif defined(__SSE2__)
80 const int malloc_memory_alignment = 64;
82 inline void* malloc_aligned(std::size_t nbytes)
84 return _mm_malloc(nbytes, malloc_memory_alignment);
87 inline void free_aligned(void *ptr)
89 _mm_free(ptr);
92 #elif defined(HAVE_TBB)
94 inline void* malloc_aligned(std::size_t nbytes)
96 tbb::cache_aligned_allocator<void*> ca_alloc;
97 return static_cast<void*>(ca_alloc.allocate(nbytes));
100 inline void free_aligned(void *ptr)
102 tbb::cache_aligned_allocator<void*> ca_alloc;
103 ca_alloc.deallocate(static_cast<void**>(ptr), 0);
106 #else
/* on other systems, we use the aligned memory allocation taken
 * from thomas grill's implementation for pd */

/* alignment (in bytes) of the chunks returned by malloc_aligned: 128 bit */
const int malloc_memory_alignment = 16;

/** allocate nbytes of memory, aligned to malloc_memory_alignment bytes
 *
 *  the chunk obtained from malloc is laid out as
 *
 *      [padding][pointer returned by malloc][aligned user data ...]
 *
 *  the pointer returned by malloc is stored directly in front of the aligned data,
 *  so that free_aligned can recover it.
 *
 *  \param nbytes  size of the requested chunk in bytes
 *  \return pointer to the aligned data, or 0 if malloc fails or if the padded
 *          request size does not fit into std::size_t
 * */
inline void* malloc_aligned(std::size_t nbytes)
{
    const std::size_t alignment = malloc_memory_alignment;

    /* worst-case padding plus room for the saved pointer */
    const std::size_t overhead = (alignment - 1) + sizeof(void *);

    /* nbytes + overhead must not wrap around: malloc would hand out a chunk that is
     * far smaller than what the caller asked for */
    if (nbytes > std::size_t(-1) - overhead)
        return 0;

    unsigned char * const raw = static_cast<unsigned char *>(std::malloc(nbytes + overhead));
    if (raw == 0)
        return 0;

    /* first address that leaves room for the saved pointer */
    unsigned char * const first = raw + sizeof(void *);

    /* the address is converted to std::size_t instead of long, since long is
     * narrower than a pointer on LLP64 platforms */
    const std::size_t misalignment = reinterpret_cast<std::size_t>(first) & (alignment - 1);

    /* calculate aligned pointer */
    unsigned char * const ret = (misalignment == 0) ? first
                                                    : first + (alignment - misalignment);

    /* save original memory location */
    *reinterpret_cast<void **>(ret - sizeof(void *)) = raw;
    return ret;
}

/** release a chunk of memory that has been allocated by malloc_aligned
 *
 *  \param ptr  pointer returned by malloc_aligned; null pointers are ignored, like
 *              free() does
 * */
inline void free_aligned(void *ptr)
{
    /* there is no saved pointer in front of a null pointer */
    if (ptr == 0)
        return;

    /* get original memory location */
    void * const original =
        *reinterpret_cast<void **>(static_cast<unsigned char *>(ptr) - sizeof(void *));
    std::free(original);
}
138 #endif
140 inline void * calloc_aligned(std::size_t nbytes)
142 void * ret = malloc_aligned(nbytes);
143 if (ret)
144 std::memset(ret, 0, nbytes);
145 return ret;
148 template <typename T>
149 T* malloc_aligned(std::size_t n)
151 return static_cast<T*>(malloc_aligned(n * sizeof(T)));
154 template <typename T>
155 T* calloc_aligned(std::size_t n)
157 return static_cast<T*>(calloc_aligned(n * sizeof(T)));
161 /** aligned allocator. uses malloc_aligned and free_aligned internally
162 * */
163 template <class T>
164 class aligned_allocator
166 public:
167 typedef std::size_t size_type;
168 typedef std::ptrdiff_t difference_type;
169 typedef T* pointer;
170 typedef const T* const_pointer;
171 typedef T& reference;
172 typedef const T& const_reference;
173 typedef T value_type;
175 template <class U> struct rebind
177 typedef aligned_allocator<U> other;
180 pointer address(reference x) const
182 return &x;
185 const_pointer address(const_reference x) const
187 return &x;
190 pointer allocate(size_type n,
191 const_pointer hint = 0)
193 pointer ret = malloc_aligned<T>(n);
194 if (ret == 0)
195 throw std::bad_alloc();
196 return ret;
199 void deallocate(pointer p, size_type n)
201 return free_aligned(p);
204 size_type max_size() const throw()
206 return size_type(-1) / sizeof(T);
209 void construct(pointer p, const T& val)
211 ::new(p) T(val);
214 void destroy(pointer p)
216 p->~T();
221 template<typename T, typename U>
222 bool operator==( aligned_allocator<T> const& left, aligned_allocator<U> const& right )
224 return !(left != right);
227 template<typename T, typename U>
228 bool operator!=( aligned_allocator<T> const& left, aligned_allocator<U> const& right )
230 return true;
namespace detail
{

/** copy policy of aligned_storage_ptr: pointers that are not managed may be copied */
template <bool managed>
class aligned_storage_copy_policy
{};

/** copy policy of aligned_storage_ptr: managed pointers must not be copied, since
 *  every copy would pass the same pointer to free_aligned */
template <>
class aligned_storage_copy_policy<true>
{
protected:
    aligned_storage_copy_policy(void)
    {}

    ~aligned_storage_copy_policy(void)
    {}

private:
    /* declared, but not implemented */
    aligned_storage_copy_policy(aligned_storage_copy_policy const &);
    aligned_storage_copy_policy & operator=(aligned_storage_copy_policy const &);
};

} /* namespace detail */

/** smart-pointer, freeing the managed pointer via free_aligned
 *
 *  if managed is true, the pointer is passed to free_aligned by the destructor and
 *  by reset(), and the smart-pointer cannot be copied. if managed is false, the pointer
 *  is never freed and the smart-pointer can be copied.
 *
 *  only the memory is released: no destructor of T is run.
 * */
template<class T, bool managed = true>
class aligned_storage_ptr:
    private detail::aligned_storage_copy_policy<managed>
{
public:
    explicit aligned_storage_ptr(T * p = 0):
        ptr(p)
    {}

    ~aligned_storage_ptr(void)
    {
        if (managed && ptr)
            free_aligned(ptr);
    }

    /** replace the stored pointer by p, freeing the old one if it is managed */
    void reset(T * p = 0)
    {
        /* freeing the stored pointer and storing it again would leave it dangling */
        if (p == ptr)
            return;

        if (managed && ptr)
            free_aligned(ptr);
        ptr = p;
    }

    T & operator*() const
    {
        return *ptr;
    }

    T * operator->() const
    {
        return ptr;
    }

    T * get() const
    {
        return ptr;
    }

    /** same as reset(p) */
    aligned_storage_ptr & operator=(T * p)
    {
        reset(p);
        return *this;
    }

    /** true, if the stored pointer is not null */
    operator bool() const
    {
        return bool(ptr);
    }

    /** exchange the stored pointers of both objects. nothing is freed */
    void swap(aligned_storage_ptr & b)
    {
        T * p = ptr;
        ptr = b.ptr;
        b.ptr = p;
    }

private:
    T * ptr;
};
293 } /* namespace nova */
295 #endif /* UTILITIES_MALLOC_ALIGNED_HPP */