1 /* babl - dynamically extendable universal pixel conversion library.
2 * Copyright (C) 2015 Daniel Sabo
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 3 of the License, or (at your option) any later version.
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General
15 * Public License along with this library; if not, see
16 * <https://www.gnu.org/licenses/>.
21 #if defined(USE_SSE4_1) && defined(USE_F16C) && defined(ARCH_X86_64)
23 #include <immintrin.h>
29 #include "babl-cpuaccel.h"
30 #include "extensions/util.h"
33 conv_yHalf_yF (const Babl
*conversion
,
38 const uint64_t *s_vec
;
43 s_vec
= (const uint64_t *)src
;
44 d_vec
= (__v4sf
*)dst
;
48 __m128i in_val
= _mm_insert_epi64((__m128i
)_mm_setzero_ps(), *s_vec
++, 0);
49 __v4sf out_val
= (__v4sf
)_mm_cvtph_ps(in_val
);
50 _mm_storeu_ps((float *)d_vec
++, out_val
);
54 src
= (const uint16_t *)s_vec
;
59 __m128i in_val
= _mm_insert_epi16((__m128i
)_mm_setzero_ps(), *src
++, 0);
60 __v4sf out_val
= (__v4sf
)_mm_cvtph_ps(in_val
);
61 _mm_store_ss(dst
++, out_val
);
67 conv_yaHalf_yaF (const Babl
*conversion
,
72 conv_yHalf_yF (conversion
, src
, dst
, samples
* 2);
76 conv_rgbHalf_rgbF (const Babl
*conversion
,
81 conv_yHalf_yF (conversion
, src
, dst
, samples
* 3);
85 conv_rgbaHalf_rgbaF (const Babl
*conversion
,
90 conv_yHalf_yF (conversion
, src
, dst
, samples
* 4);
94 conv_yF_yHalf (const Babl
*conversion
,
104 s_vec
= (const __v4sf
*)src
;
105 d_vec
= (uint64_t *)dst
;
109 __m128 in_val
= _mm_loadu_ps((float *)s_vec
++);
110 __m128i out_val
= _mm_cvtps_ph(in_val
, _MM_FROUND_TO_NEAREST_INT
| _MM_FROUND_NO_EXC
);
111 _mm_storel_epi64((__m128i
*)d_vec
++, out_val
);
115 src
= (const float *)s_vec
;
116 dst
= (uint16_t *)d_vec
;
120 __m128 in_val
= _mm_load_ss(src
++);
121 __m128i out_val
= _mm_cvtps_ph(in_val
, _MM_FROUND_TO_NEAREST_INT
| _MM_FROUND_NO_EXC
);
122 *dst
++ = _mm_extract_epi16(out_val
, 0);
128 conv_yaF_yaHalf (const Babl
*conversion
,
133 conv_yF_yHalf (conversion
, src
, dst
, samples
* 2);
136 #define conv_yAF_yAHalf conv_yaF_yaHalf
137 #define conv_yAHalf_yAF conv_yaHalf_yaF
140 conv_rgbF_rgbHalf (const Babl
*conversion
,
145 conv_yF_yHalf (conversion
, src
, dst
, samples
* 3);
149 conv_rgbaF_rgbaHalf (const Babl
*conversion
,
154 conv_yF_yHalf (conversion
, src
, dst
, samples
* 4);
157 #endif /* defined(USE_SSE4_1) && defined(USE_F16C) && defined(ARCH_X86_64) */
164 #if defined(USE_SSE4_1) && defined(USE_F16C) && defined(ARCH_X86_64)
165 const Babl
*rgbaF_linear
= babl_format_new (
168 babl_component ("R"),
169 babl_component ("G"),
170 babl_component ("B"),
171 babl_component ("A"),
173 const Babl
*rgbaHalf_linear
= babl_format_new (
176 babl_component ("R"),
177 babl_component ("G"),
178 babl_component ("B"),
179 babl_component ("A"),
181 const Babl
*rgbaF_gamma
= babl_format_new (
182 babl_model ("R'G'B'A"),
184 babl_component ("R'"),
185 babl_component ("G'"),
186 babl_component ("B'"),
187 babl_component ("A"),
189 const Babl
*rgbaHalf_gamma
= babl_format_new (
190 babl_model ("R'G'B'A"),
192 babl_component ("R'"),
193 babl_component ("G'"),
194 babl_component ("B'"),
195 babl_component ("A"),
197 const Babl
*rgbF_linear
= babl_format_new (
200 babl_component ("R"),
201 babl_component ("G"),
202 babl_component ("B"),
204 const Babl
*rgbHalf_linear
= babl_format_new (
207 babl_component ("R"),
208 babl_component ("G"),
209 babl_component ("B"),
211 const Babl
*rgbF_gamma
= babl_format_new (
212 babl_model ("R'G'B'"),
214 babl_component ("R'"),
215 babl_component ("G'"),
216 babl_component ("B'"),
218 const Babl
*rgbHalf_gamma
= babl_format_new (
219 babl_model ("R'G'B'"),
221 babl_component ("R'"),
222 babl_component ("G'"),
223 babl_component ("B'"),
225 const Babl
*yaF_linear
= babl_format_new (
228 babl_component ("Y"),
229 babl_component ("A"),
231 const Babl
*yaHalf_linear
= babl_format_new (
234 babl_component ("Y"),
235 babl_component ("A"),
237 const Babl
*yaF_gamma
= babl_format_new (
240 babl_component ("Y'"),
241 babl_component ("A"),
243 const Babl
*yaHalf_gamma
= babl_format_new (
246 babl_component ("Y'"),
247 babl_component ("A"),
249 const Babl
*yAF_linear
= babl_format_new (
252 babl_component ("Ya"),
253 babl_component ("A"),
255 const Babl
*yAHalf_linear
= babl_format_new (
258 babl_component ("Ya"),
259 babl_component ("A"),
261 const Babl
*yAF_gamma
= babl_format_new (
264 babl_component ("Y'a"),
265 babl_component ("A"),
267 const Babl
*yAHalf_gamma
= babl_format_new (
270 babl_component ("Y'a"),
271 babl_component ("A"),
273 const Babl
*yF_linear
= babl_format_new (
276 babl_component ("Y"),
278 const Babl
*yHalf_linear
= babl_format_new (
281 babl_component ("Y"),
283 const Babl
*yF_gamma
= babl_format_new (
286 babl_component ("Y'"),
288 const Babl
*yHalf_gamma
= babl_format_new (
291 babl_component ("Y'"),
294 #define CONV(src, dst) \
296 babl_conversion_new (src ## _linear, dst ## _linear, "linear", conv_ ## src ## _ ## dst, NULL); \
297 babl_conversion_new (src ## _gamma, dst ## _gamma, "linear", conv_ ## src ## _ ## dst, NULL); \
300 if ((babl_cpu_accel_get_support () & BABL_CPU_ACCEL_X86_SSE4_1
) &&
301 (babl_cpu_accel_get_support () & BABL_CPU_ACCEL_X86_F16C
))
303 CONV(rgbaHalf
, rgbaF
);
308 CONV(rgbaF
, rgbaHalf
);
315 #endif /* defined(USE_SSE4_1) && defined(USE_F16C) && defined(ARCH_X86_64) */