babl: fix some annotation to make the function usable in bindings.
[babl.git] / extensions / sse4-int8.c
blob6c61bde6c43285d952e757127d0e7afee192a9df
/* babl - dynamically extendable universal pixel conversion library.
 * Copyright (C) 2013 Daniel Sabo
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 3 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General
 * Public License along with this library; if not, see
 * <https://www.gnu.org/licenses/>.
 */
#include "config.h"

#if defined(USE_SSE4_1)

/* SSE 4 */
#include <smmintrin.h>

#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#include "babl.h"
#include "babl-cpuaccel.h"
#include "extensions/util.h"
33 static inline void
34 conv_y8_yF (const Babl *conversion,
35 const uint8_t *src,
36 float *dst,
37 long samples)
39 const float factor = 1.0f / 255.0f;
40 const __v4sf factor_vec = {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f};
41 const uint32_t *s_vec;
42 __v4sf *d_vec;
44 long n = samples;
46 s_vec = (const uint32_t *)src;
47 d_vec = (__v4sf *)dst;
49 while (n >= 4)
51 __m128i in_val;
52 __v4sf out_val;
53 in_val = _mm_insert_epi32 ((__m128i)_mm_setzero_ps(), *s_vec++, 0);
54 in_val = _mm_cvtepu8_epi32 (in_val);
55 out_val = _mm_cvtepi32_ps (in_val) * factor_vec;
56 _mm_storeu_ps ((float *)d_vec++, out_val);
57 n -= 4;
60 src = (const uint8_t *)s_vec;
61 dst = (float *)d_vec;
63 while (n)
65 *dst++ = (float)(*src++) * factor;
66 n -= 1;
70 static void
71 conv_ya8_yaF (const Babl *conversion,
72 const uint8_t *src,
73 float *dst,
74 long samples)
76 conv_y8_yF (conversion, src, dst, samples * 2);
79 static void
80 conv_rgb8_rgbF (const Babl *conversion,
81 const uint8_t *src,
82 float *dst,
83 long samples)
85 conv_y8_yF (conversion, src, dst, samples * 3);
88 static void
89 conv_rgba8_rgbaF (const Babl *conversion,
90 const uint8_t *src,
91 float *dst,
92 long samples)
94 conv_y8_yF (conversion, src, dst, samples * 4);
97 #endif
int init (void);

/* Extension entry point (presumably called by babl's extension loader --
 * confirm against the loader).
 *
 * When built with USE_SSE4_1, creates the u8 and float formats for the
 * RGBA, RGB, YA and Y models in both their linear and gamma (primed)
 * variants, and, if the running CPU reports SSE4.1 support, registers the
 * u8 -> float conversions defined in this file for each pair.  Without
 * USE_SSE4_1 the function does nothing.
 *
 * Always returns 0.
 */
int
init (void)
{
#if defined(USE_SSE4_1)
  const Babl *rgbaF_linear = babl_format_new (
    babl_model ("RGBA"),
    babl_type ("float"),
    babl_component ("R"),
    babl_component ("G"),
    babl_component ("B"),
    babl_component ("A"),
    NULL);
  const Babl *rgba8_linear = babl_format_new (
    babl_model ("RGBA"),
    babl_type ("u8"),
    babl_component ("R"),
    babl_component ("G"),
    babl_component ("B"),
    babl_component ("A"),
    NULL);
  const Babl *rgbaF_gamma = babl_format_new (
    babl_model ("R'G'B'A"),
    babl_type ("float"),
    babl_component ("R'"),
    babl_component ("G'"),
    babl_component ("B'"),
    babl_component ("A"),
    NULL);
  const Babl *rgba8_gamma = babl_format_new (
    babl_model ("R'G'B'A"),
    babl_type ("u8"),
    babl_component ("R'"),
    babl_component ("G'"),
    babl_component ("B'"),
    babl_component ("A"),
    NULL);
  const Babl *rgbF_linear = babl_format_new (
    babl_model ("RGB"),
    babl_type ("float"),
    babl_component ("R"),
    babl_component ("G"),
    babl_component ("B"),
    NULL);
  const Babl *rgb8_linear = babl_format_new (
    babl_model ("RGB"),
    babl_type ("u8"),
    babl_component ("R"),
    babl_component ("G"),
    babl_component ("B"),
    NULL);
  const Babl *rgbF_gamma = babl_format_new (
    babl_model ("R'G'B'"),
    babl_type ("float"),
    babl_component ("R'"),
    babl_component ("G'"),
    babl_component ("B'"),
    NULL);
  const Babl *rgb8_gamma = babl_format_new (
    babl_model ("R'G'B'"),
    babl_type ("u8"),
    babl_component ("R'"),
    babl_component ("G'"),
    babl_component ("B'"),
    NULL);
  const Babl *yaF_linear = babl_format_new (
    babl_model ("YA"),
    babl_type ("float"),
    babl_component ("Y"),
    babl_component ("A"),
    NULL);
  const Babl *ya8_linear = babl_format_new (
    babl_model ("YA"),
    babl_type ("u8"),
    babl_component ("Y"),
    babl_component ("A"),
    NULL);
  const Babl *yaF_gamma = babl_format_new (
    babl_model ("Y'A"),
    babl_type ("float"),
    babl_component ("Y'"),
    babl_component ("A"),
    NULL);
  const Babl *ya8_gamma = babl_format_new (
    babl_model ("Y'A"),
    babl_type ("u8"),
    babl_component ("Y'"),
    babl_component ("A"),
    NULL);
  const Babl *yF_linear = babl_format_new (
    babl_model ("Y"),
    babl_type ("float"),
    babl_component ("Y"),
    NULL);
  const Babl *y8_linear = babl_format_new (
    babl_model ("Y"),
    babl_type ("u8"),
    babl_component ("Y"),
    NULL);
  const Babl *yF_gamma = babl_format_new (
    babl_model ("Y'"),
    babl_type ("float"),
    babl_component ("Y'"),
    NULL);
  const Babl *y8_gamma = babl_format_new (
    babl_model ("Y'"),
    babl_type ("u8"),
    babl_component ("Y'"),
    NULL);

  /* Register conv_<src>_<dst> for both the linear and the gamma format
   * pair: the same function serves both because the conversion only
   * rescales component values.  Wrapped in do/while (0) so that
   * `CONV (a, b);` expands to exactly one statement.
   */
#define CONV(src, dst) \
  do \
    { \
      babl_conversion_new (src ## _linear, dst ## _linear, "linear", conv_ ## src ## _ ## dst, NULL); \
      babl_conversion_new (src ## _gamma, dst ## _gamma, "linear", conv_ ## src ## _ ## dst, NULL); \
    } \
  while (0)

  /* Only register the SSE4.1 code paths when the CPU reports support. */
  if ((babl_cpu_accel_get_support () & BABL_CPU_ACCEL_X86_SSE4_1))
    {
      CONV(rgba8, rgbaF);
      CONV(rgb8,  rgbF);
      CONV(ya8,   yaF);
      CONV(y8,    yF);
    }

  /* The macro is only meaningful inside this function. */
#undef CONV

#endif
  return 0;
}