babl: fix some annotation to make the function usable in bindings.
[babl.git] / extensions / fast-float.c
blob659d60f68ed4da09e8559bee61c38d567786a5e9
1 /* babl - dynamically extendable universal pixel conversion library.
2 * Copyright (C) 2012, Øyvind Kolås
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 3 of the License, or (at your option) any later version.
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General
15 * Public License along with this library; if not, see
16 * <https://www.gnu.org/licenses/>.
19 #include "config.h"
21 #include <stdint.h>
22 #include <stdlib.h>
24 #include "babl.h"
25 #include "babl-cpuaccel.h"
26 #include "extensions/util.h"
27 #include "base/util.h"
29 #define LSHIFT 4
/* Callback evaluated by the lookup machinery: it is called with the input
 * value and the opaque data pointer stored in the table, both for inputs
 * outside the cached ranges and to fill cache entries on first use.
 */
typedef float (*BablLookupFunction) (float  value,
                                     void  *data);

/* Upper bound on the number of cached entries of one table; it also sizes
 * the per-entry bit set in BablLookup.
 */
#define babl_LOOKUP_MAX_ENTRIES   (819200)

/* A lazily filled table of function values, indexed by a shifted version of
 * the input's float bit pattern.
 */
typedef struct BablLookup
{
  BablLookupFunction function;     /* function being cached                 */
  void              *data;         /* passed through to function            */
  int                shift;        /* right shift that forms the index      */
  uint32_t           positive_min; /* exclusive index bounds, first range   */
  uint32_t           positive_max;
  uint32_t           negative_min; /* exclusive index bounds, second range  */
  uint32_t           negative_max;
  /* one bit per table entry, set once the entry has been computed */
  uint32_t           bitmask[babl_LOOKUP_MAX_ENTRIES/32];
  int                entries;      /* number of floats in table             */
  float              table[];      /* cached values (flexible array member) */
} BablLookup;
/* Forward declaration of the table constructor: function and data are stored
 * in the table, start/end bound the cached input interval and precision
 * selects how coarsely inputs are bucketed.
 */
static BablLookup *babl_lookup_new  (BablLookupFunction  function,
                                     void *              data,
                                     float               start,
                                     float               end,
                                     float               precision);
#if 0
/* Compiled out together with the definition of babl_lookup_free(). */
static void        babl_lookup_free (BablLookup         *lookup);
#endif
56 #include <string.h>
58 static inline float
59 babl_lookup (BablLookup *lookup,
60 float number)
62 union { float f; uint32_t i; } u;
63 union { float f; uint32_t i; } ub;
64 union { float f; uint32_t i; } ua;
66 uint32_t i;
67 float dx = 0.0;
69 u.f = number;
70 i = (u.i << LSHIFT ) >> lookup->shift;
72 if (i > lookup->positive_min && i < lookup->positive_max)
74 ua.i = ((i) << lookup->shift) >> LSHIFT;
75 ub.i = ((i+ 1) << lookup->shift) >> LSHIFT;
77 i = i - lookup->positive_min;
79 else if (i > lookup->negative_min && i < lookup->negative_max)
82 ua.i = ((i) << lookup->shift) >> LSHIFT;
83 ub.i = ((i+ 1) << lookup->shift) >> LSHIFT;
85 i = i - lookup->negative_min + (lookup->positive_max - lookup->positive_min);
87 else
89 return lookup->function (number, lookup->data);
93 uint32_t bm =u.i & 0b11110000000000000000000000000000;
94 ua.i |= bm;
95 ub.i |= bm;
97 dx = (u.f-ua.f) / (ub.f - ua.f);
101 if (!(lookup->bitmask[i/32] & (1UL<<(i & 31))))
103 lookup->table[i]= lookup->function (ua.f, lookup->data);
104 lookup->bitmask[i/32] |= (1UL<<(i & 31));
106 i++;
107 if (i< lookup->entries-2)
109 if (!(lookup->bitmask[i/32] & (1UL<<(i & 31))))
111 lookup->table[i]= lookup->function (ub.f, lookup->data);
112 lookup->bitmask[i/32] |= (1UL<<(i & 31));
115 return lookup->table[i-1] * (1.0f-dx) +
116 lookup->table[i] * (dx);
118 else
120 return lookup->table[i-1];
125 static BablLookup *
126 babl_lookup_new (BablLookupFunction function,
127 void * data,
128 float start,
129 float end,
130 float precision)
132 BablLookup *lookup;
133 union
135 float f;
136 uint32_t i;
137 } u;
138 int positive_min, positive_max, negative_min, negative_max;
139 int shift;
141 /* normalize input parameters */
142 if (start > end)
143 { /* swap */
144 u.f = start;
145 start = end;
146 end = u.f;
149 if (precision <= 0.000005f) shift = 0; /* checked for later */
150 else if (precision <= 0.000010f) shift = 8;
151 else if (precision <= 0.000020f) shift = 9;
152 else if (precision <= 0.000040f) shift = 10;
153 else if (precision <= 0.000081f) shift = 11;
154 else if (precision <= 0.000161f) shift = 12;
155 else if (precision <= 0.000200f) shift = 13;
156 else if (precision <= 0.000324f) shift = 14;
157 else if (precision <= 0.000649f) shift = 15;
158 else shift = 16; /* a bit better than 8bit sRGB quality */
161 /* Adjust slightly away from 0.0, saving many entries close to 0, this
162 * causes lookups very close to zero to be passed directly to the
163 * function instead.
165 if (start == 0.0f)
166 start = precision;
167 if (end == 0.0f)
168 end = -precision;
170 /* Compute start and */
172 if (start < 0.0f || end < 0.0f)
174 if (end < 0.0f)
176 u.f = start;
177 positive_max = (u.i << LSHIFT) >> shift;
178 u.f = end;
179 positive_min = (u.i << LSHIFT) >> shift;
180 negative_min = positive_max;
181 negative_max = positive_max;
183 else
185 u.f = 0 - precision;
186 positive_min = (u.i << LSHIFT) >> shift;
187 u.f = start;
188 positive_max = (u.i << LSHIFT) >> shift;
190 u.f = 0 + precision;
191 negative_min = (u.i << LSHIFT) >> shift;
192 u.f = end;
193 negative_max = (u.i << LSHIFT) >> shift;
196 else
198 u.f = start;
199 positive_min = (u.i << LSHIFT) >> shift;
200 u.f = end;
201 positive_max = (u.i << LSHIFT) >> shift;
202 negative_min = positive_max;
203 negative_max = positive_max;
206 if (shift == 0) /* short circuit, do not use ranges */
208 positive_min = positive_max = negative_min = negative_max = 0;
211 if ((positive_max-positive_min) + (negative_max-negative_min) > babl_LOOKUP_MAX_ENTRIES)
213 /* Reduce the size of the cache tables to fit within the bittable
214 * budget (the maximum allocation is around 2.18mb of memory
217 int diff = (positive_max-positive_min) + (negative_max-negative_min) - babl_LOOKUP_MAX_ENTRIES;
219 if (negative_max - negative_min > 0)
221 if (negative_max - negative_min >= diff)
223 negative_max -= diff;
224 diff = 0;
226 else
228 diff -= negative_max - negative_min;
229 negative_max = negative_min;
232 if (diff)
233 positive_max-=diff;
236 lookup = calloc (sizeof (BablLookup) + sizeof (float) *
237 ((positive_max-positive_min)+
238 (negative_max-negative_min)), 1);
240 lookup->positive_min = positive_min;
241 lookup->positive_max = positive_max;
242 lookup->negative_min = negative_min;
243 lookup->negative_max = negative_max;
244 lookup->shift = shift;
245 lookup->function = function;
246 lookup->data = data;
248 lookup->entries = (positive_max-positive_min)+
249 (negative_max-negative_min);
251 return lookup;
254 static BablLookup *fast_pow = NULL;
256 static inline float core_lookup (float val, void *userdata)
258 return babl_linear_to_gamma_2_2f (val);
261 static float
262 linear_to_gamma_2_2_lut (float val)
264 return babl_lookup (fast_pow, val);
267 static BablLookup *fast_rpow = NULL;
269 static inline float core_rlookup (float val, void *userdata)
271 return babl_gamma_2_2_to_linearf (val);
274 static float
275 gamma_2_2_to_linear_lut (float val)
277 return babl_lookup (fast_rpow, val);
#if 0
/* Compiled out: release a table created by babl_lookup_new().  destroy()
 * calls free() on the tables directly instead.
 */
static void
babl_lookup_free (BablLookup *lookup)
{
  free (lookup);
}
#endif
288 static void
289 conv_rgbaF_linear_rgbAF_gamma (const Babl *conversion,
290 unsigned char *src,
291 unsigned char *dst,
292 long samples)
294 float *fsrc = (float *) src;
295 float *fdst = (float *) dst;
296 int n = samples;
298 while (n--)
300 float red = *fsrc++;
301 float green = *fsrc++;
302 float blue = *fsrc++;
303 float alpha = *fsrc++;
304 if (alpha == 1.0f)
306 *fdst++ = linear_to_gamma_2_2_lut (red);
307 *fdst++ = linear_to_gamma_2_2_lut (green);
308 *fdst++ = linear_to_gamma_2_2_lut (blue);
309 *fdst++ = alpha;
311 else
313 float used_alpha = babl_epsilon_for_zero_float (alpha);
314 *fdst++ = linear_to_gamma_2_2_lut (red) * used_alpha;
315 *fdst++ = linear_to_gamma_2_2_lut (green) * used_alpha;
316 *fdst++ = linear_to_gamma_2_2_lut (blue) * used_alpha;
317 *fdst++ = alpha;
324 static void
325 conv_rgbaF_linear_rgba8_gamma (const Babl *conversion,
326 unsigned char *src,
327 unsigned char *dst,
328 long samples)
330 float *fsrc = (float *) src;
331 uint8_t *cdst = (uint8_t *) dst;
332 int n = samples;
334 while (n--)
336 float red = *fsrc++;
337 float green = *fsrc++;
338 float blue = *fsrc++;
339 float alpha = *fsrc++;
340 if (alpha <= 0) /* XXX: we need to drop alpha!! ? */
342 *cdst++ = 0;
343 *cdst++ = 0;
344 *cdst++ = 0;
345 *cdst++ = 0;
347 else
349 int val = linear_to_gamma_2_2_lut (red) * 0xff + 0.5f;
350 *cdst++ = val >= 0xff ? 0xff : val <= 0 ? 0 : val;
351 val = linear_to_gamma_2_2_lut (green) * 0xff + 0.5f;
352 *cdst++ = val >= 0xff ? 0xff : val <= 0 ? 0 : val;
353 val = linear_to_gamma_2_2_lut (blue) * 0xff + 0.5f;
354 *cdst++ = val >= 0xff ? 0xff : val <= 0 ? 0 : val;
355 val = alpha * 0xff + 0.5f;
356 *cdst++ = val >= 0xff ? 0xff : val <= 0 ? 0 : val;
361 static void
362 conv_rgbaF_linear_rgbA8_gamma (const Babl *conversion,
363 unsigned char *src,
364 unsigned char *dst,
365 long samples)
367 float *fsrc = (float *) src;
368 uint8_t *cdst = (uint8_t *) dst;
369 int n = samples;
371 while (n--)
373 float red = *fsrc++;
374 float green = *fsrc++;
375 float blue = *fsrc++;
376 float alpha = *fsrc++;
377 if (alpha >= 1.0f)
379 int val = linear_to_gamma_2_2_lut (red) * 0xff + 0.5f;
380 *cdst++ = val >= 0xff ? 0xff : val <= 0 ? 0 : val;
381 val = linear_to_gamma_2_2_lut (green) * 0xff + 0.5f;
382 *cdst++ = val >= 0xff ? 0xff : val <= 0 ? 0 : val;
383 val = linear_to_gamma_2_2_lut (blue) * 0xff + 0.5f;
384 *cdst++ = val >= 0xff ? 0xff : val <= 0 ? 0 : val;
385 *cdst++ = 0xff;
387 else
389 float balpha = alpha * 0xff;
390 int val = linear_to_gamma_2_2_lut (red) * balpha + 0.5f;
391 *cdst++ = val >= 0xff ? 0xff : val <= 0 ? 0 : val;
392 val = linear_to_gamma_2_2_lut (green) * balpha + 0.5f;
393 *cdst++ = val >= 0xff ? 0xff : val <= 0 ? 0 : val;
394 val = linear_to_gamma_2_2_lut (blue) * balpha + 0.5f;
395 *cdst++ = val >= 0xff ? 0xff : val <= 0 ? 0 : val;
396 *cdst++ = balpha + 0.5f;
401 static void
402 conv_yaF_linear_rgbA8_gamma (const Babl *conversion,unsigned char *src,
403 unsigned char *dst,
404 long samples)
406 float *fsrc = (float *) src;
407 uint8_t *cdst = (uint8_t *) dst;
408 int n = samples;
410 while (n--)
412 float gray = *fsrc++;
413 float alpha = *fsrc++;
414 if (alpha >= 1.0f)
416 int val = linear_to_gamma_2_2_lut (gray) * 0xff + 0.5f;
417 *cdst++ = val >= 0xff ? 0xff : val <= 0 ? 0 : val;
418 *cdst++ = val >= 0xff ? 0xff : val <= 0 ? 0 : val;
419 *cdst++ = val >= 0xff ? 0xff : val <= 0 ? 0 : val;
420 *cdst++ = 0xff;
422 else if (alpha <= 0.0f)
424 *((uint32_t*)(cdst))=0;
425 cdst+=4;
427 else
429 float balpha = alpha * 0xff;
430 int val = linear_to_gamma_2_2_lut (gray) * balpha + 0.5f;
431 *cdst++ = val >= 0xff ? 0xff : val <= 0 ? 0 : val;
432 *cdst++ = val >= 0xff ? 0xff : val <= 0 ? 0 : val;
433 *cdst++ = val >= 0xff ? 0xff : val <= 0 ? 0 : val;
434 *cdst++ = balpha + 0.5f;
441 static void
442 conv_rgbaF_linear_rgbA8_gamma_cairo (const Babl *conversion,unsigned char *src,
443 unsigned char *dst,
444 long samples)
446 float *fsrc = (float *) src;
447 unsigned char *cdst = (unsigned char *) dst;
448 int n = samples;
450 while (n--)
452 float red = *fsrc++;
453 float green = *fsrc++;
454 float blue = *fsrc++;
455 float alpha = *fsrc++;
456 if (alpha >= 1.0f)
458 int val = linear_to_gamma_2_2_lut (blue) * 0xff + 0.5f;
459 *cdst++ = val >= 0xff ? 0xff : val <= 0 ? 0 : val;
460 val = linear_to_gamma_2_2_lut (green) * 0xff + 0.5f;
461 *cdst++ = val >= 0xff ? 0xff : val <= 0 ? 0 : val;
462 val = linear_to_gamma_2_2_lut (red) * 0xff + 0.5f;
463 *cdst++ = val >= 0xff ? 0xff : val <= 0 ? 0 : val;
464 *cdst++ = 0xff;
466 else
468 float balpha = alpha * 0xff;
469 int val = linear_to_gamma_2_2_lut (blue) * balpha + 0.5f;
470 *cdst++ = val >= 0xff ? 0xff : val <= 0 ? 0 : val;
471 val = linear_to_gamma_2_2_lut (green) * balpha + 0.5f;
472 *cdst++ = val >= 0xff ? 0xff : val <= 0 ? 0 : val;
473 val = linear_to_gamma_2_2_lut (red) * balpha + 0.5f;
474 *cdst++ = val >= 0xff ? 0xff : val <= 0 ? 0 : val;
475 *cdst++ = balpha + 0.5f;
480 static void
481 conv_rgbAF_linear_rgbAF_gamma (const Babl *conversion,
482 unsigned char *src,
483 unsigned char *dst,
484 long samples)
486 float *fsrc = (float *) src;
487 float *fdst = (float *) dst;
488 int n = samples;
490 while (n--)
492 float red = *fsrc++;
493 float green = *fsrc++;
494 float blue = *fsrc++;
495 float alpha = *fsrc++;
497 if (alpha == 1.0f)
499 *fdst++ = linear_to_gamma_2_2_lut (red);
500 *fdst++ = linear_to_gamma_2_2_lut (green);
501 *fdst++ = linear_to_gamma_2_2_lut (blue);
502 *fdst++ = *fsrc++;
504 else
506 float alpha_recip = 1.0f / alpha;
507 *fdst++ = linear_to_gamma_2_2_lut (red * alpha_recip) * alpha;
508 *fdst++ = linear_to_gamma_2_2_lut (green * alpha_recip) * alpha;
509 *fdst++ = linear_to_gamma_2_2_lut (blue * alpha_recip) * alpha;
510 *fdst++ = alpha;
515 static void
516 conv_rgbaF_linear_rgbaF_gamma (const Babl *conversion,
517 unsigned char *src,
518 unsigned char *dst,
519 long samples)
521 float *fsrc = (float *) src;
522 float *fdst = (float *) dst;
523 int n = samples;
525 while (n--)
527 *fdst++ = linear_to_gamma_2_2_lut (*fsrc++);
528 *fdst++ = linear_to_gamma_2_2_lut (*fsrc++);
529 *fdst++ = linear_to_gamma_2_2_lut (*fsrc++);
530 *fdst++ = *fsrc++;
534 static void
535 conv_rgbF_linear_rgbF_gamma (const Babl *conversion,unsigned char *src,
536 unsigned char *dst,
537 long samples)
539 float *fsrc = (float *) src;
540 float *fdst = (float *) dst;
541 int n = samples;
543 while (n--)
545 *fdst++ = linear_to_gamma_2_2_lut (*fsrc++);
546 *fdst++ = linear_to_gamma_2_2_lut (*fsrc++);
547 *fdst++ = linear_to_gamma_2_2_lut (*fsrc++);
552 static void
553 conv_rgbaF_gamma_rgbaF_linear (const Babl *conversion,
554 unsigned char *src,
555 unsigned char *dst,
556 long samples)
558 float *fsrc = (float *) src;
559 float *fdst = (float *) dst;
560 int n = samples;
562 while (n--)
564 *fdst++ = gamma_2_2_to_linear_lut (*fsrc++);
565 *fdst++ = gamma_2_2_to_linear_lut (*fsrc++);
566 *fdst++ = gamma_2_2_to_linear_lut (*fsrc++);
567 *fdst++ = *fsrc++;
571 static void
572 conv_rgbF_gamma_rgbF_linear (const Babl *conversion,
573 unsigned char *src,
574 unsigned char *dst,
575 long samples)
577 float *fsrc = (float *) src;
578 float *fdst = (float *) dst;
579 int n = samples;
581 while (n--)
583 *fdst++ = gamma_2_2_to_linear_lut (*fsrc++);
584 *fdst++ = gamma_2_2_to_linear_lut (*fsrc++);
585 *fdst++ = gamma_2_2_to_linear_lut (*fsrc++);
/* Register the function conv_<src>_<dst> as the conversion named "linear"
 * from format src to format dst.  Both arguments are pasted into the
 * function name, so they must be the plain identifiers of the format
 * variables declared in init().
 */
#define o(src, dst) \
  babl_conversion_new (src, dst, "linear", conv_ ## src ## _ ## dst, NULL)
592 #include "babl-verify-cpu.inc"
int init (void);

/* Extension entry point: looks up the babl formats handled by this file and
 * returns 0.
 *
 * Everything after the first "return 0" - creating the two lookup tables,
 * touching them over [0, 1) and registering the conversions - is currently
 * unreachable on purpose (see the XXX note at that return).
 */
int
init (void)
{
  BABL_VERIFY_CPU();

  /* linear gray + alpha, float */
  const Babl *yaF_linear = babl_format_new (
    babl_model ("YA"),
    babl_type ("float"),
    babl_component ("Y"),
    babl_component ("A"),
    NULL);

  /* linear RGBA, float */
  const Babl *rgbaF_linear = babl_format_new (
    babl_model ("RGBA"),
    babl_type ("float"),
    babl_component ("R"),
    babl_component ("G"),
    babl_component ("B"),
    babl_component ("A"),
    NULL);
  /* linear RaGaBaA, float */
  const Babl *rgbAF_linear = babl_format_new (
    babl_model ("RaGaBaA"),
    babl_type ("float"),
    babl_component ("Ra"),
    babl_component ("Ga"),
    babl_component ("Ba"),
    babl_component ("A"),
    NULL);
  /* R'G'B'A, float */
  const Babl *rgbaF_gamma = babl_format_new (
    babl_model ("R'G'B'A"),
    babl_type ("float"),
    babl_component ("R'"),
    babl_component ("G'"),
    babl_component ("B'"),
    babl_component ("A"),
    NULL);
  /* R'aG'aB'aA, float */
  const Babl *rgbAF_gamma = babl_format_new (
    babl_model ("R'aG'aB'aA"),
    babl_type ("float"),
    babl_component ("R'a"),
    babl_component ("G'a"),
    babl_component ("B'a"),
    babl_component ("A"),
    NULL);

  /* R'aG'aB'aA, 8 bit */
  const Babl *rgbA8_gamma = babl_format_new (
    babl_model ("R'aG'aB'aA"),
    babl_type ("u8"),
    babl_component ("R'a"),
    babl_component ("G'a"),
    babl_component ("B'a"),
    babl_component ("A"),
    NULL);

  /* R'G'B'A, 8 bit */
  const Babl *rgba8_gamma = babl_format_new (
    babl_model ("R'G'B'A"),
    babl_type ("u8"),
    babl_component ("R'"),
    babl_component ("G'"),
    babl_component ("B'"),
    babl_component ("A"),
    NULL);

  /* linear RGB, float */
  const Babl *rgbF_linear = babl_format_new (
    babl_model ("RGB"),
    babl_type ("float"),
    babl_component ("R"),
    babl_component ("G"),
    babl_component ("B"),
    NULL);
  /* R'G'B', float */
  const Babl *rgbF_gamma = babl_format_new (
    babl_model ("R'G'B'"),
    babl_type ("float"),
    babl_component ("R'"),
    babl_component ("G'"),
    babl_component ("B'"),
    NULL);

  return 0;  // XXX: the fast paths registered here doesn't correctly
             //      clamp negative values - disabling for now
  {
    float f;
    float a;

    /* tweaking the precision - does impact speed.. */
    fast_pow  = babl_lookup_new (core_lookup, NULL, 0.0f, 1.0f, 0.000199f);
    fast_rpow = babl_lookup_new (core_rlookup, NULL, 0.0f, 1.0f, 0.000250f);

    /* run both lookups over [0, 1) so the table entries get computed */
    for (f = 0.0; f < 1.0f; f+= 0.0000001f)
      {
        a = linear_to_gamma_2_2_lut (f);
        a = gamma_2_2_to_linear_lut (f);
      }
    /* uses a so the results of the loop above are not unused */
    if (a < -10)
      f = 2;

  }

  {
    /* 8 bit R'aG'aB'aA with components in B'a, G'a, R'a, A order */
    const Babl *f32 = babl_format_new (
      "name", "cairo-ARGB32",
      babl_model ("R'aG'aB'aA"),
      babl_type ("u8"),
      babl_component ("B'a"),
      babl_component ("G'a"),
      babl_component ("R'a"),
      babl_component ("A"),
      NULL
    );

    babl_conversion_new (rgbaF_linear, f32, "linear", conv_rgbaF_linear_rgbA8_gamma_cairo, NULL);
  }

  /* each line registers conv_<first>_<second>, see the o() macro */
  o (rgbaF_linear, rgbA8_gamma);
  o (rgbAF_linear, rgbAF_gamma);
  o (rgbaF_linear, rgbAF_gamma);
  o (rgbaF_linear, rgbaF_gamma);
  o (rgbaF_linear, rgba8_gamma);
  o (rgbaF_gamma,  rgbaF_linear);
  o (rgbF_linear,  rgbF_gamma);
  o (rgbF_gamma,   rgbF_linear);
  o (yaF_linear,   rgbA8_gamma);

  return 0;
}
723 void destroy (void);
725 void
726 destroy (void)
728 free (fast_rpow);
729 free (fast_pow);