extensions/sse4-int8.c

   1 /* babl - dynamically extendable universal pixel conversion library.
   2  * Copyright (C) 2013 Daniel Sabo
   3  *
   4  * This library is free software; you can redistribute it and/or
   5  * modify it under the terms of the GNU Lesser General Public
   6  * License as published by the Free Software Foundation; either
   7  * version 3 of the License, or (at your option) any later version.
   8  *
   9  * This library is distributed in the hope that it will be useful,
  10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  12  * Lesser General Public License for more details.
  13  *
  14  * You should have received a copy of the GNU Lesser General
  15  * Public License along with this library; if not, see
  16  * <https://www.gnu.org/licenses/>.
  17  */
  18
  19 #include "config.h"
  20
  21 #if defined(USE_SSE4_1)
  22
  23 /* SSE 4 */
#include <smmintrin.h>

#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#include "babl.h"
#include "babl-cpuaccel.h"
#include "extensions/util.h"
  32
  33 static inline void
  34 conv_y8_yF (const Babl    *conversion,
  35             const uint8_t *src,
  36             float         *dst,
  37             long           samples)
  38 {
  39   const float     factor = 1.0f / 255.0f;
  40   const __v4sf    factor_vec = {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f};
  41   const uint32_t *s_vec;
  42   __v4sf         *d_vec;
  43
  44   long n = samples;
  45
  46   s_vec = (const uint32_t *)src;
  47   d_vec = (__v4sf *)dst;
  48
  49   while (n >= 4)
  50     {
  51       __m128i in_val;
  52       __v4sf out_val;
  53       in_val = _mm_insert_epi32 ((__m128i)_mm_setzero_ps(), *s_vec++, 0);
  54       in_val = _mm_cvtepu8_epi32 (in_val);
  55       out_val = _mm_cvtepi32_ps (in_val) * factor_vec;
  56       _mm_storeu_ps ((float *)d_vec++, out_val);
  57       n -= 4;
  58     }
  59
  60   src = (const uint8_t *)s_vec;
  61   dst = (float *)d_vec;
  62
  63   while (n)
  64     {
  65       *dst++ = (float)(*src++) * factor;
  66       n -= 1;
  67     }
  68 }
  69
  70 static void
  71 conv_ya8_yaF (const Babl    *conversion,
  72               const uint8_t *src,
  73               float         *dst,
  74               long           samples)
  75 {
  76   conv_y8_yF (conversion, src, dst, samples * 2);
  77 }
  78
  79 static void
  80 conv_rgb8_rgbF (const Babl    *conversion,
  81                 const uint8_t *src,
  82                 float         *dst,
  83                 long           samples)
  84 {
  85   conv_y8_yF (conversion, src, dst, samples * 3);
  86 }
  87
  88 static void
  89 conv_rgba8_rgbaF (const Babl    *conversion,
  90                   const uint8_t *src,
  91                   float         *dst,
  92                   long           samples)
  93 {
  94   conv_y8_yF (conversion, src, dst, samples * 4);
  95 }
  96
  97 #endif
  98
  99 int init (void);
 100
 101 int
 102 init (void)
 103 {
 104 #if defined(USE_SSE4_1)
 105   const Babl *rgbaF_linear = babl_format_new (
 106     babl_model ("RGBA"),
 107     babl_type ("float"),
 108     babl_component ("R"),
 109     babl_component ("G"),
 110     babl_component ("B"),
 111     babl_component ("A"),
 112     NULL);
 113   const Babl *rgba8_linear = babl_format_new (
 114     babl_model ("RGBA"),
 115     babl_type ("u8"),
 116     babl_component ("R"),
 117     babl_component ("G"),
 118     babl_component ("B"),
 119     babl_component ("A"),
 120     NULL);
 121   const Babl *rgbaF_gamma = babl_format_new (
 122     babl_model ("R'G'B'A"),
 123     babl_type ("float"),
 124     babl_component ("R'"),
 125     babl_component ("G'"),
 126     babl_component ("B'"),
 127     babl_component ("A"),
 128     NULL);
 129   const Babl *rgba8_gamma = babl_format_new (
 130     babl_model ("R'G'B'A"),
 131     babl_type ("u8"),
 132     babl_component ("R'"),
 133     babl_component ("G'"),
 134     babl_component ("B'"),
 135     babl_component ("A"),
 136     NULL);
 137   const Babl *rgbF_linear = babl_format_new (
 138     babl_model ("RGB"),
 139     babl_type ("float"),
 140     babl_component ("R"),
 141     babl_component ("G"),
 142     babl_component ("B"),
 143     NULL);
 144   const Babl *rgb8_linear = babl_format_new (
 145     babl_model ("RGB"),
 146     babl_type ("u8"),
 147     babl_component ("R"),
 148     babl_component ("G"),
 149     babl_component ("B"),
 150     NULL);
 151   const Babl *rgbF_gamma = babl_format_new (
 152     babl_model ("R'G'B'"),
 153     babl_type ("float"),
 154     babl_component ("R'"),
 155     babl_component ("G'"),
 156     babl_component ("B'"),
 157     NULL);
 158   const Babl *rgb8_gamma = babl_format_new (
 159     babl_model ("R'G'B'"),
 160     babl_type ("u8"),
 161     babl_component ("R'"),
 162     babl_component ("G'"),
 163     babl_component ("B'"),
 164     NULL);
 165   const Babl *yaF_linear = babl_format_new (
 166     babl_model ("YA"),
 167     babl_type ("float"),
 168     babl_component ("Y"),
 169     babl_component ("A"),
 170     NULL);
 171   const Babl *ya8_linear = babl_format_new (
 172     babl_model ("YA"),
 173     babl_type ("u8"),
 174     babl_component ("Y"),
 175     babl_component ("A"),
 176     NULL);
 177   const Babl *yaF_gamma = babl_format_new (
 178     babl_model ("Y'A"),
 179     babl_type ("float"),
 180     babl_component ("Y'"),
 181     babl_component ("A"),
 182     NULL);
 183   const Babl *ya8_gamma = babl_format_new (
 184     babl_model ("Y'A"),
 185     babl_type ("u8"),
 186     babl_component ("Y'"),
 187     babl_component ("A"),
 188     NULL);
 189   const Babl *yF_linear = babl_format_new (
 190     babl_model ("Y"),
 191     babl_type ("float"),
 192     babl_component ("Y"),
 193     NULL);
 194   const Babl *y8_linear = babl_format_new (
 195     babl_model ("Y"),
 196     babl_type ("u8"),
 197     babl_component ("Y"),
 198     NULL);
 199   const Babl *yF_gamma = babl_format_new (
 200     babl_model ("Y'"),
 201     babl_type ("float"),
 202     babl_component ("Y'"),
 203     NULL);
 204   const Babl *y8_gamma = babl_format_new (
 205     babl_model ("Y'"),
 206     babl_type ("u8"),
 207     babl_component ("Y'"),
 208     NULL);
 209
 210 #define CONV(src, dst) \
 211 { \
 212   babl_conversion_new (src ## _linear, dst ## _linear, "linear", conv_ ## src ## _ ## dst, NULL); \
 213   babl_conversion_new (src ## _gamma, dst ## _gamma, "linear", conv_ ## src ## _ ## dst, NULL); \
 214 }
 215
 216   if ((babl_cpu_accel_get_support () & BABL_CPU_ACCEL_X86_SSE4_1))
 217     {
 218       CONV(rgba8, rgbaF);
 219       CONV(rgb8,  rgbF);
 220       CONV(ya8,   yaF);
 221       CONV(y8,    yF);
 222     }
 223
 224 #endif
 225   return 0;
 226 }
 227