third_party/qcms/google.patch

   1 diff --git a/third_party/qcms/src/iccread.c b/third_party/qcms/src/iccread.c
   2 index 36b7011..d3c3dfe 100644
   3 --- a/third_party/qcms/src/iccread.c
   4 +++ b/third_party/qcms/src/iccread.c
   5 @@ -266,7 +266,7 @@ qcms_bool qcms_profile_is_bogus(qcms_profile *profile)
   6         if (profile->color_space != RGB_SIGNATURE)
   7                return false;
   8
   9 -       if (profile->A2B0 || profile->B2A0)
  10 +       if (qcms_supports_iccv4 && (profile->A2B0 || profile->B2A0))
  11                 return false;
  12
  13         rX = s15Fixed16Number_to_float(profile->redColorant.X);
  14 @@ -297,6 +297,11 @@ qcms_bool qcms_profile_is_bogus(qcms_profile *profile)
  15         sum[1] = rY + gY + bY;
  16         sum[2] = rZ + gZ + bZ;
  17
  18 +#if defined (_MSC_VER)
  19 +#pragma warning(push)
  20 +/* Disable double to float truncation warning 4305 */
  21 +#pragma warning(disable:4305)
  22 +#endif
  23         // Build our target vector (see mozilla bug 460629)
  24         target[0] = 0.96420;
  25         target[1] = 1.00000;
  26 @@ -310,6 +315,10 @@ qcms_bool qcms_profile_is_bogus(qcms_profile *profile)
  27         tolerance[1] = 0.02;
  28         tolerance[2] = 0.04;
  29
  30 +#if defined (_MSC_VER)
  31 +/* Restore warnings */
  32 +#pragma warning(pop)
  33 +#endif
  34         // Compare with our tolerance
  35         for (i = 0; i < 3; ++i) {
  36             if (!(((sum[i] - tolerance[i]) <= target[i]) &&
  37 @@ -402,7 +411,7 @@ static struct XYZNumber read_tag_XYZType(struct mem_source *src, struct tag_inde
  38  // present that are not part of the tag_index.
  39  static struct curveType *read_curveType(struct mem_source *src, uint32_t offset, uint32_t *len)
  40  {
  41 -       static const size_t COUNT_TO_LENGTH[5] = {1, 3, 4, 5, 7};
  42 +       static const uint32_t COUNT_TO_LENGTH[5] = {1, 3, 4, 5, 7};
  43         struct curveType *curve = NULL;
  44         uint32_t type = read_u32(src, offset);
  45         uint32_t count;
  46 @@ -657,7 +666,7 @@ static struct lutType *read_tag_lutType(struct mem_source *src, struct tag_index
  47         uint16_t num_input_table_entries;
  48         uint16_t num_output_table_entries;
  49         uint8_t in_chan, grid_points, out_chan;
  50 -       uint32_t clut_offset, output_offset;
  51 +       size_t clut_offset, output_offset;
  52         uint32_t clut_size;
  53         size_t entry_size;
  54         struct lutType *lut;
  55 diff --git a/third_party/qcms/src/qcms.h b/third_party/qcms/src/qcms.h
  56 index 7d83623..1e3e125 100644
  57 --- a/third_party/qcms/src/qcms.h
  58 +++ b/third_party/qcms/src/qcms.h
  59 @@ -102,6 +102,12 @@ typedef enum {
  60         QCMS_DATA_GRAYA_8
  61  } qcms_data_type;
  62
  63 +/* Channel order (RGBX or BGRX) of the output data for qcms_transform_data_type() */
  64 +typedef enum {
  65 +       QCMS_OUTPUT_RGBX,
  66 +       QCMS_OUTPUT_BGRX
  67 +} qcms_output_type;
  68 +
  69  /* the names for the following two types are sort of ugly */
  70  typedef struct
  71  {
  72 @@ -146,6 +152,7 @@ qcms_transform* qcms_transform_create(
  73  void qcms_transform_release(qcms_transform *);
  74
  75  void qcms_transform_data(qcms_transform *transform, void *src, void *dest, size_t length);
  76 +void qcms_transform_data_type(qcms_transform *transform, void *src, void *dest, size_t length, qcms_output_type type);
  77
  78  void qcms_enable_iccv4();
  79
  80 diff --git a/third_party/qcms/src/qcmsint.h b/third_party/qcms/src/qcmsint.h
  81 index 53a3420..63905de 100644
  82 --- a/third_party/qcms/src/qcmsint.h
  83 +++ b/third_party/qcms/src/qcmsint.h
  84 @@ -45,6 +45,11 @@ struct precache_output
  85  #define ALIGN __attribute__(( aligned (16) ))
  86  #endif
  87
  88 +typedef struct _qcms_format_type {
  89 +       int r;
  90 +       int b;
  91 +} qcms_format_type;
  92 +
  93  struct _qcms_transform {
  94         float ALIGN matrix[3][4];
  95         float *input_gamma_table_r;
  96 @@ -88,7 +93,7 @@ struct _qcms_transform {
  97         struct precache_output *output_table_g;
  98         struct precache_output *output_table_b;
  99
 100 -       void (*transform_fn)(struct _qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length);
 101 +       void (*transform_fn)(struct _qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, struct _qcms_format_type output_format);
 102  };
 103
 104  struct matrix {
 105 @@ -280,18 +285,22 @@ qcms_bool set_rgb_colorants(qcms_profile *profile, qcms_CIE_xyY white_point, qcm
 106  void qcms_transform_data_rgb_out_lut_sse2(qcms_transform *transform,
 107                                            unsigned char *src,
 108                                            unsigned char *dest,
 109 -                                          size_t length);
 110 +                                          size_t length,
 111 +                                          qcms_format_type output_format);
 112  void qcms_transform_data_rgba_out_lut_sse2(qcms_transform *transform,
 113                                            unsigned char *src,
 114                                            unsigned char *dest,
 115 -                                          size_t length);
 116 +                                          size_t length,
 117 +                                          qcms_format_type output_format);
 118  void qcms_transform_data_rgb_out_lut_sse1(qcms_transform *transform,
 119                                            unsigned char *src,
 120                                            unsigned char *dest,
 121 -                                          size_t length);
 122 +                                          size_t length,
 123 +                                          qcms_format_type output_format);
 124  void qcms_transform_data_rgba_out_lut_sse1(qcms_transform *transform,
 125                                            unsigned char *src,
 126                                            unsigned char *dest,
 127 -                                          size_t length);
 128 +                                          size_t length,
 129 +                                          qcms_format_type output_format);
 130
 131  extern qcms_bool qcms_supports_iccv4;
 132 diff --git a/third_party/qcms/src/qcmstypes.h b/third_party/qcms/src/qcmstypes.h
 133 index 56d8de3..9a9b197 100644
 134 --- a/third_party/qcms/src/qcmstypes.h
 135 +++ b/third_party/qcms/src/qcmstypes.h
 136 @@ -87,7 +87,12 @@ typedef unsigned __int64 uint64_t;
 137  #ifdef _WIN64
 138  typedef unsigned __int64 uintptr_t;
 139  #else
 140 +#pragma warning(push)
 141 +/* Disable benign redefinition of type warning 4142 */
 142 +#pragma warning(disable:4142)
 143  typedef unsigned long uintptr_t;
 144 +/* Restore warnings */
 145 +#pragma warning(pop)
 146  #endif
 147
 148  #elif defined (_AIX)
 149 diff --git a/third_party/qcms/src/transform-sse1.c b/third_party/qcms/src/transform-sse1.c
 150 index 2f34db5..aaee1bf 100644
 151 --- a/third_party/qcms/src/transform-sse1.c
 152 +++ b/third_party/qcms/src/transform-sse1.c
 153 @@ -34,7 +34,8 @@ static const ALIGN float clampMaxValueX4[4] =
 154  void qcms_transform_data_rgb_out_lut_sse1(qcms_transform *transform,
 155                                            unsigned char *src,
 156                                            unsigned char *dest,
 157 -                                          size_t length)
 158 +                                          size_t length,
 159 +                                          qcms_format_type output_format)
 160  {
 161      unsigned int i;
 162      float (*mat)[4] = transform->matrix;
 163 @@ -70,6 +71,8 @@ void qcms_transform_data_rgb_out_lut_sse1(qcms_transform *transform,
 164
 165      /* working variables */
 166      __m128 vec_r, vec_g, vec_b, result;
 167 +    const int r_out = output_format.r;
 168 +    const int b_out = output_format.b;
 169
 170      /* CYA */
 171      if (!length)
 172 @@ -116,9 +119,9 @@ void qcms_transform_data_rgb_out_lut_sse1(qcms_transform *transform,
 173          src += 3;
 174
 175          /* use calc'd indices to output RGB values */
 176 -        dest[0] = otdata_r[output[0]];
 177 -        dest[1] = otdata_g[output[1]];
 178 -        dest[2] = otdata_b[output[2]];
 179 +        dest[r_out] = otdata_r[output[0]];
 180 +        dest[1]     = otdata_g[output[1]];
 181 +        dest[b_out] = otdata_b[output[2]];
 182          dest += 3;
 183      }
 184
 185 @@ -141,9 +144,9 @@ void qcms_transform_data_rgb_out_lut_sse1(qcms_transform *transform,
 186      result = _mm_movehl_ps(result, result);
 187      *((__m64 *)&output[2]) = _mm_cvtps_pi32(result);
 188
 189 -    dest[0] = otdata_r[output[0]];
 190 -    dest[1] = otdata_g[output[1]];
 191 -    dest[2] = otdata_b[output[2]];
 192 +    dest[r_out] = otdata_r[output[0]];
 193 +    dest[1]     = otdata_g[output[1]];
 194 +    dest[b_out] = otdata_b[output[2]];
 195
 196      _mm_empty();
 197  }
 198 @@ -151,7 +154,8 @@ void qcms_transform_data_rgb_out_lut_sse1(qcms_transform *transform,
 199  void qcms_transform_data_rgba_out_lut_sse1(qcms_transform *transform,
 200                                             unsigned char *src,
 201                                             unsigned char *dest,
 202 -                                           size_t length)
 203 +                                           size_t length,
 204 +                                           qcms_format_type output_format)
 205  {
 206      unsigned int i;
 207      float (*mat)[4] = transform->matrix;
 208 @@ -187,6 +191,8 @@ void qcms_transform_data_rgba_out_lut_sse1(qcms_transform *transform,
 209
 210      /* working variables */
 211      __m128 vec_r, vec_g, vec_b, result;
 212 +    const int r_out = output_format.r;
 213 +    const int b_out = output_format.b;
 214      unsigned char alpha;
 215
 216      /* CYA */
 217 @@ -239,9 +245,9 @@ void qcms_transform_data_rgba_out_lut_sse1(qcms_transform *transform,
 218          src += 4;
 219
 220          /* use calc'd indices to output RGB values */
 221 -        dest[0] = otdata_r[output[0]];
 222 -        dest[1] = otdata_g[output[1]];
 223 -        dest[2] = otdata_b[output[2]];
 224 +        dest[r_out] = otdata_r[output[0]];
 225 +        dest[1]     = otdata_g[output[1]];
 226 +        dest[b_out] = otdata_b[output[2]];
 227          dest += 4;
 228      }
 229
 230 @@ -266,9 +272,9 @@ void qcms_transform_data_rgba_out_lut_sse1(qcms_transform *transform,
 231      result = _mm_movehl_ps(result, result);
 232      *((__m64 *)&output[2]) = _mm_cvtps_pi32(result);
 233
 234 -    dest[0] = otdata_r[output[0]];
 235 -    dest[1] = otdata_g[output[1]];
 236 -    dest[2] = otdata_b[output[2]];
 237 +    dest[r_out] = otdata_r[output[0]];
 238 +    dest[1]     = otdata_g[output[1]];
 239 +    dest[b_out] = otdata_b[output[2]];
 240
 241      _mm_empty();
 242  }
 243 diff --git a/third_party/qcms/src/transform-sse2.c b/third_party/qcms/src/transform-sse2.c
 244 index 6a5faf9..fa7f2d1 100644
 245 --- a/third_party/qcms/src/transform-sse2.c
 246 +++ b/third_party/qcms/src/transform-sse2.c
 247 @@ -34,7 +34,8 @@ static const ALIGN float clampMaxValueX4[4] =
 248  void qcms_transform_data_rgb_out_lut_sse2(qcms_transform *transform,
 249                                            unsigned char *src,
 250                                            unsigned char *dest,
 251 -                                          size_t length)
 252 +                                          size_t length,
 253 +                                          qcms_format_type output_format)
 254  {
 255      unsigned int i;
 256      float (*mat)[4] = transform->matrix;
 257 @@ -70,6 +71,8 @@ void qcms_transform_data_rgb_out_lut_sse2(qcms_transform *transform,
 258
 259      /* working variables */
 260      __m128 vec_r, vec_g, vec_b, result;
 261 +    const int r_out = output_format.r;
 262 +    const int b_out = output_format.b;
 263
 264      /* CYA */
 265      if (!length)
 266 @@ -114,9 +117,9 @@ void qcms_transform_data_rgb_out_lut_sse2(qcms_transform *transform,
 267          src += 3;
 268
 269          /* use calc'd indices to output RGB values */
 270 -        dest[0] = otdata_r[output[0]];
 271 -        dest[1] = otdata_g[output[1]];
 272 -        dest[2] = otdata_b[output[2]];
 273 +        dest[r_out] = otdata_r[output[0]];
 274 +        dest[1]     = otdata_g[output[1]];
 275 +        dest[b_out] = otdata_b[output[2]];
 276          dest += 3;
 277      }
 278
 279 @@ -137,15 +140,16 @@ void qcms_transform_data_rgb_out_lut_sse2(qcms_transform *transform,
 280
 281      _mm_store_si128((__m128i*)output, _mm_cvtps_epi32(result));
 282
 283 -    dest[0] = otdata_r[output[0]];
 284 -    dest[1] = otdata_g[output[1]];
 285 -    dest[2] = otdata_b[output[2]];
 286 +    dest[r_out] = otdata_r[output[0]];
 287 +    dest[1]     = otdata_g[output[1]];
 288 +    dest[b_out] = otdata_b[output[2]];
 289  }
 290
 291  void qcms_transform_data_rgba_out_lut_sse2(qcms_transform *transform,
 292                                             unsigned char *src,
 293                                             unsigned char *dest,
 294 -                                           size_t length)
 295 +                                           size_t length,
 296 +                                           qcms_format_type output_format)
 297  {
 298      unsigned int i;
 299      float (*mat)[4] = transform->matrix;
 300 @@ -181,6 +185,8 @@ void qcms_transform_data_rgba_out_lut_sse2(qcms_transform *transform,
 301
 302      /* working variables */
 303      __m128 vec_r, vec_g, vec_b, result;
 304 +    const int r_out = output_format.r;
 305 +    const int b_out = output_format.b;
 306      unsigned char alpha;
 307
 308      /* CYA */
 309 @@ -231,9 +237,9 @@ void qcms_transform_data_rgba_out_lut_sse2(qcms_transform *transform,
 310          src += 4;
 311
 312          /* use calc'd indices to output RGB values */
 313 -        dest[0] = otdata_r[output[0]];
 314 -        dest[1] = otdata_g[output[1]];
 315 -        dest[2] = otdata_b[output[2]];
 316 +        dest[r_out] = otdata_r[output[0]];
 317 +        dest[1]     = otdata_g[output[1]];
 318 +        dest[b_out] = otdata_b[output[2]];
 319          dest += 4;
 320      }
 321
 322 @@ -256,7 +262,7 @@ void qcms_transform_data_rgba_out_lut_sse2(qcms_transform *transform,
 323
 324      _mm_store_si128((__m128i*)output, _mm_cvtps_epi32(result));
 325
 326 -    dest[0] = otdata_r[output[0]];
 327 -    dest[1] = otdata_g[output[1]];
 328 -    dest[2] = otdata_b[output[2]];
 329 +    dest[r_out] = otdata_r[output[0]];
 330 +    dest[1]     = otdata_g[output[1]];
 331 +    dest[b_out] = otdata_b[output[2]];
 332  }
 333 diff --git a/third_party/qcms/src/transform.c b/third_party/qcms/src/transform.c
 334 index 9a6562b..7312ced 100644
 335 --- a/third_party/qcms/src/transform.c
 336 +++ b/third_party/qcms/src/transform.c
 337 @@ -181,11 +181,20 @@ compute_chromatic_adaption(struct CIE_XYZ source_white_point,
 338  static struct matrix
 339  adaption_matrix(struct CIE_XYZ source_illumination, struct CIE_XYZ target_illumination)
 340  {
 341 +#if defined (_MSC_VER)
 342 +#pragma warning(push)
 343 +/* Disable double to float truncation warning 4305 */
 344 +#pragma warning(disable:4305)
 345 +#endif
 346         struct matrix lam_rigg = {{ // Bradford matrix
 347                                  {  0.8951,  0.2664, -0.1614 },
 348                                  { -0.7502,  1.7135,  0.0367 },
 349                                  {  0.0389, -0.0685,  1.0296 }
 350                                  }};
 351 +#if defined (_MSC_VER)
 352 +/* Restore warnings */
 353 +#pragma warning(pop)
 354 +#endif
 355         return compute_chromatic_adaption(source_illumination, target_illumination, lam_rigg);
 356  }
 357
 358 @@ -230,8 +239,11 @@ qcms_bool set_rgb_colorants(qcms_profile *profile, qcms_CIE_xyY white_point, qcm
 359  }
 360
 361  #if 0
 362 -static void qcms_transform_data_rgb_out_pow(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
 363 +static void qcms_transform_data_rgb_out_pow(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
 364  {
 365 +       const int r_out = output_format.r;
 366 +       const int b_out = output_format.b;
 367 +
 368         int i;
 369         float (*mat)[4] = transform->matrix;
 370         for (i=0; i<length; i++) {
 371 @@ -251,15 +263,19 @@ static void qcms_transform_data_rgb_out_pow(qcms_transform *transform, unsigned
 372                 float out_device_g = pow(out_linear_g, transform->out_gamma_g);
 373                 float out_device_b = pow(out_linear_b, transform->out_gamma_b);
 374
 375 -               *dest++ = clamp_u8(255*out_device_r);
 376 -               *dest++ = clamp_u8(255*out_device_g);
 377 -               *dest++ = clamp_u8(255*out_device_b);
 378 +               dest[r_out] = clamp_u8(out_device_r*255);
 379 +               dest[1]     = clamp_u8(out_device_g*255);
 380 +               dest[b_out] = clamp_u8(out_device_b*255);
 381 +               dest += 3;
 382         }
 383  }
 384  #endif
 385
 386 -static void qcms_transform_data_gray_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
 387 +static void qcms_transform_data_gray_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
 388  {
 389 +       const int r_out = output_format.r;
 390 +       const int b_out = output_format.b;
 391 +
 392         unsigned int i;
 393         for (i = 0; i < length; i++) {
 394                 float out_device_r, out_device_g, out_device_b;
 395 @@ -267,13 +283,14 @@ static void qcms_transform_data_gray_out_lut(qcms_transform *transform, unsigned
 396
 397                 float linear = transform->input_gamma_table_gray[device];
 398
 399 -                out_device_r = lut_interp_linear(linear, transform->output_gamma_lut_r, transform->output_gamma_lut_r_length);
 400 +               out_device_r = lut_interp_linear(linear, transform->output_gamma_lut_r, transform->output_gamma_lut_r_length);
 401                 out_device_g = lut_interp_linear(linear, transform->output_gamma_lut_g, transform->output_gamma_lut_g_length);
 402                 out_device_b = lut_interp_linear(linear, transform->output_gamma_lut_b, transform->output_gamma_lut_b_length);
 403
 404 -               *dest++ = clamp_u8(out_device_r*255);
 405 -               *dest++ = clamp_u8(out_device_g*255);
 406 -               *dest++ = clamp_u8(out_device_b*255);
 407 +               dest[r_out] = clamp_u8(out_device_r*255);
 408 +               dest[1]     = clamp_u8(out_device_g*255);
 409 +               dest[b_out] = clamp_u8(out_device_b*255);
 410 +               dest += 3;
 411         }
 412  }
 413
 414 @@ -283,8 +300,11 @@ static void qcms_transform_data_gray_out_lut(qcms_transform *transform, unsigned
 415         See: ftp://ftp.alvyray.com/Acrobat/17_Nonln.pdf
 416  */
 417
 418 -static void qcms_transform_data_graya_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
 419 +static void qcms_transform_data_graya_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
 420  {
 421 +       const int r_out = output_format.r;
 422 +       const int b_out = output_format.b;
 423 +
 424         unsigned int i;
 425         for (i = 0; i < length; i++) {
 426                 float out_device_r, out_device_g, out_device_b;
 427 @@ -293,20 +313,24 @@ static void qcms_transform_data_graya_out_lut(qcms_transform *transform, unsigne
 428
 429                 float linear = transform->input_gamma_table_gray[device];
 430
 431 -                out_device_r = lut_interp_linear(linear, transform->output_gamma_lut_r, transform->output_gamma_lut_r_length);
 432 +               out_device_r = lut_interp_linear(linear, transform->output_gamma_lut_r, transform->output_gamma_lut_r_length);
 433                 out_device_g = lut_interp_linear(linear, transform->output_gamma_lut_g, transform->output_gamma_lut_g_length);
 434                 out_device_b = lut_interp_linear(linear, transform->output_gamma_lut_b, transform->output_gamma_lut_b_length);
 435
 436 -               *dest++ = clamp_u8(out_device_r*255);
 437 -               *dest++ = clamp_u8(out_device_g*255);
 438 -               *dest++ = clamp_u8(out_device_b*255);
 439 -               *dest++ = alpha;
 440 +               dest[r_out] = clamp_u8(out_device_r*255);
 441 +               dest[1]     = clamp_u8(out_device_g*255);
 442 +               dest[b_out] = clamp_u8(out_device_b*255);
 443 +               dest[3]     = alpha;
 444 +               dest += 4;
 445         }
 446  }
 447
 448
 449 -static void qcms_transform_data_gray_out_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
 450 +static void qcms_transform_data_gray_out_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
 451  {
 452 +       const int r_out = output_format.r;
 453 +       const int b_out = output_format.b;
 454 +
 455         unsigned int i;
 456         for (i = 0; i < length; i++) {
 457                 unsigned char device = *src++;
 458 @@ -317,14 +341,19 @@ static void qcms_transform_data_gray_out_precache(qcms_transform *transform, uns
 459                 /* we could round here... */
 460                 gray = linear * PRECACHE_OUTPUT_MAX;
 461
 462 -               *dest++ = transform->output_table_r->data[gray];
 463 -               *dest++ = transform->output_table_g->data[gray];
 464 -               *dest++ = transform->output_table_b->data[gray];
 465 +               dest[r_out] = transform->output_table_r->data[gray];
 466 +               dest[1]     = transform->output_table_g->data[gray];
 467 +               dest[b_out] = transform->output_table_b->data[gray];
 468 +               dest += 3;
 469         }
 470  }
 471
 472 -static void qcms_transform_data_graya_out_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
 473 +
 474 +static void qcms_transform_data_graya_out_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
 475  {
 476 +       const int r_out = output_format.r;
 477 +       const int b_out = output_format.b;
 478 +
 479         unsigned int i;
 480         for (i = 0; i < length; i++) {
 481                 unsigned char device = *src++;
 482 @@ -336,15 +365,19 @@ static void qcms_transform_data_graya_out_precache(qcms_transform *transform, un
 483                 /* we could round here... */
 484                 gray = linear * PRECACHE_OUTPUT_MAX;
 485
 486 -               *dest++ = transform->output_table_r->data[gray];
 487 -               *dest++ = transform->output_table_g->data[gray];
 488 -               *dest++ = transform->output_table_b->data[gray];
 489 -               *dest++ = alpha;
 490 +               dest[r_out] = transform->output_table_r->data[gray];
 491 +               dest[1]     = transform->output_table_g->data[gray];
 492 +               dest[b_out] = transform->output_table_b->data[gray];
 493 +               dest[3]     = alpha;
 494 +               dest += 4;
 495         }
 496  }
 497
 498 -static void qcms_transform_data_rgb_out_lut_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
 499 +static void qcms_transform_data_rgb_out_lut_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
 500  {
 501 +       const int r_out = output_format.r;
 502 +       const int b_out = output_format.b;
 503 +
 504         unsigned int i;
 505         float (*mat)[4] = transform->matrix;
 506         for (i = 0; i < length; i++) {
 507 @@ -370,14 +403,18 @@ static void qcms_transform_data_rgb_out_lut_precache(qcms_transform *transform,
 508                 g = out_linear_g * PRECACHE_OUTPUT_MAX;
 509                 b = out_linear_b * PRECACHE_OUTPUT_MAX;
 510
 511 -               *dest++ = transform->output_table_r->data[r];
 512 -               *dest++ = transform->output_table_g->data[g];
 513 -               *dest++ = transform->output_table_b->data[b];
 514 +               dest[r_out] = transform->output_table_r->data[r];
 515 +               dest[1]     = transform->output_table_g->data[g];
 516 +               dest[b_out] = transform->output_table_b->data[b];
 517 +               dest += 3;
 518         }
 519  }
 520
 521 -static void qcms_transform_data_rgba_out_lut_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
 522 +static void qcms_transform_data_rgba_out_lut_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
 523  {
 524 +       const int r_out = output_format.r;
 525 +       const int b_out = output_format.b;
 526 +
 527         unsigned int i;
 528         float (*mat)[4] = transform->matrix;
 529         for (i = 0; i < length; i++) {
 530 @@ -404,16 +441,21 @@ static void qcms_transform_data_rgba_out_lut_precache(qcms_transform *transform,
 531                 g = out_linear_g * PRECACHE_OUTPUT_MAX;
 532                 b = out_linear_b * PRECACHE_OUTPUT_MAX;
 533
 534 -               *dest++ = transform->output_table_r->data[r];
 535 -               *dest++ = transform->output_table_g->data[g];
 536 -               *dest++ = transform->output_table_b->data[b];
 537 -               *dest++ = alpha;
 538 +               dest[r_out] = transform->output_table_r->data[r];
 539 +               dest[1]     = transform->output_table_g->data[g];
 540 +               dest[b_out] = transform->output_table_b->data[b];
 541 +               dest[3]     = alpha;
 542 +               dest += 4;
 543         }
 544  }
 545
 546  // Not used
 547  /*
 548 -static void qcms_transform_data_clut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length) {
 549 +static void qcms_transform_data_clut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
 550 +{
 551 +       const int r_out = output_format.r;
 552 +       const int b_out = output_format.b;
 553 +
 554         unsigned int i;
 555         int xy_len = 1;
 556         int x_len = transform->grid_size;
 557 @@ -462,15 +504,20 @@ static void qcms_transform_data_clut(qcms_transform *transform, unsigned char *s
 558                 float b_y2 = lerp(b_x3, b_x4, y_d);
 559                 float clut_b = lerp(b_y1, b_y2, z_d);
 560
 561 -               *dest++ = clamp_u8(clut_r*255.0f);
 562 -               *dest++ = clamp_u8(clut_g*255.0f);
 563 -               *dest++ = clamp_u8(clut_b*255.0f);
 564 -       }
 565 +               dest[r_out] = clamp_u8(clut_r*255.0f);
 566 +               dest[1]     = clamp_u8(clut_g*255.0f);
 567 +               dest[b_out] = clamp_u8(clut_b*255.0f);
 568 +               dest += 3;
 569 +       }
 570  }
 571  */
 572
 573  // Using lcms' tetra interpolation algorithm.
 574 -static void qcms_transform_data_tetra_clut_rgba(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length) {
 575 +static void qcms_transform_data_tetra_clut_rgba(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
 576 +{
 577 +       const int r_out = output_format.r;
 578 +       const int b_out = output_format.b;
 579 +
 580         unsigned int i;
 581         int xy_len = 1;
 582         int x_len = transform->grid_size;
 583 @@ -577,15 +624,20 @@ static void qcms_transform_data_tetra_clut_rgba(qcms_transform *transform, unsig
 584                 clut_g = c0_g + c1_g*rx + c2_g*ry + c3_g*rz;
 585                 clut_b = c0_b + c1_b*rx + c2_b*ry + c3_b*rz;
 586
 587 -               *dest++ = clamp_u8(clut_r*255.0f);
 588 -               *dest++ = clamp_u8(clut_g*255.0f);
 589 -               *dest++ = clamp_u8(clut_b*255.0f);
 590 -               *dest++ = in_a;
 591 -       }
 592 +               dest[r_out] = clamp_u8(clut_r*255.0f);
 593 +               dest[1]     = clamp_u8(clut_g*255.0f);
 594 +               dest[b_out] = clamp_u8(clut_b*255.0f);
 595 +               dest[3]     = in_a;
 596 +               dest += 4;
 597 +       }
 598  }
 599
 600  // Using lcms' tetra interpolation code.
 601 -static void qcms_transform_data_tetra_clut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length) {
 602 +static void qcms_transform_data_tetra_clut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
 603 +{
 604 +       const int r_out = output_format.r;
 605 +       const int b_out = output_format.b;
 606 +
 607         unsigned int i;
 608         int xy_len = 1;
 609         int x_len = transform->grid_size;
 610 @@ -691,14 +743,18 @@ static void qcms_transform_data_tetra_clut(qcms_transform *transform, unsigned c
 611                 clut_g = c0_g + c1_g*rx + c2_g*ry + c3_g*rz;
 612                 clut_b = c0_b + c1_b*rx + c2_b*ry + c3_b*rz;
 613
 614 -               *dest++ = clamp_u8(clut_r*255.0f);
 615 -               *dest++ = clamp_u8(clut_g*255.0f);
 616 -               *dest++ = clamp_u8(clut_b*255.0f);
 617 -       }
 618 +               dest[r_out] = clamp_u8(clut_r*255.0f);
 619 +               dest[1]     = clamp_u8(clut_g*255.0f);
 620 +               dest[b_out] = clamp_u8(clut_b*255.0f);
 621 +               dest += 3;
 622 +       }
 623  }
 624
 625 -static void qcms_transform_data_rgb_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
 626 +static void qcms_transform_data_rgb_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
 627  {
 628 +       const int r_out = output_format.r;
 629 +       const int b_out = output_format.b;
 630 +
 631         unsigned int i;
 632         float (*mat)[4] = transform->matrix;
 633         for (i = 0; i < length; i++) {
 634 @@ -726,14 +782,18 @@ static void qcms_transform_data_rgb_out_lut(qcms_transform *transform, unsigned
 635                 out_device_b = lut_interp_linear(out_linear_b,
 636                                 transform->output_gamma_lut_b, transform->output_gamma_lut_b_length);
 637
 638 -               *dest++ = clamp_u8(out_device_r*255);
 639 -               *dest++ = clamp_u8(out_device_g*255);
 640 -               *dest++ = clamp_u8(out_device_b*255);
 641 +               dest[r_out] = clamp_u8(out_device_r*255);
 642 +               dest[1]     = clamp_u8(out_device_g*255);
 643 +               dest[b_out] = clamp_u8(out_device_b*255);
 644 +               dest += 3;
 645         }
 646  }
 647
 648 -static void qcms_transform_data_rgba_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
 649 +static void qcms_transform_data_rgba_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
 650  {
 651 +       const int r_out = output_format.r;
 652 +       const int b_out = output_format.b;
 653 +
 654         unsigned int i;
 655         float (*mat)[4] = transform->matrix;
 656         for (i = 0; i < length; i++) {
 657 @@ -762,16 +822,20 @@ static void qcms_transform_data_rgba_out_lut(qcms_transform *transform, unsigned
 658                 out_device_b = lut_interp_linear(out_linear_b,
 659                                 transform->output_gamma_lut_b, transform->output_gamma_lut_b_length);
 660
 661 -               *dest++ = clamp_u8(out_device_r*255);
 662 -               *dest++ = clamp_u8(out_device_g*255);
 663 -               *dest++ = clamp_u8(out_device_b*255);
 664 -               *dest++ = alpha;
 665 +               dest[r_out] = clamp_u8(out_device_r*255);
 666 +               dest[1]     = clamp_u8(out_device_g*255);
 667 +               dest[b_out] = clamp_u8(out_device_b*255);
 668 +               dest[3]     = alpha;
 669 +               dest += 4;
 670         }
 671  }
 672
 673  #if 0
 674 -static void qcms_transform_data_rgb_out_linear(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
 675 +static void qcms_transform_data_rgb_out_linear(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
 676  {
 677 +       const int r_out = output_format.r;
 678 +       const int b_out = output_format.b;
 679 +
 680         int i;
 681         float (*mat)[4] = transform->matrix;
 682         for (i = 0; i < length; i++) {
 683 @@ -787,9 +851,10 @@ static void qcms_transform_data_rgb_out_linear(qcms_transform *transform, unsign
 684                 float out_linear_g = mat[0][1]*linear_r + mat[1][1]*linear_g + mat[2][1]*linear_b;
 685                 float out_linear_b = mat[0][2]*linear_r + mat[1][2]*linear_g + mat[2][2]*linear_b;
 686
 687 -               *dest++ = clamp_u8(out_linear_r*255);
 688 -               *dest++ = clamp_u8(out_linear_g*255);
 689 -               *dest++ = clamp_u8(out_linear_b*255);
 690 +               dest[r_out] = clamp_u8(out_linear_r*255);
 691 +               dest[1]     = clamp_u8(out_linear_g*255);
 692 +               dest[b_out] = clamp_u8(out_linear_b*255);
 693 +               dest += 3;
 694         }
 695  }
 696  #endif
 697 @@ -815,7 +880,7 @@ void precache_release(struct precache_output *p)
 698         }
 699  }
 700
 701 -#ifdef HAS_POSIX_MEMALIGN
 702 +#ifdef HAVE_POSIX_MEMALIGN
 703  static qcms_transform *transform_alloc(void)
 704  {
 705         qcms_transform *t;
 706 @@ -994,13 +1059,15 @@ void qcms_profile_precache_output_transform(qcms_profile *profile)
 707         if (profile->color_space != RGB_SIGNATURE)
 708                 return;
 709
 710 -       /* don't precache since we will use the B2A LUT */
 711 -       if (profile->B2A0)
 712 -               return;
 713 +       if (qcms_supports_iccv4) {
 714 +               /* don't precache since we will use the B2A LUT */
 715 +               if (profile->B2A0)
 716 +                       return;
 717
 718 -       /* don't precache since we will use the mBA LUT */
 719 -       if (profile->mBA)
 720 -               return;
 721 +               /* don't precache since we will use the mBA LUT */
 722 +               if (profile->mBA)
 723 +                       return;
 724 +       }
 725
 726         /* don't precache if we do not have the TRC curves */
 727         if (!profile->redTRC || !profile->greenTRC || !profile->blueTRC)
 728 @@ -1157,14 +1224,14 @@ qcms_transform* qcms_transform_create(
 729                         return NULL;
 730                 }
 731                 if (precache) {
 732 -#ifdef X86
 733 +#if defined(SSE2_ENABLE) && defined(X86)
 734                     if (sse_version_available() >= 2) {
 735                             if (in_type == QCMS_DATA_RGB_8)
 736                                     transform->transform_fn = qcms_transform_data_rgb_out_lut_sse2;
 737                             else
 738                                     transform->transform_fn = qcms_transform_data_rgba_out_lut_sse2;
 739
 740 -#if !(defined(_MSC_VER) && defined(_M_AMD64))
 741 +#if defined(SSE2_ENABLE) && !(defined(_MSC_VER) && defined(_M_AMD64))
 742                      /* Microsoft Compiler for x64 doesn't support MMX.
 743                       * SSE code uses MMX so that we disable on x64 */
 744                     } else
 745 @@ -1256,13 +1323,34 @@ qcms_transform* qcms_transform_create(
 746         return transform;
 747  }
 748
 749 -#if defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__)
 750 +/* __force_align_arg_pointer__ is an x86-only attribute, and gcc/clang warns on unused
 751 + * attributes. Don't use this on ARM or AMD64. __has_attribute can detect the presence
 752 + * of the attribute but is currently only supported by clang */
 753 +#if defined(__has_attribute)
 754 +#define HAS_FORCE_ALIGN_ARG_POINTER __has_attribute(__force_align_arg_pointer__)
 755 +#elif defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__) && !defined(__arm__)
 756 +#define HAS_FORCE_ALIGN_ARG_POINTER 1
 757 +#else
 758 +#define HAS_FORCE_ALIGN_ARG_POINTER 0
 759 +#endif
 760 +
 761 +#if HAS_FORCE_ALIGN_ARG_POINTER
 762  /* we need this to avoid crashes when gcc assumes the stack is 128bit aligned */
 763  __attribute__((__force_align_arg_pointer__))
 764  #endif
 765  void qcms_transform_data(qcms_transform *transform, void *src, void *dest, size_t length)
 766  {
 767 -       transform->transform_fn(transform, src, dest, length);
 768 +       static const struct _qcms_format_type output_rgbx = { 0, 2 };
 769 +
 770 +       transform->transform_fn(transform, src, dest, length, output_rgbx);
 771 +}
 772 +
 773 +void qcms_transform_data_type(qcms_transform *transform, void *src, void *dest, size_t length, qcms_output_type type)
 774 +{
 775 +       static const struct _qcms_format_type output_rgbx = { 0, 2 };
 776 +       static const struct _qcms_format_type output_bgrx = { 2, 0 };
 777 +
 778 +       transform->transform_fn(transform, src, dest, length, type == QCMS_OUTPUT_BGRX ? output_bgrx : output_rgbx);
 779  }
 780
 781  qcms_bool qcms_supports_iccv4;
 782 diff --git a/third_party/qcms/src/transform_util.c b/third_party/qcms/src/transform_util.c
 783 index e8447e5..f4338b2 100644
 784 --- a/third_party/qcms/src/transform_util.c
 785 +++ b/third_party/qcms/src/transform_util.c
 786 @@ -36,7 +36,7 @@
 787
 788  /* value must be a value between 0 and 1 */
 789  //XXX: is the above a good restriction to have?
 790 -float lut_interp_linear(double value, uint16_t *table, int length)
 791 +float lut_interp_linear(double value, uint16_t *table, size_t length)
 792  {
 793         int upper, lower;
 794         value = value * (length - 1); // scale to length of the array
 795 @@ -49,11 +49,11 @@ float lut_interp_linear(double value, uint16_t *table, int length)
 796  }
 797
 798  /* same as above but takes and returns a uint16_t value representing a range from 0..1 */
 799 -uint16_t lut_interp_linear16(uint16_t input_value, uint16_t *table, int length)
 800 +uint16_t lut_interp_linear16(uint16_t input_value, uint16_t *table, size_t length)
 801  {
 802         /* Start scaling input_value to the length of the array: 65535*(length-1).
 803          * We'll divide out the 65535 next */
 804 -       uint32_t value = (input_value * (length - 1));
 805 +       uintptr_t value = (input_value * (length - 1));
 806         uint32_t upper = (value + 65534) / 65535; /* equivalent to ceil(value/65535) */
 807         uint32_t lower = value / 65535;           /* equivalent to floor(value/65535) */
 808         /* interp is the distance from upper to value scaled to 0..65535 */
 809 @@ -67,11 +67,11 @@ uint16_t lut_interp_linear16(uint16_t input_value, uint16_t *table, int length)
 810  /* same as above but takes an input_value from 0..PRECACHE_OUTPUT_MAX
 811   * and returns a uint8_t value representing a range from 0..1 */
 812  static
 813 -uint8_t lut_interp_linear_precache_output(uint32_t input_value, uint16_t *table, int length)
 814 +uint8_t lut_interp_linear_precache_output(uint32_t input_value, uint16_t *table, size_t length)
 815  {
 816         /* Start scaling input_value to the length of the array: PRECACHE_OUTPUT_MAX*(length-1).
 817          * We'll divide out the PRECACHE_OUTPUT_MAX next */
 818 -       uint32_t value = (input_value * (length - 1));
 819 +       uintptr_t value = (input_value * (length - 1));
 820
 821         /* equivalent to ceil(value/PRECACHE_OUTPUT_MAX) */
 822         uint32_t upper = (value + PRECACHE_OUTPUT_MAX-1) / PRECACHE_OUTPUT_MAX;
 823 @@ -91,7 +91,7 @@ uint8_t lut_interp_linear_precache_output(uint32_t input_value, uint16_t *table,
 824
 825  /* value must be a value between 0 and 1 */
 826  //XXX: is the above a good restriction to have?
 827 -float lut_interp_linear_float(float value, float *table, int length)
 828 +float lut_interp_linear_float(float value, float *table, size_t length)
 829  {
 830          int upper, lower;
 831          value = value * (length - 1);
 832 @@ -235,6 +235,21 @@ float u8Fixed8Number_to_float(uint16_t x)
 833         return x/256.;
 834  }
 835
 836 +/* The SSE2 code uses min & max which let NaNs pass through.
 837 +   We want to try to prevent that here by ensuring that
 838 +   gamma table is within expected values. */
 839 +void validate_gamma_table(float gamma_table[256])
 840 +{
 841 +       int i;
 842 +       for (i = 0; i < 256; i++) {
 843 +               // Note: we check that the gamma is not in range
 844 +               // instead of out of range so that we catch NaNs
 845 +               if (!(gamma_table[i] >= 0.f && gamma_table[i] <= 1.f)) {
 846 +                       gamma_table[i] = 0.f;
 847 +               }
 848 +       }
 849 +}
 850 +
 851  float *build_input_gamma_table(struct curveType *TRC)
 852  {
 853         float *gamma_table;
 854 @@ -254,7 +269,10 @@ float *build_input_gamma_table(struct curveType *TRC)
 855                         }
 856                 }
 857         }
 858 -        return gamma_table;
 859 +
 860 +       validate_gamma_table(gamma_table);
 861 +
 862 +       return gamma_table;
 863  }
 864
 865  struct matrix build_colorant_matrix(qcms_profile *p)
 866 @@ -390,7 +408,7 @@ uint16_fract_t lut_inverse_interp16(uint16_t Value, uint16_t LutTable[], int len
 867   which has an maximum error of about 9855 (pixel difference of ~38.346)
 868
 869   For now, we punt the decision of output size to the caller. */
 870 -static uint16_t *invert_lut(uint16_t *table, int length, int out_length)
 871 +static uint16_t *invert_lut(uint16_t *table, int length, size_t out_length)
 872  {
 873          int i;
 874          /* for now we invert the lut by creating a lut of size out_length
 875 diff --git a/third_party/qcms/src/transform_util.h b/third_party/qcms/src/transform_util.h
 876 index 8f358a8..de465f4 100644
 877 --- a/third_party/qcms/src/transform_util.h
 878 +++ b/third_party/qcms/src/transform_util.h
 879 @@ -31,9 +31,9 @@
 880  //XXX: could use a bettername
 881  typedef uint16_t uint16_fract_t;
 882
 883 -float lut_interp_linear(double value, uint16_t *table, int length);
 884 -float lut_interp_linear_float(float value, float *table, int length);
 885 -uint16_t lut_interp_linear16(uint16_t input_value, uint16_t *table, int length);
 886 +float lut_interp_linear(double value, uint16_t *table, size_t length);
 887 +float lut_interp_linear_float(float value, float *table, size_t length);
 888 +uint16_t lut_interp_linear16(uint16_t input_value, uint16_t *table, size_t length);
 889
 890
 891  static inline float lerp(float a, float b, float t)