third_party/qcms/google.patch

   1 diff --git a/third_party/qcms/src/iccread.c b/third_party/qcms/src/iccread.c
   2 index 36b7011..69b7141 100644
   3 --- a/third_party/qcms/src/iccread.c
   4 +++ b/third_party/qcms/src/iccread.c
   5 @@ -266,7 +266,7 @@ qcms_bool qcms_profile_is_bogus(qcms_profile *profile)
   6         if (profile->color_space != RGB_SIGNATURE)
   7                return false;
   8
   9 -       if (profile->A2B0 || profile->B2A0)
  10 +       if (qcms_supports_iccv4 && (profile->A2B0 || profile->B2A0))
  11                 return false;
  12
  13         rX = s15Fixed16Number_to_float(profile->redColorant.X);
  14 @@ -297,6 +297,11 @@ qcms_bool qcms_profile_is_bogus(qcms_profile *profile)
  15         sum[1] = rY + gY + bY;
  16         sum[2] = rZ + gZ + bZ;
  17
  18 +#if defined (_MSC_VER)
  19 +#pragma warning(push)
  20 +/* Disable double to float truncation warning 4305 */
  21 +#pragma warning(disable:4305)
  22 +#endif
  23         // Build our target vector (see mozilla bug 460629)
  24         target[0] = 0.96420;
  25         target[1] = 1.00000;
  26 @@ -310,6 +315,10 @@ qcms_bool qcms_profile_is_bogus(qcms_profile *profile)
  27         tolerance[1] = 0.02;
  28         tolerance[2] = 0.04;
  29
  30 +#if defined (_MSC_VER)
  31 +/* Restore warnings */
  32 +#pragma warning(pop)
  33 +#endif
  34         // Compare with our tolerance
  35         for (i = 0; i < 3; ++i) {
  36             if (!(((sum[i] - tolerance[i]) <= target[i]) &&
  37 @@ -331,6 +340,7 @@ qcms_bool qcms_profile_is_bogus(qcms_profile *profile)
  38  #define TAG_A2B0 0x41324230
  39  #define TAG_B2A0 0x42324130
  40  #define TAG_CHAD 0x63686164
  41 +#define TAG_desc 0x64657363
  42
  43  static struct tag *find_tag(struct tag_index index, uint32_t tag_id)
  44  {
  45 @@ -344,6 +354,47 @@ static struct tag *find_tag(struct tag_index index, uint32_t tag_id)
  46         return tag;
  47  }
  48
  49 +#define DESC_TYPE 0x64657363 // 'desc'
  50 +#define MLUC_TYPE 0x6d6c7563 // 'mluc'
   51 +// Copy the description stored under tag_id into profile->description (truncated, NUL-terminated); returns false if the tag is missing, unsupported, or src ends up invalid.
   52 +static bool read_tag_descType(qcms_profile *profile, struct mem_source *src, struct tag_index index, uint32_t tag_id)
   53 +{
   54 +       struct tag *tag = find_tag(index, tag_id);
   55 +       if (tag) {
   56 +               const uint32_t limit = sizeof profile->description; // capacity of the destination buffer, terminator included
   57 +               uint32_t offset = tag->offset;
   58 +               uint32_t type = read_u32(src, offset); // type signature at the start of the tag data
   59 +               uint32_t length = read_u32(src, offset+8); // DESC_TYPE: number of bytes to copy; MLUC_TYPE: only tested for non-zero, then replaced
   60 +               uint32_t i, description; // description = offset in src of the first byte to copy
   61 +               if (length && type == MLUC_TYPE) {
   62 +                       length = read_u32(src, offset+20); // value at offset+20 is used below as the number of bytes to copy
   63 +                       if (!length || (length & 1) || (read_u32(src, offset+12) != 12)) // reject empty or odd lengths; word at +12 must be 12 (presumably the mluc record size -- confirm against the ICC spec)
   64 +                               goto invalid_desc_tag;
   65 +                       description = offset + read_u32(src, offset+24); // stored string offset is relative to the start of the tag
   66 +                       if (!src->valid)
   67 +                               goto invalid_desc_tag;
   68 +               } else if (length && type == DESC_TYPE) {
   69 +                       description = offset + 12; // string bytes begin 12 bytes into the tag
   70 +               } else {
   71 +                       goto invalid_desc_tag; // zero length, or a type other than MLUC_TYPE / DESC_TYPE
   72 +               }
   73 +               if (length >= limit)
   74 +                       length = limit - 1; // truncate so the terminator written below stays in bounds
   75 +               for (i = 0; i < length; ++i)
   76 +                       profile->description[i] = read_u8(src, description+i);
   77 +               profile->description[length] = 0;
   78 +       } else {
   79 +               goto invalid_desc_tag; // find_tag returned NULL for tag_id
   80 +       }
   81 +
   82 +       if (src->valid) // succeed only if src is still marked valid after the reads above
   83 +               return true;
   84 +
   85 +invalid_desc_tag:
   86 +       invalid_source(src, "invalid description");
   87 +       return false;
   88 +}
  89 +
  90  #define XYZ_TYPE               0x58595a20 // 'XYZ '
  91  #define CURVE_TYPE             0x63757276 // 'curv'
  92  #define PARAMETRIC_CURVE_TYPE  0x70617261 // 'para'
  93 @@ -402,7 +453,7 @@ static struct XYZNumber read_tag_XYZType(struct mem_source *src, struct tag_inde
  94  // present that are not part of the tag_index.
  95  static struct curveType *read_curveType(struct mem_source *src, uint32_t offset, uint32_t *len)
  96  {
  97 -       static const size_t COUNT_TO_LENGTH[5] = {1, 3, 4, 5, 7};
  98 +       static const uint32_t COUNT_TO_LENGTH[5] = {1, 3, 4, 5, 7};
  99         struct curveType *curve = NULL;
 100         uint32_t type = read_u32(src, offset);
 101         uint32_t count;
 102 @@ -484,19 +535,23 @@ static void read_nested_curveType(struct mem_source *src, struct curveType *(*cu
 103         uint32_t channel_offset = 0;
 104         int i;
 105         for (i = 0; i < num_channels; i++) {
 106 -               uint32_t tag_len;
 107 +               uint32_t tag_len = ~0;
 108
 109                 (*curveArray)[i] = read_curveType(src, curve_offset + channel_offset, &tag_len);
 110                 if (!(*curveArray)[i]) {
 111                         invalid_source(src, "invalid nested curveType curve");
 112                 }
 113
 114 +               if (tag_len == ~0) {
 115 +                       invalid_source(src, "invalid nested curveType tag length");
 116 +                       return;
 117 +               }
 118 +
 119                 channel_offset += tag_len;
 120                 // 4 byte aligned
 121                 if ((tag_len % 4) != 0)
 122                         channel_offset += 4 - (tag_len % 4);
 123         }
 124 -
 125  }
 126
 127  static void mAB_release(struct lutmABType *lut)
 128 @@ -657,7 +712,7 @@ static struct lutType *read_tag_lutType(struct mem_source *src, struct tag_index
 129         uint16_t num_input_table_entries;
 130         uint16_t num_output_table_entries;
 131         uint8_t in_chan, grid_points, out_chan;
 132 -       uint32_t clut_offset, output_offset;
 133 +       size_t clut_offset, output_offset;
 134         uint32_t clut_size;
 135         size_t entry_size;
 136         struct lutType *lut;
 137 @@ -979,6 +1034,9 @@ qcms_profile* qcms_profile_sRGB(void)
 138                 return NO_MEM_PROFILE;
 139
 140         profile = qcms_profile_create_rgb_with_table(D65, Rec709Primaries, table, 1024);
 141 +       if (profile)
 142 +               strcpy(profile->description, "sRGB IEC61966-2.1");
 143 +
 144         free(table);
 145         return profile;
 146  }
 147 @@ -997,6 +1055,9 @@ qcms_profile* qcms_profile_from_memory(const void *mem, size_t size)
 148         source.size = size;
 149         source.valid = true;
 150
 151 +       if (size < 4)
 152 +               return INVALID_PROFILE;
 153 +
 154         length = read_u32(src, 0);
 155         if (length <= size) {
 156                 // shrink the area that we can read if appropriate
 157 @@ -1028,6 +1089,9 @@ qcms_profile* qcms_profile_from_memory(const void *mem, size_t size)
 158         if (!src->valid || !index.tags)
 159                 goto invalid_tag_table;
 160
 161 +       if (!read_tag_descType(profile, src, index, TAG_desc))
 162 +               goto invalid_tag_table;
 163 +
 164         if (find_tag(index, TAG_CHAD)) {
 165                 profile->chromaticAdaption = read_tag_s15Fixed16ArrayType(src, index, TAG_CHAD);
 166         } else {
 167 @@ -1098,6 +1162,11 @@ invalid_profile:
 168         return INVALID_PROFILE;
 169  }
 170
  171 +qcms_bool qcms_profile_match(qcms_profile *p1, qcms_profile *p2)
  172 +{ // Nonzero when the description buffers of p1 and p2 are byte-for-byte equal.
  173 +    return memcmp(p1->description, p2->description, sizeof p1->description) == 0; // the whole buffer is compared, including any bytes after the first NUL
  174 +}
 175 +
 176  qcms_intent qcms_profile_get_rendering_intent(qcms_profile *profile)
 177  {
 178         return profile->rendering_intent;
 179 diff --git a/third_party/qcms/src/qcms.h b/third_party/qcms/src/qcms.h
 180 index 7d83623..e59528a 100644
 181 --- a/third_party/qcms/src/qcms.h
 182 +++ b/third_party/qcms/src/qcms.h
 183 @@ -40,6 +40,12 @@ sale, use or other dealings in this Software without written
 184  authorization from SunSoft Inc.
 185  ******************************************************************/
 186
 187 +/*
 188 + * QCMS, in general, is not threadsafe. However, it should be safe to create
 189 + * profile and transformation objects on different threads, so long as you
 190 + * don't use the same objects on different threads at the same time.
 191 + */
 192 +
 193  /*
 194   * Color Space Signatures
 195   * Note that only icSigXYZData and icSigLabData are valid
 196 @@ -102,6 +108,12 @@ typedef enum {
 197         QCMS_DATA_GRAYA_8
 198  } qcms_data_type;
 199
  200 +/* Format of the output data, passed as the type argument of qcms_transform_data_type() */
  201 +typedef enum {
  202 +       QCMS_OUTPUT_RGBX, /* presumably red-first byte order -- confirm in transform.c */
  203 +       QCMS_OUTPUT_BGRX  /* presumably blue-first byte order -- confirm in transform.c */
  204 +} qcms_output_type;
 205 +
 206  /* the names for the following two types are sort of ugly */
 207  typedef struct
 208  {
 209 @@ -136,6 +148,8 @@ qcms_bool qcms_profile_is_bogus(qcms_profile *profile);
 210  qcms_intent qcms_profile_get_rendering_intent(qcms_profile *profile);
 211  icColorSpaceSignature qcms_profile_get_color_space(qcms_profile *profile);
 212
 213 +qcms_bool qcms_profile_match(qcms_profile *p1, qcms_profile *p2);
 214 +
 215  void qcms_profile_precache_output_transform(qcms_profile *profile);
 216
 217  qcms_transform* qcms_transform_create(
 218 @@ -146,6 +160,7 @@ qcms_transform* qcms_transform_create(
 219  void qcms_transform_release(qcms_transform *);
 220
 221  void qcms_transform_data(qcms_transform *transform, void *src, void *dest, size_t length);
 222 +void qcms_transform_data_type(qcms_transform *transform, void *src, void *dest, size_t length, qcms_output_type type);
 223
 224  void qcms_enable_iccv4();
 225
 226 diff --git a/third_party/qcms/src/qcmsint.h b/third_party/qcms/src/qcmsint.h
 227 index 53a3420..4116ed5 100644
 228 --- a/third_party/qcms/src/qcmsint.h
 229 +++ b/third_party/qcms/src/qcmsint.h
 230 @@ -45,6 +45,11 @@ struct precache_output
 231  #define ALIGN __attribute__(( aligned (16) ))
 232  #endif
 233
  234 +typedef struct _qcms_format_type { /* destination byte positions, passed by value to transform_fn */
  235 +       int r; /* index within one output pixel where red is stored (used as dest[r]) */
  236 +       int b; /* index within one output pixel where blue is stored (used as dest[b]) */
  237 +} qcms_format_type; /* green has no field: the transform functions write it at dest[1] */
 238 +
 239  struct _qcms_transform {
 240         float ALIGN matrix[3][4];
 241         float *input_gamma_table_r;
 242 @@ -88,7 +93,7 @@ struct _qcms_transform {
 243         struct precache_output *output_table_g;
 244         struct precache_output *output_table_b;
 245
 246 -       void (*transform_fn)(struct _qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length);
 247 +       void (*transform_fn)(struct _qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, struct _qcms_format_type output_format);
 248  };
 249
 250  struct matrix {
 251 @@ -225,6 +230,7 @@ struct tag_value {
 252  #define LAB_SIGNATURE  0x4C616220
 253
 254  struct _qcms_profile {
 255 +       char description[64];
 256         uint32_t class;
 257         uint32_t color_space;
 258         uint32_t pcs;
 259 @@ -280,18 +286,40 @@ qcms_bool set_rgb_colorants(qcms_profile *profile, qcms_CIE_xyY white_point, qcm
 260  void qcms_transform_data_rgb_out_lut_sse2(qcms_transform *transform,
 261                                            unsigned char *src,
 262                                            unsigned char *dest,
 263 -                                          size_t length);
 264 +                                          size_t length,
 265 +                                          qcms_format_type output_format);
 266  void qcms_transform_data_rgba_out_lut_sse2(qcms_transform *transform,
 267                                            unsigned char *src,
 268                                            unsigned char *dest,
 269 -                                          size_t length);
 270 +                                          size_t length,
 271 +                                          qcms_format_type output_format);
 272  void qcms_transform_data_rgb_out_lut_sse1(qcms_transform *transform,
 273                                            unsigned char *src,
 274                                            unsigned char *dest,
 275 -                                          size_t length);
 276 +                                          size_t length,
 277 +                                          qcms_format_type output_format);
 278  void qcms_transform_data_rgba_out_lut_sse1(qcms_transform *transform,
 279                                            unsigned char *src,
 280                                            unsigned char *dest,
 281 -                                          size_t length);
 282 +                                          size_t length,
 283 +                                          qcms_format_type output_format);
 284
 285  extern qcms_bool qcms_supports_iccv4;
 286 +
  287 +/* Atomic add/subtract of 1 on x. x must be an lvalue: both variants take its address. */
  288 +#ifdef _MSC_VER
  289 +
  290 +long __cdecl _InterlockedIncrement(long volatile *); /* hand-written prototypes for the intrinsics named in the pragmas below */
  291 +long __cdecl _InterlockedDecrement(long volatile *);
  292 +#pragma intrinsic(_InterlockedIncrement)
  293 +#pragma intrinsic(_InterlockedDecrement)
  294 +/* NOTE(review): &x is cast to long volatile *, so x is presumably a long-sized integer -- confirm at the call sites */
  295 +#define qcms_atomic_increment(x) _InterlockedIncrement((long volatile *)&x)
  296 +#define qcms_atomic_decrement(x) _InterlockedDecrement((long volatile*)&x)
  297 +
  298 +#else
  299 +/* Other compilers: use the __sync_* builtins on the address of x */
  300 +#define qcms_atomic_increment(x) __sync_add_and_fetch(&x, 1)
  301 +#define qcms_atomic_decrement(x) __sync_sub_and_fetch(&x, 1)
  302 +
  303 +#endif
 304 diff --git a/third_party/qcms/src/qcmstypes.h b/third_party/qcms/src/qcmstypes.h
 305 index 56d8de3..d58f691 100644
 306 --- a/third_party/qcms/src/qcmstypes.h
 307 +++ b/third_party/qcms/src/qcmstypes.h
 308 @@ -22,37 +22,6 @@
 309  #ifndef QCMS_TYPES_H
 310  #define QCMS_TYPES_H
 311
 312 -#ifdef MOZ_QCMS
 313 -
 314 -#include "prtypes.h"
 315 -
 316 -/* prtypes.h defines IS_LITTLE_ENDIAN and IS_BIG ENDIAN */
 317 -
 318 -#if defined (__SVR4) && defined (__sun)
 319 -/* int_types.h gets included somehow, so avoid redefining the types differently */
 320 -#include <sys/int_types.h>
 321 -#elif defined (_AIX)
 322 -#include <sys/types.h>
 323 -#elif !defined(ANDROID) && !defined(__OpenBSD__)
 324 -typedef PRInt8 int8_t;
 325 -typedef PRUint8 uint8_t;
 326 -typedef PRInt16 int16_t;
 327 -typedef PRUint16 uint16_t;
 328 -typedef PRInt32 int32_t;
 329 -typedef PRUint32 uint32_t;
 330 -typedef PRInt64 int64_t;
 331 -typedef PRUint64 uint64_t;
 332 -
 333 -#ifdef __OS2__
 334 -/* OS/2's stdlib typdefs uintptr_t. So we'll just include that so we don't collide */
 335 -#include <stdlib.h>
 336 -#elif !defined(__intptr_t_defined) && !defined(_UINTPTR_T_DEFINED)
 337 -typedef PRUptrdiff uintptr_t;
 338 -#endif
 339 -#endif
 340 -
 341 -#else // MOZ_QCMS
 342 -
 343  #if BYTE_ORDER == LITTLE_ENDIAN
 344  #define IS_LITTLE_ENDIAN
 345  #elif BYTE_ORDER == BIG_ENDIAN
 346 @@ -75,7 +44,7 @@ typedef PRUptrdiff uintptr_t;
 347
 348  #if defined (_SVR4) || defined (SVR4) || defined (__OpenBSD__) || defined (_sgi) || defined (__sun) || defined (sun) || defined (__digital__)
 349  #  include <inttypes.h>
 350 -#elif defined (_MSC_VER)
 351 +#elif defined (_MSC_VER) && _MSC_VER < 1600
 352  typedef __int8 int8_t;
 353  typedef unsigned __int8 uint8_t;
 354  typedef __int16 int16_t;
 355 @@ -87,7 +56,12 @@ typedef unsigned __int64 uint64_t;
 356  #ifdef _WIN64
 357  typedef unsigned __int64 uintptr_t;
 358  #else
 359 +#pragma warning(push)
 360 +/* Disable benign redefinition of type warning 4142 */
 361 +#pragma warning(disable:4142)
 362  typedef unsigned long uintptr_t;
 363 +/* Restore warnings */
 364 +#pragma warning(pop)
 365  #endif
 366
 367  #elif defined (_AIX)
 368 @@ -96,8 +70,6 @@ typedef unsigned long uintptr_t;
 369  #  include <stdint.h>
 370  #endif
 371
 372 -#endif
 373 -
 374  typedef qcms_bool bool;
 375  #define true 1
 376  #define false 0
 377 diff --git a/third_party/qcms/src/transform-sse1.c b/third_party/qcms/src/transform-sse1.c
 378 index 2f34db5..aaee1bf 100644
 379 --- a/third_party/qcms/src/transform-sse1.c
 380 +++ b/third_party/qcms/src/transform-sse1.c
 381 @@ -34,7 +34,8 @@ static const ALIGN float clampMaxValueX4[4] =
 382  void qcms_transform_data_rgb_out_lut_sse1(qcms_transform *transform,
 383                                            unsigned char *src,
 384                                            unsigned char *dest,
 385 -                                          size_t length)
 386 +                                          size_t length,
 387 +                                          qcms_format_type output_format)
 388  {
 389      unsigned int i;
 390      float (*mat)[4] = transform->matrix;
 391 @@ -70,6 +71,8 @@ void qcms_transform_data_rgb_out_lut_sse1(qcms_transform *transform,
 392
 393      /* working variables */
 394      __m128 vec_r, vec_g, vec_b, result;
 395 +    const int r_out = output_format.r;
 396 +    const int b_out = output_format.b;
 397
 398      /* CYA */
 399      if (!length)
 400 @@ -116,9 +119,9 @@ void qcms_transform_data_rgb_out_lut_sse1(qcms_transform *transform,
 401          src += 3;
 402
 403          /* use calc'd indices to output RGB values */
 404 -        dest[0] = otdata_r[output[0]];
 405 -        dest[1] = otdata_g[output[1]];
 406 -        dest[2] = otdata_b[output[2]];
 407 +        dest[r_out] = otdata_r[output[0]];
 408 +        dest[1]     = otdata_g[output[1]];
 409 +        dest[b_out] = otdata_b[output[2]];
 410          dest += 3;
 411      }
 412
 413 @@ -141,9 +144,9 @@ void qcms_transform_data_rgb_out_lut_sse1(qcms_transform *transform,
 414      result = _mm_movehl_ps(result, result);
 415      *((__m64 *)&output[2]) = _mm_cvtps_pi32(result);
 416
 417 -    dest[0] = otdata_r[output[0]];
 418 -    dest[1] = otdata_g[output[1]];
 419 -    dest[2] = otdata_b[output[2]];
 420 +    dest[r_out] = otdata_r[output[0]];
 421 +    dest[1]     = otdata_g[output[1]];
 422 +    dest[b_out] = otdata_b[output[2]];
 423
 424      _mm_empty();
 425  }
 426 @@ -151,7 +154,8 @@ void qcms_transform_data_rgb_out_lut_sse1(qcms_transform *transform,
 427  void qcms_transform_data_rgba_out_lut_sse1(qcms_transform *transform,
 428                                             unsigned char *src,
 429                                             unsigned char *dest,
 430 -                                           size_t length)
 431 +                                           size_t length,
 432 +                                           qcms_format_type output_format)
 433  {
 434      unsigned int i;
 435      float (*mat)[4] = transform->matrix;
 436 @@ -187,6 +191,8 @@ void qcms_transform_data_rgba_out_lut_sse1(qcms_transform *transform,
 437
 438      /* working variables */
 439      __m128 vec_r, vec_g, vec_b, result;
 440 +    const int r_out = output_format.r;
 441 +    const int b_out = output_format.b;
 442      unsigned char alpha;
 443
 444      /* CYA */
 445 @@ -239,9 +245,9 @@ void qcms_transform_data_rgba_out_lut_sse1(qcms_transform *transform,
 446          src += 4;
 447
 448          /* use calc'd indices to output RGB values */
 449 -        dest[0] = otdata_r[output[0]];
 450 -        dest[1] = otdata_g[output[1]];
 451 -        dest[2] = otdata_b[output[2]];
 452 +        dest[r_out] = otdata_r[output[0]];
 453 +        dest[1]     = otdata_g[output[1]];
 454 +        dest[b_out] = otdata_b[output[2]];
 455          dest += 4;
 456      }
 457
 458 @@ -266,9 +272,9 @@ void qcms_transform_data_rgba_out_lut_sse1(qcms_transform *transform,
 459      result = _mm_movehl_ps(result, result);
 460      *((__m64 *)&output[2]) = _mm_cvtps_pi32(result);
 461
 462 -    dest[0] = otdata_r[output[0]];
 463 -    dest[1] = otdata_g[output[1]];
 464 -    dest[2] = otdata_b[output[2]];
 465 +    dest[r_out] = otdata_r[output[0]];
 466 +    dest[1]     = otdata_g[output[1]];
 467 +    dest[b_out] = otdata_b[output[2]];
 468
 469      _mm_empty();
 470  }
 471 diff --git a/third_party/qcms/src/transform-sse2.c b/third_party/qcms/src/transform-sse2.c
 472 index 6a5faf9..fa7f2d1 100644
 473 --- a/third_party/qcms/src/transform-sse2.c
 474 +++ b/third_party/qcms/src/transform-sse2.c
 475 @@ -34,7 +34,8 @@ static const ALIGN float clampMaxValueX4[4] =
 476  void qcms_transform_data_rgb_out_lut_sse2(qcms_transform *transform,
 477                                            unsigned char *src,
 478                                            unsigned char *dest,
 479 -                                          size_t length)
 480 +                                          size_t length,
 481 +                                          qcms_format_type output_format)
 482  {
 483      unsigned int i;
 484      float (*mat)[4] = transform->matrix;
 485 @@ -70,6 +71,8 @@ void qcms_transform_data_rgb_out_lut_sse2(qcms_transform *transform,
 486
 487      /* working variables */
 488      __m128 vec_r, vec_g, vec_b, result;
 489 +    const int r_out = output_format.r;
 490 +    const int b_out = output_format.b;
 491
 492      /* CYA */
 493      if (!length)
 494 @@ -114,9 +117,9 @@ void qcms_transform_data_rgb_out_lut_sse2(qcms_transform *transform,
 495          src += 3;
 496
 497          /* use calc'd indices to output RGB values */
 498 -        dest[0] = otdata_r[output[0]];
 499 -        dest[1] = otdata_g[output[1]];
 500 -        dest[2] = otdata_b[output[2]];
 501 +        dest[r_out] = otdata_r[output[0]];
 502 +        dest[1]     = otdata_g[output[1]];
 503 +        dest[b_out] = otdata_b[output[2]];
 504          dest += 3;
 505      }
 506
 507 @@ -137,15 +140,16 @@ void qcms_transform_data_rgb_out_lut_sse2(qcms_transform *transform,
 508
 509      _mm_store_si128((__m128i*)output, _mm_cvtps_epi32(result));
 510
 511 -    dest[0] = otdata_r[output[0]];
 512 -    dest[1] = otdata_g[output[1]];
 513 -    dest[2] = otdata_b[output[2]];
 514 +    dest[r_out] = otdata_r[output[0]];
 515 +    dest[1]     = otdata_g[output[1]];
 516 +    dest[b_out] = otdata_b[output[2]];
 517  }
 518
 519  void qcms_transform_data_rgba_out_lut_sse2(qcms_transform *transform,
 520                                             unsigned char *src,
 521                                             unsigned char *dest,
 522 -                                           size_t length)
 523 +                                           size_t length,
 524 +                                           qcms_format_type output_format)
 525  {
 526      unsigned int i;
 527      float (*mat)[4] = transform->matrix;
 528 @@ -181,6 +185,8 @@ void qcms_transform_data_rgba_out_lut_sse2(qcms_transform *transform,
 529
 530      /* working variables */
 531      __m128 vec_r, vec_g, vec_b, result;
 532 +    const int r_out = output_format.r;
 533 +    const int b_out = output_format.b;
 534      unsigned char alpha;
 535
 536      /* CYA */
 537 @@ -231,9 +237,9 @@ void qcms_transform_data_rgba_out_lut_sse2(qcms_transform *transform,
 538          src += 4;
 539
 540          /* use calc'd indices to output RGB values */
 541 -        dest[0] = otdata_r[output[0]];
 542 -        dest[1] = otdata_g[output[1]];
 543 -        dest[2] = otdata_b[output[2]];
 544 +        dest[r_out] = otdata_r[output[0]];
 545 +        dest[1]     = otdata_g[output[1]];
 546 +        dest[b_out] = otdata_b[output[2]];
 547          dest += 4;
 548      }
 549
 550 @@ -256,7 +262,7 @@ void qcms_transform_data_rgba_out_lut_sse2(qcms_transform *transform,
 551
 552      _mm_store_si128((__m128i*)output, _mm_cvtps_epi32(result));
 553
 554 -    dest[0] = otdata_r[output[0]];
 555 -    dest[1] = otdata_g[output[1]];
 556 -    dest[2] = otdata_b[output[2]];
 557 +    dest[r_out] = otdata_r[output[0]];
 558 +    dest[1]     = otdata_g[output[1]];
 559 +    dest[b_out] = otdata_b[output[2]];
 560  }
 561 diff --git a/third_party/qcms/src/transform.c b/third_party/qcms/src/transform.c
 562 index 9a6562b..08db142 100644
 563 --- a/third_party/qcms/src/transform.c
 564 +++ b/third_party/qcms/src/transform.c
 565 @@ -181,11 +181,20 @@ compute_chromatic_adaption(struct CIE_XYZ source_white_point,
 566  static struct matrix
 567  adaption_matrix(struct CIE_XYZ source_illumination, struct CIE_XYZ target_illumination)
 568  {
 569 +#if defined (_MSC_VER)
 570 +#pragma warning(push)
 571 +/* Disable double to float truncation warning 4305 */
 572 +#pragma warning(disable:4305)
 573 +#endif
 574         struct matrix lam_rigg = {{ // Bradford matrix
 575                                  {  0.8951,  0.2664, -0.1614 },
 576                                  { -0.7502,  1.7135,  0.0367 },
 577                                  {  0.0389, -0.0685,  1.0296 }
 578                                  }};
 579 +#if defined (_MSC_VER)
 580 +/* Restore warnings */
 581 +#pragma warning(pop)
 582 +#endif
 583         return compute_chromatic_adaption(source_illumination, target_illumination, lam_rigg);
 584  }
 585
 586 @@ -230,8 +239,11 @@ qcms_bool set_rgb_colorants(qcms_profile *profile, qcms_CIE_xyY white_point, qcm
 587  }
 588
 589  #if 0
 590 -static void qcms_transform_data_rgb_out_pow(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
 591 +static void qcms_transform_data_rgb_out_pow(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
 592  {
 593 +       const int r_out = output_format.r;
 594 +       const int b_out = output_format.b;
 595 +
 596         int i;
 597         float (*mat)[4] = transform->matrix;
 598         for (i=0; i<length; i++) {
 599 @@ -251,15 +263,19 @@ static void qcms_transform_data_rgb_out_pow(qcms_transform *transform, unsigned
 600                 float out_device_g = pow(out_linear_g, transform->out_gamma_g);
 601                 float out_device_b = pow(out_linear_b, transform->out_gamma_b);
 602
 603 -               *dest++ = clamp_u8(255*out_device_r);
 604 -               *dest++ = clamp_u8(255*out_device_g);
 605 -               *dest++ = clamp_u8(255*out_device_b);
 606 +               dest[r_out] = clamp_u8(out_device_r*255);
 607 +               dest[1]     = clamp_u8(out_device_g*255);
 608 +               dest[b_out] = clamp_u8(out_device_b*255);
 609 +               dest += 3;
 610         }
 611  }
 612  #endif
 613
 614 -static void qcms_transform_data_gray_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
 615 +static void qcms_transform_data_gray_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
 616  {
 617 +       const int r_out = output_format.r;
 618 +       const int b_out = output_format.b;
 619 +
 620         unsigned int i;
 621         for (i = 0; i < length; i++) {
 622                 float out_device_r, out_device_g, out_device_b;
 623 @@ -267,13 +283,14 @@ static void qcms_transform_data_gray_out_lut(qcms_transform *transform, unsigned
 624
 625                 float linear = transform->input_gamma_table_gray[device];
 626
 627 -                out_device_r = lut_interp_linear(linear, transform->output_gamma_lut_r, transform->output_gamma_lut_r_length);
 628 +               out_device_r = lut_interp_linear(linear, transform->output_gamma_lut_r, transform->output_gamma_lut_r_length);
 629                 out_device_g = lut_interp_linear(linear, transform->output_gamma_lut_g, transform->output_gamma_lut_g_length);
 630                 out_device_b = lut_interp_linear(linear, transform->output_gamma_lut_b, transform->output_gamma_lut_b_length);
 631
 632 -               *dest++ = clamp_u8(out_device_r*255);
 633 -               *dest++ = clamp_u8(out_device_g*255);
 634 -               *dest++ = clamp_u8(out_device_b*255);
 635 +               dest[r_out] = clamp_u8(out_device_r*255);
 636 +               dest[1]     = clamp_u8(out_device_g*255);
 637 +               dest[b_out] = clamp_u8(out_device_b*255);
 638 +               dest += 3;
 639         }
 640  }
 641
 642 @@ -283,8 +300,11 @@ static void qcms_transform_data_gray_out_lut(qcms_transform *transform, unsigned
 643         See: ftp://ftp.alvyray.com/Acrobat/17_Nonln.pdf
 644  */
 645
 646 -static void qcms_transform_data_graya_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
 647 +static void qcms_transform_data_graya_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
 648  {
 649 +       const int r_out = output_format.r;
 650 +       const int b_out = output_format.b;
 651 +
 652         unsigned int i;
 653         for (i = 0; i < length; i++) {
 654                 float out_device_r, out_device_g, out_device_b;
 655 @@ -293,20 +313,24 @@ static void qcms_transform_data_graya_out_lut(qcms_transform *transform, unsigne
 656
 657                 float linear = transform->input_gamma_table_gray[device];
 658
 659 -                out_device_r = lut_interp_linear(linear, transform->output_gamma_lut_r, transform->output_gamma_lut_r_length);
 660 +               out_device_r = lut_interp_linear(linear, transform->output_gamma_lut_r, transform->output_gamma_lut_r_length);
 661                 out_device_g = lut_interp_linear(linear, transform->output_gamma_lut_g, transform->output_gamma_lut_g_length);
 662                 out_device_b = lut_interp_linear(linear, transform->output_gamma_lut_b, transform->output_gamma_lut_b_length);
 663
 664 -               *dest++ = clamp_u8(out_device_r*255);
 665 -               *dest++ = clamp_u8(out_device_g*255);
 666 -               *dest++ = clamp_u8(out_device_b*255);
 667 -               *dest++ = alpha;
 668 +               dest[r_out] = clamp_u8(out_device_r*255);
 669 +               dest[1]     = clamp_u8(out_device_g*255);
 670 +               dest[b_out] = clamp_u8(out_device_b*255);
 671 +               dest[3]     = alpha;
 672 +               dest += 4;
 673         }
 674  }
 675
 676
 677 -static void qcms_transform_data_gray_out_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
 678 +static void qcms_transform_data_gray_out_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
 679  {
 680 +       const int r_out = output_format.r;
 681 +       const int b_out = output_format.b;
 682 +
 683         unsigned int i;
 684         for (i = 0; i < length; i++) {
 685                 unsigned char device = *src++;
 686 @@ -317,14 +341,19 @@ static void qcms_transform_data_gray_out_precache(qcms_transform *transform, uns
 687                 /* we could round here... */
 688                 gray = linear * PRECACHE_OUTPUT_MAX;
 689
 690 -               *dest++ = transform->output_table_r->data[gray];
 691 -               *dest++ = transform->output_table_g->data[gray];
 692 -               *dest++ = transform->output_table_b->data[gray];
 693 +               dest[r_out] = transform->output_table_r->data[gray];
 694 +               dest[1]     = transform->output_table_g->data[gray];
 695 +               dest[b_out] = transform->output_table_b->data[gray];
 696 +               dest += 3;
 697         }
 698  }
 699
 700 -static void qcms_transform_data_graya_out_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
 701 +
 702 +static void qcms_transform_data_graya_out_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
 703  {
 704 +       const int r_out = output_format.r;
 705 +       const int b_out = output_format.b;
 706 +
 707         unsigned int i;
 708         for (i = 0; i < length; i++) {
 709                 unsigned char device = *src++;
 710 @@ -336,15 +365,19 @@ static void qcms_transform_data_graya_out_precache(qcms_transform *transform, un
 711                 /* we could round here... */
 712                 gray = linear * PRECACHE_OUTPUT_MAX;
 713
 714 -               *dest++ = transform->output_table_r->data[gray];
 715 -               *dest++ = transform->output_table_g->data[gray];
 716 -               *dest++ = transform->output_table_b->data[gray];
 717 -               *dest++ = alpha;
 718 +               dest[r_out] = transform->output_table_r->data[gray];
 719 +               dest[1]     = transform->output_table_g->data[gray];
 720 +               dest[b_out] = transform->output_table_b->data[gray];
 721 +               dest[3]     = alpha;
 722 +               dest += 4;
 723         }
 724  }
 725
 726 -static void qcms_transform_data_rgb_out_lut_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
 727 +static void qcms_transform_data_rgb_out_lut_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
 728  {
 729 +       const int r_out = output_format.r;
 730 +       const int b_out = output_format.b;
 731 +
 732         unsigned int i;
 733         float (*mat)[4] = transform->matrix;
 734         for (i = 0; i < length; i++) {
 735 @@ -370,14 +403,18 @@ static void qcms_transform_data_rgb_out_lut_precache(qcms_transform *transform,
 736                 g = out_linear_g * PRECACHE_OUTPUT_MAX;
 737                 b = out_linear_b * PRECACHE_OUTPUT_MAX;
 738
 739 -               *dest++ = transform->output_table_r->data[r];
 740 -               *dest++ = transform->output_table_g->data[g];
 741 -               *dest++ = transform->output_table_b->data[b];
 742 +               dest[r_out] = transform->output_table_r->data[r];
 743 +               dest[1]     = transform->output_table_g->data[g];
 744 +               dest[b_out] = transform->output_table_b->data[b];
 745 +               dest += 3;
 746         }
 747  }
 748
 749 -static void qcms_transform_data_rgba_out_lut_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
 750 +static void qcms_transform_data_rgba_out_lut_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
 751  {
 752 +       const int r_out = output_format.r;
 753 +       const int b_out = output_format.b;
 754 +
 755         unsigned int i;
 756         float (*mat)[4] = transform->matrix;
 757         for (i = 0; i < length; i++) {
 758 @@ -404,16 +441,21 @@ static void qcms_transform_data_rgba_out_lut_precache(qcms_transform *transform,
 759                 g = out_linear_g * PRECACHE_OUTPUT_MAX;
 760                 b = out_linear_b * PRECACHE_OUTPUT_MAX;
 761
 762 -               *dest++ = transform->output_table_r->data[r];
 763 -               *dest++ = transform->output_table_g->data[g];
 764 -               *dest++ = transform->output_table_b->data[b];
 765 -               *dest++ = alpha;
 766 +               dest[r_out] = transform->output_table_r->data[r];
 767 +               dest[1]     = transform->output_table_g->data[g];
 768 +               dest[b_out] = transform->output_table_b->data[b];
 769 +               dest[3]     = alpha;
 770 +               dest += 4;
 771         }
 772  }
 773
 774  // Not used
 775  /*
 776 -static void qcms_transform_data_clut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length) {
 777 +static void qcms_transform_data_clut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
 778 +{
 779 +       const int r_out = output_format.r;
 780 +       const int b_out = output_format.b;
 781 +
 782         unsigned int i;
 783         int xy_len = 1;
 784         int x_len = transform->grid_size;
 785 @@ -462,15 +504,20 @@ static void qcms_transform_data_clut(qcms_transform *transform, unsigned char *s
 786                 float b_y2 = lerp(b_x3, b_x4, y_d);
 787                 float clut_b = lerp(b_y1, b_y2, z_d);
 788
 789 -               *dest++ = clamp_u8(clut_r*255.0f);
 790 -               *dest++ = clamp_u8(clut_g*255.0f);
 791 -               *dest++ = clamp_u8(clut_b*255.0f);
 792 -       }
 793 +               dest[r_out] = clamp_u8(clut_r*255.0f);
 794 +               dest[1]     = clamp_u8(clut_g*255.0f);
 795 +               dest[b_out] = clamp_u8(clut_b*255.0f);
 796 +               dest += 3;
 797 +       }
 798  }
 799  */
 800
 801  // Using lcms' tetra interpolation algorithm.
 802 -static void qcms_transform_data_tetra_clut_rgba(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length) {
 803 +static void qcms_transform_data_tetra_clut_rgba(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
 804 +{
 805 +       const int r_out = output_format.r;
 806 +       const int b_out = output_format.b;
 807 +
 808         unsigned int i;
 809         int xy_len = 1;
 810         int x_len = transform->grid_size;
 811 @@ -577,15 +624,20 @@ static void qcms_transform_data_tetra_clut_rgba(qcms_transform *transform, unsig
 812                 clut_g = c0_g + c1_g*rx + c2_g*ry + c3_g*rz;
 813                 clut_b = c0_b + c1_b*rx + c2_b*ry + c3_b*rz;
 814
 815 -               *dest++ = clamp_u8(clut_r*255.0f);
 816 -               *dest++ = clamp_u8(clut_g*255.0f);
 817 -               *dest++ = clamp_u8(clut_b*255.0f);
 818 -               *dest++ = in_a;
 819 -       }
 820 +               dest[r_out] = clamp_u8(clut_r*255.0f);
 821 +               dest[1]     = clamp_u8(clut_g*255.0f);
 822 +               dest[b_out] = clamp_u8(clut_b*255.0f);
 823 +               dest[3]     = in_a;
 824 +               dest += 4;
 825 +       }
 826  }
 827
 828  // Using lcms' tetra interpolation code.
 829 -static void qcms_transform_data_tetra_clut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length) {
 830 +static void qcms_transform_data_tetra_clut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
 831 +{
 832 +       const int r_out = output_format.r;
 833 +       const int b_out = output_format.b;
 834 +
 835         unsigned int i;
 836         int xy_len = 1;
 837         int x_len = transform->grid_size;
 838 @@ -691,14 +743,18 @@ static void qcms_transform_data_tetra_clut(qcms_transform *transform, unsigned c
 839                 clut_g = c0_g + c1_g*rx + c2_g*ry + c3_g*rz;
 840                 clut_b = c0_b + c1_b*rx + c2_b*ry + c3_b*rz;
 841
 842 -               *dest++ = clamp_u8(clut_r*255.0f);
 843 -               *dest++ = clamp_u8(clut_g*255.0f);
 844 -               *dest++ = clamp_u8(clut_b*255.0f);
 845 -       }
 846 +               dest[r_out] = clamp_u8(clut_r*255.0f);
 847 +               dest[1]     = clamp_u8(clut_g*255.0f);
 848 +               dest[b_out] = clamp_u8(clut_b*255.0f);
 849 +               dest += 3;
 850 +       }
 851  }
 852
 853 -static void qcms_transform_data_rgb_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
 854 +static void qcms_transform_data_rgb_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
 855  {
 856 +       const int r_out = output_format.r;
 857 +       const int b_out = output_format.b;
 858 +
 859         unsigned int i;
 860         float (*mat)[4] = transform->matrix;
 861         for (i = 0; i < length; i++) {
 862 @@ -726,14 +782,18 @@ static void qcms_transform_data_rgb_out_lut(qcms_transform *transform, unsigned
 863                 out_device_b = lut_interp_linear(out_linear_b,
 864                                 transform->output_gamma_lut_b, transform->output_gamma_lut_b_length);
 865
 866 -               *dest++ = clamp_u8(out_device_r*255);
 867 -               *dest++ = clamp_u8(out_device_g*255);
 868 -               *dest++ = clamp_u8(out_device_b*255);
 869 +               dest[r_out] = clamp_u8(out_device_r*255);
 870 +               dest[1]     = clamp_u8(out_device_g*255);
 871 +               dest[b_out] = clamp_u8(out_device_b*255);
 872 +               dest += 3;
 873         }
 874  }
 875
 876 -static void qcms_transform_data_rgba_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
 877 +static void qcms_transform_data_rgba_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
 878  {
 879 +       const int r_out = output_format.r;
 880 +       const int b_out = output_format.b;
 881 +
 882         unsigned int i;
 883         float (*mat)[4] = transform->matrix;
 884         for (i = 0; i < length; i++) {
 885 @@ -762,16 +822,20 @@ static void qcms_transform_data_rgba_out_lut(qcms_transform *transform, unsigned
 886                 out_device_b = lut_interp_linear(out_linear_b,
 887                                 transform->output_gamma_lut_b, transform->output_gamma_lut_b_length);
 888
 889 -               *dest++ = clamp_u8(out_device_r*255);
 890 -               *dest++ = clamp_u8(out_device_g*255);
 891 -               *dest++ = clamp_u8(out_device_b*255);
 892 -               *dest++ = alpha;
 893 +               dest[r_out] = clamp_u8(out_device_r*255);
 894 +               dest[1]     = clamp_u8(out_device_g*255);
 895 +               dest[b_out] = clamp_u8(out_device_b*255);
 896 +               dest[3]     = alpha;
 897 +               dest += 4;
 898         }
 899  }
 900
 901  #if 0
 902 -static void qcms_transform_data_rgb_out_linear(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
 903 +static void qcms_transform_data_rgb_out_linear(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
 904  {
 905 +       const int r_out = output_format.r;
 906 +       const int b_out = output_format.b;
 907 +
 908         int i;
 909         float (*mat)[4] = transform->matrix;
 910         for (i = 0; i < length; i++) {
 911 @@ -787,16 +851,25 @@ static void qcms_transform_data_rgb_out_linear(qcms_transform *transform, unsign
 912                 float out_linear_g = mat[0][1]*linear_r + mat[1][1]*linear_g + mat[2][1]*linear_b;
 913                 float out_linear_b = mat[0][2]*linear_r + mat[1][2]*linear_g + mat[2][2]*linear_b;
 914
 915 -               *dest++ = clamp_u8(out_linear_r*255);
 916 -               *dest++ = clamp_u8(out_linear_g*255);
 917 -               *dest++ = clamp_u8(out_linear_b*255);
 918 +               dest[r_out] = clamp_u8(out_linear_r*255);
 919 +               dest[1]     = clamp_u8(out_linear_g*255);
 920 +               dest[b_out] = clamp_u8(out_linear_b*255);
 921 +               dest += 3;
 922         }
 923  }
 924  #endif
 925
 926 +/*
 927 + * If users create and destroy objects on different threads, even if the same
 928 + * objects aren't used on different threads at the same time, we can still run
 929 + * into trouble with refcounts if they aren't atomic.
 930 + *
 931 + * This can lead to us prematurely deleting the precache if threads get unlucky
 932 + * and write the wrong value to the ref count.
 933 + */
 934  static struct precache_output *precache_reference(struct precache_output *p)
 935  {
 936 -       p->ref_count++;
 937 +       qcms_atomic_increment(p->ref_count);
 938         return p;
 939  }
 940
 941 @@ -810,12 +883,12 @@ static struct precache_output *precache_create()
 942
 943  void precache_release(struct precache_output *p)
 944  {
 945 -       if (--p->ref_count == 0) {
 946 +       if (qcms_atomic_decrement(p->ref_count) == 0) {
 947                 free(p);
 948         }
 949  }
 950
 951 -#ifdef HAS_POSIX_MEMALIGN
 952 +#ifdef HAVE_POSIX_MEMALIGN
 953  static qcms_transform *transform_alloc(void)
 954  {
 955         qcms_transform *t;
 956 @@ -994,13 +1067,15 @@ void qcms_profile_precache_output_transform(qcms_profile *profile)
 957         if (profile->color_space != RGB_SIGNATURE)
 958                 return;
 959
 960 -       /* don't precache since we will use the B2A LUT */
 961 -       if (profile->B2A0)
 962 -               return;
 963 +       if (qcms_supports_iccv4) {
 964 +               /* don't precache since we will use the B2A LUT */
 965 +               if (profile->B2A0)
 966 +                       return;
 967
 968 -       /* don't precache since we will use the mBA LUT */
 969 -       if (profile->mBA)
 970 -               return;
 971 +               /* don't precache since we will use the mBA LUT */
 972 +               if (profile->mBA)
 973 +                       return;
 974 +       }
 975
 976         /* don't precache if we do not have the TRC curves */
 977         if (!profile->redTRC || !profile->greenTRC || !profile->blueTRC)
 978 @@ -1078,7 +1153,8 @@ qcms_transform* qcms_transform_precacheLUT_float(qcms_transform *transform, qcms
 979         //XXX: qcms_modular_transform_data may return either the src or dest buffer. If so it must not be free-ed
 980         if (src && lut != src) {
 981                 free(src);
 982 -       } else if (dest && lut != src) {
 983 +       }
 984 +       if (dest && lut != dest) {
 985                 free(dest);
 986         }
 987
 988 @@ -1157,14 +1233,14 @@ qcms_transform* qcms_transform_create(
 989                         return NULL;
 990                 }
 991                 if (precache) {
 992 -#ifdef X86
 993 +#if defined(SSE2_ENABLE) && defined(X86)
 994                     if (sse_version_available() >= 2) {
 995                             if (in_type == QCMS_DATA_RGB_8)
 996                                     transform->transform_fn = qcms_transform_data_rgb_out_lut_sse2;
 997                             else
 998                                     transform->transform_fn = qcms_transform_data_rgba_out_lut_sse2;
 999
1000 -#if !(defined(_MSC_VER) && defined(_M_AMD64))
1001 +#if defined(SSE2_ENABLE) && !(defined(_MSC_VER) && defined(_M_AMD64))
1002                      /* Microsoft Compiler for x64 doesn't support MMX.
1003                       * SSE code uses MMX so that we disable on x64 */
1004                     } else
1005 @@ -1256,13 +1332,34 @@ qcms_transform* qcms_transform_create(
1006         return transform;
1007  }
1008
1009 -#if defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__)
1010 +/* __force_align_arg_pointer__ is an x86-only attribute, and gcc/clang warns on unused
1011 + * attributes. Don't use this on ARM or AMD64. __has_attribute can detect the presence
1012 + * of the attribute but is currently only supported by clang */
1013 +#if defined(__has_attribute)
1014 +#define HAS_FORCE_ALIGN_ARG_POINTER __has_attribute(__force_align_arg_pointer__)
1015 +#elif defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__) && !defined(__arm__) && !defined(__mips__)
1016 +#define HAS_FORCE_ALIGN_ARG_POINTER 1
1017 +#else
1018 +#define HAS_FORCE_ALIGN_ARG_POINTER 0
1019 +#endif
1020 +
1021 +#if HAS_FORCE_ALIGN_ARG_POINTER
1022  /* we need this to avoid crashes when gcc assumes the stack is 128bit aligned */
1023  __attribute__((__force_align_arg_pointer__))
1024  #endif
1025  void qcms_transform_data(qcms_transform *transform, void *src, void *dest, size_t length)
1026  {
1027 -       transform->transform_fn(transform, src, dest, length);
1028 +       static const struct _qcms_format_type output_rgbx = { 0, 2 };
1029 +
1030 +       transform->transform_fn(transform, src, dest, length, output_rgbx);
1031 +}
1032 +
1033 +void qcms_transform_data_type(qcms_transform *transform, void *src, void *dest, size_t length, qcms_output_type type)
1034 +{
1035 +       static const struct _qcms_format_type output_rgbx = { 0, 2 };
1036 +       static const struct _qcms_format_type output_bgrx = { 2, 0 };
1037 +
1038 +       transform->transform_fn(transform, src, dest, length, type == QCMS_OUTPUT_BGRX ? output_bgrx : output_rgbx);
1039  }
1040
1041  qcms_bool qcms_supports_iccv4;
1042 diff --git a/third_party/qcms/src/transform_util.c b/third_party/qcms/src/transform_util.c
1043 index e8447e5..f4338b2 100644
1044 --- a/third_party/qcms/src/transform_util.c
1045 +++ b/third_party/qcms/src/transform_util.c
1046 @@ -36,7 +36,7 @@
1047
1048  /* value must be a value between 0 and 1 */
1049  //XXX: is the above a good restriction to have?
1050 -float lut_interp_linear(double value, uint16_t *table, int length)
1051 +float lut_interp_linear(double value, uint16_t *table, size_t length)
1052  {
1053         int upper, lower;
1054         value = value * (length - 1); // scale to length of the array
1055 @@ -49,11 +49,11 @@ float lut_interp_linear(double value, uint16_t *table, int length)
1056  }
1057
1058  /* same as above but takes and returns a uint16_t value representing a range from 0..1 */
1059 -uint16_t lut_interp_linear16(uint16_t input_value, uint16_t *table, int length)
1060 +uint16_t lut_interp_linear16(uint16_t input_value, uint16_t *table, size_t length)
1061  {
1062         /* Start scaling input_value to the length of the array: 65535*(length-1).
1063          * We'll divide out the 65535 next */
1064 -       uint32_t value = (input_value * (length - 1));
1065 +       uintptr_t value = (input_value * (length - 1));
1066         uint32_t upper = (value + 65534) / 65535; /* equivalent to ceil(value/65535) */
1067         uint32_t lower = value / 65535;           /* equivalent to floor(value/65535) */
1068         /* interp is the distance from upper to value scaled to 0..65535 */
1069 @@ -67,11 +67,11 @@ uint16_t lut_interp_linear16(uint16_t input_value, uint16_t *table, int length)
1070  /* same as above but takes an input_value from 0..PRECACHE_OUTPUT_MAX
1071   * and returns a uint8_t value representing a range from 0..1 */
1072  static
1073 -uint8_t lut_interp_linear_precache_output(uint32_t input_value, uint16_t *table, int length)
1074 +uint8_t lut_interp_linear_precache_output(uint32_t input_value, uint16_t *table, size_t length)
1075  {
1076         /* Start scaling input_value to the length of the array: PRECACHE_OUTPUT_MAX*(length-1).
1077          * We'll divide out the PRECACHE_OUTPUT_MAX next */
1078 -       uint32_t value = (input_value * (length - 1));
1079 +       uintptr_t value = (input_value * (length - 1));
1080
1081         /* equivalent to ceil(value/PRECACHE_OUTPUT_MAX) */
1082         uint32_t upper = (value + PRECACHE_OUTPUT_MAX-1) / PRECACHE_OUTPUT_MAX;
1083 @@ -91,7 +91,7 @@ uint8_t lut_interp_linear_precache_output(uint32_t input_value, uint16_t *table,
1084
1085  /* value must be a value between 0 and 1 */
1086  //XXX: is the above a good restriction to have?
1087 -float lut_interp_linear_float(float value, float *table, int length)
1088 +float lut_interp_linear_float(float value, float *table, size_t length)
1089  {
1090          int upper, lower;
1091          value = value * (length - 1);
1092 @@ -235,6 +235,21 @@ float u8Fixed8Number_to_float(uint16_t x)
1093         return x/256.;
1094  }
1095
1096 +/* The SSE2 code uses min & max which let NaNs pass through.
1097 +   We want to try to prevent that here by ensuring that
1098 +   the gamma table is within expected values. */
1099 +void validate_gamma_table(float gamma_table[256])
1100 +{
1101 +       int i;
1102 +       for (i = 0; i < 256; i++) {
1103 +               // Note: we test for "not in range" rather than "out of range"
1104 +               // so that NaNs, which fail every comparison, are caught too
1105 +               if (!(gamma_table[i] >= 0.f && gamma_table[i] <= 1.f)) {
1106 +                       gamma_table[i] = 0.f;
1107 +               }
1108 +       }
1109 +}
1110 +
1111  float *build_input_gamma_table(struct curveType *TRC)
1112  {
1113         float *gamma_table;
1114 @@ -254,7 +269,10 @@ float *build_input_gamma_table(struct curveType *TRC)
1115                         }
1116                 }
1117         }
1118 -        return gamma_table;
1119 +
1120 +       validate_gamma_table(gamma_table);
1121 +
1122 +       return gamma_table;
1123  }
1124
1125  struct matrix build_colorant_matrix(qcms_profile *p)
1126 @@ -390,7 +408,7 @@ uint16_fract_t lut_inverse_interp16(uint16_t Value, uint16_t LutTable[], int len
1127   which has an maximum error of about 9855 (pixel difference of ~38.346)
1128
1129   For now, we punt the decision of output size to the caller. */
1130 -static uint16_t *invert_lut(uint16_t *table, int length, int out_length)
1131 +static uint16_t *invert_lut(uint16_t *table, int length, size_t out_length)
1132  {
1133          int i;
1134          /* for now we invert the lut by creating a lut of size out_length
1135 diff --git a/third_party/qcms/src/transform_util.h b/third_party/qcms/src/transform_util.h
1136 index 8f358a8..de465f4 100644
1137 --- a/third_party/qcms/src/transform_util.h
1138 +++ b/third_party/qcms/src/transform_util.h
1139 @@ -31,9 +31,9 @@
1140  //XXX: could use a bettername
1141  typedef uint16_t uint16_fract_t;
1142
1143 -float lut_interp_linear(double value, uint16_t *table, int length);
1144 -float lut_interp_linear_float(float value, float *table, int length);
1145 -uint16_t lut_interp_linear16(uint16_t input_value, uint16_t *table, int length);
1146 +float lut_interp_linear(double value, uint16_t *table, size_t length);
1147 +float lut_interp_linear_float(float value, float *table, size_t length);
1148 +uint16_t lut_interp_linear16(uint16_t input_value, uint16_t *table, size_t length);
1149
1150
1151  static inline float lerp(float a, float b, float t)