1 diff --git a/third_party/qcms/src/iccread.c b/third_party/qcms/src/iccread.c
2 index 36b7011..208ebee 100644
3 --- a/third_party/qcms/src/iccread.c
4 +++ b/third_party/qcms/src/iccread.c
5 @@ -266,7 +266,7 @@ qcms_bool qcms_profile_is_bogus(qcms_profile *profile)
6 if (profile->color_space != RGB_SIGNATURE)
9 - if (profile->A2B0 || profile->B2A0)
10 + if (qcms_supports_iccv4 && (profile->A2B0 || profile->B2A0))
13 rX = s15Fixed16Number_to_float(profile->redColorant.X);
14 @@ -297,6 +297,11 @@ qcms_bool qcms_profile_is_bogus(qcms_profile *profile)
15 sum[1] = rY + gY + bY;
16 sum[2] = rZ + gZ + bZ;
18 +#if defined (_MSC_VER)
19 +#pragma warning(push)
20 +/* Disable double to float truncation warning 4305 */
21 +#pragma warning(disable:4305)
23 // Build our target vector (see mozilla bug 460629)
26 @@ -310,6 +315,10 @@ qcms_bool qcms_profile_is_bogus(qcms_profile *profile)
30 +#if defined (_MSC_VER)
31 +/* Restore warnings */
34 // Compare with our tolerance
35 for (i = 0; i < 3; ++i) {
36 if (!(((sum[i] - tolerance[i]) <= target[i]) &&
37 @@ -331,6 +340,7 @@ qcms_bool qcms_profile_is_bogus(qcms_profile *profile)
38 #define TAG_A2B0 0x41324230
39 #define TAG_B2A0 0x42324130
40 #define TAG_CHAD 0x63686164
41 +#define TAG_desc 0x64657363
43 static struct tag *find_tag(struct tag_index index, uint32_t tag_id)
45 @@ -344,6 +354,152 @@ static struct tag *find_tag(struct tag_index index, uint32_t tag_id)
49 +#define DESC_TYPE 0x64657363 // 'desc'
50 +#define MLUC_TYPE 0x6d6c7563 // 'mluc'
51 +#define MMOD_TYPE 0x6D6D6F64 // 'mmod'
53 +static bool read_tag_descType(qcms_profile *profile, struct mem_source *src, struct tag_index index, uint32_t tag_id)
55 + struct tag *tag = find_tag(index, tag_id);
57 + const uint32_t limit = sizeof profile->description;
58 + uint32_t offset = tag->offset;
59 + uint32_t type = read_u32(src, offset);
60 + uint32_t length = read_u32(src, offset+8);
61 + uint32_t i, description_offset;
63 + if (length && type == MLUC_TYPE) {
64 + length = read_u32(src, offset+20);
65 + if (!length || (length & 1) || (read_u32(src, offset+12) != 12))
66 + goto invalid_desc_tag;
67 + description_offset = offset + read_u32(src, offset+24);
69 + goto invalid_desc_tag;
71 + } else if (length && type == DESC_TYPE) {
72 + description_offset = offset + 12;
74 + goto invalid_desc_tag;
76 + if (length >= limit)
78 + for (i = 0; i < length; ++i) {
79 + uint8_t value = read_u8(src, description_offset + i);
81 + goto invalid_desc_tag;
84 + profile->description[i] = value;
86 + profile->description[length] = 0;
88 + goto invalid_desc_tag;
95 + invalid_source(src, "invalid description");
99 +#if defined(__APPLE__)
101 +// Use the dscm tag to change profile description "Display" to its more specific en-localized monitor name, if any.
103 +#define TAG_dscm 0x6473636D // 'dscm'
105 +static bool read_tag_dscmType(qcms_profile *profile, struct mem_source *src, struct tag_index index, uint32_t tag_id)
107 + if (strcmp(profile->description, "Display") != 0)
110 + struct tag *tag = find_tag(index, tag_id);
112 + uint32_t offset = tag->offset;
113 + uint32_t type = read_u32(src, offset);
114 + uint32_t records = read_u32(src, offset+8);
116 + if (!src->valid || !records || type != MLUC_TYPE)
117 + goto invalid_dscm_tag;
118 + if (read_u32(src, offset+12) != 12) // MLUC record size must be 12 bytes
119 + goto invalid_dscm_tag;
121 + for (uint32_t i = 0; i < records; ++i) {
122 + const uint32_t limit = sizeof profile->description;
123 + const uint16_t isoen = 0x656E; // ISO 639-1 language code 'en'
125 + uint16_t language = read_u16(src, offset + 16 + (i * 12) + 0);
126 + uint32_t length = read_u32(src, offset + 16 + (i * 12) + 4);
127 + uint32_t description_offset = read_u32(src, offset + 16 + (i * 12) + 8);
129 + if (!src->valid || !length || (length & 1))
130 + goto invalid_dscm_tag;
131 + if (language != isoen)
134 + // Use a prefix to identify the display description source
135 + strcpy(profile->description, "dscm:");
138 + if (length >= limit)
139 + length = limit - 1;
140 + for (uint32_t j = 5; j < length; ++j) {
141 + uint8_t value = read_u8(src, offset + description_offset + j - 5);
143 + goto invalid_dscm_tag;
144 + profile->description[j] = value ? value : '.';
146 + profile->description[length] = 0;
155 + invalid_source(src, "invalid dscm tag");
159 +// Use the mmod tag to change profile description "Display" to its specific mmod maker model data, if any.
161 +#define TAG_mmod 0x6D6D6F64 // 'mmod'
163 +static bool read_tag_mmodType(qcms_profile *profile, struct mem_source *src, struct tag_index index, uint32_t tag_id)
165 + if (strcmp(profile->description, "Display") != 0)
168 + struct tag *tag = find_tag(index, tag_id);
170 + const uint8_t length = 4 * 4; // Four 4-byte fields: 'mmod', 0, maker, model.
172 + uint32_t offset = tag->offset;
173 + if (tag->size < 40 || read_u32(src, offset) != MMOD_TYPE)
174 + goto invalid_mmod_tag;
176 + for (uint8_t i = 0; i < length; ++i) {
177 + uint8_t value = read_u8(src, offset + i);
179 + goto invalid_mmod_tag;
180 + profile->description[i] = value ? value : '.';
182 + profile->description[length] = 0;
189 + invalid_source(src, "invalid mmod tag");
195 #define XYZ_TYPE 0x58595a20 // 'XYZ '
196 #define CURVE_TYPE 0x63757276 // 'curv'
197 #define PARAMETRIC_CURVE_TYPE 0x70617261 // 'para'
198 @@ -402,7 +558,7 @@ static struct XYZNumber read_tag_XYZType(struct mem_source *src, struct tag_inde
199 // present that are not part of the tag_index.
200 static struct curveType *read_curveType(struct mem_source *src, uint32_t offset, uint32_t *len)
202 - static const size_t COUNT_TO_LENGTH[5] = {1, 3, 4, 5, 7};
203 + static const uint32_t COUNT_TO_LENGTH[5] = {1, 3, 4, 5, 7};
204 struct curveType *curve = NULL;
205 uint32_t type = read_u32(src, offset);
207 @@ -484,19 +640,23 @@ static void read_nested_curveType(struct mem_source *src, struct curveType *(*cu
208 uint32_t channel_offset = 0;
210 for (i = 0; i < num_channels; i++) {
212 + uint32_t tag_len = ~0;
214 (*curveArray)[i] = read_curveType(src, curve_offset + channel_offset, &tag_len);
215 if (!(*curveArray)[i]) {
216 invalid_source(src, "invalid nested curveType curve");
219 + if (tag_len == ~0) {
220 + invalid_source(src, "invalid nested curveType tag length");
224 channel_offset += tag_len;
226 if ((tag_len % 4) != 0)
227 channel_offset += 4 - (tag_len % 4);
232 static void mAB_release(struct lutmABType *lut)
233 @@ -540,7 +700,7 @@ static struct lutmABType *read_tag_lutmABType(struct mem_source *src, struct tag
234 // We require 3in/out channels since we only support RGB->XYZ (or RGB->LAB)
235 // XXX: If we remove this restriction make sure that the number of channels
236 // is less or equal to the maximum number of mAB curves in qcmsint.h
237 - // also check for clut_size overflow.
238 + // also check for clut_size overflow. Also make sure it's != 0
239 if (num_in_channels != 3 || num_out_channels != 3)
242 @@ -570,6 +730,9 @@ static struct lutmABType *read_tag_lutmABType(struct mem_source *src, struct tag
243 // clut_size can not overflow since lg(256^num_in_channels) = 24 bits.
244 for (i = 0; i < num_in_channels; i++) {
245 clut_size *= read_u8(src, clut_offset + i);
246 + if (clut_size == 0) {
247 + invalid_source(src, "bad clut_size");
252 @@ -590,6 +753,9 @@ static struct lutmABType *read_tag_lutmABType(struct mem_source *src, struct tag
254 for (i = 0; i < num_in_channels; i++) {
255 lut->num_grid_points[i] = read_u8(src, clut_offset + i);
256 + if (lut->num_grid_points[i] == 0) {
257 + invalid_source(src, "bad grid_points");
261 // Reverse the processing of transformation elements for mBA type.
262 @@ -657,7 +823,7 @@ static struct lutType *read_tag_lutType(struct mem_source *src, struct tag_index
263 uint16_t num_input_table_entries;
264 uint16_t num_output_table_entries;
265 uint8_t in_chan, grid_points, out_chan;
266 - uint32_t clut_offset, output_offset;
267 + size_t clut_offset, output_offset;
271 @@ -672,6 +838,10 @@ static struct lutType *read_tag_lutType(struct mem_source *src, struct tag_index
272 } else if (type == LUT16_TYPE) {
273 num_input_table_entries = read_u16(src, offset + 48);
274 num_output_table_entries = read_u16(src, offset + 50);
275 + if (num_input_table_entries == 0 || num_output_table_entries == 0) {
276 + invalid_source(src, "Bad channel count");
281 assert(0); // the caller checks that this doesn't happen
282 @@ -685,15 +855,18 @@ static struct lutType *read_tag_lutType(struct mem_source *src, struct tag_index
284 clut_size = pow(grid_points, in_chan);
285 if (clut_size > MAX_CLUT_SIZE) {
286 + invalid_source(src, "CLUT too large");
290 if (in_chan != 3 || out_chan != 3) {
291 + invalid_source(src, "CLUT only supports RGB");
295 lut = malloc(sizeof(struct lutType) + (num_input_table_entries * in_chan + clut_size*out_chan + num_output_table_entries * out_chan)*sizeof(float));
297 + invalid_source(src, "CLUT too large");
301 @@ -704,9 +877,9 @@ static struct lutType *read_tag_lutType(struct mem_source *src, struct tag_index
303 lut->num_input_table_entries = num_input_table_entries;
304 lut->num_output_table_entries = num_output_table_entries;
305 - lut->num_input_channels = read_u8(src, offset + 8);
306 - lut->num_output_channels = read_u8(src, offset + 9);
307 - lut->num_clut_grid_points = read_u8(src, offset + 10);
308 + lut->num_input_channels = in_chan;
309 + lut->num_output_channels = out_chan;
310 + lut->num_clut_grid_points = grid_points;
311 lut->e00 = read_s15Fixed16Number(src, offset+12);
312 lut->e01 = read_s15Fixed16Number(src, offset+16);
313 lut->e02 = read_s15Fixed16Number(src, offset+20);
314 @@ -979,6 +1152,9 @@ qcms_profile* qcms_profile_sRGB(void)
315 return NO_MEM_PROFILE;
317 profile = qcms_profile_create_rgb_with_table(D65, Rec709Primaries, table, 1024);
319 + strcpy(profile->description, "sRGB IEC61966-2.1");
324 @@ -997,6 +1173,9 @@ qcms_profile* qcms_profile_from_memory(const void *mem, size_t size)
329 + return INVALID_PROFILE;
331 length = read_u32(src, 0);
332 if (length <= size) {
333 // shrink the area that we can read if appropriate
334 @@ -1028,6 +1207,15 @@ qcms_profile* qcms_profile_from_memory(const void *mem, size_t size)
335 if (!src->valid || !index.tags)
336 goto invalid_tag_table;
338 + if (!read_tag_descType(profile, src, index, TAG_desc))
339 + goto invalid_tag_table;
340 +#if defined(__APPLE__)
341 + if (!read_tag_dscmType(profile, src, index, TAG_dscm))
342 + goto invalid_tag_table;
343 + if (!read_tag_mmodType(profile, src, index, TAG_mmod))
344 + goto invalid_tag_table;
347 if (find_tag(index, TAG_CHAD)) {
348 profile->chromaticAdaption = read_tag_s15Fixed16ArrayType(src, index, TAG_CHAD);
350 @@ -1098,6 +1286,16 @@ invalid_profile:
351 return INVALID_PROFILE;
354 +qcms_bool qcms_profile_match(qcms_profile *p1, qcms_profile *p2)
356 + return memcmp(p1->description, p2->description, sizeof p1->description) == 0;
359 +const char* qcms_profile_get_description(qcms_profile *profile)
361 + return profile->description;
364 qcms_intent qcms_profile_get_rendering_intent(qcms_profile *profile)
366 return profile->rendering_intent;
367 diff --git a/third_party/qcms/src/qcms.h b/third_party/qcms/src/qcms.h
368 index 7d83623..e9c0b09 100644
369 --- a/third_party/qcms/src/qcms.h
370 +++ b/third_party/qcms/src/qcms.h
371 @@ -40,6 +40,12 @@ sale, use or other dealings in this Software without written
372 authorization from SunSoft Inc.
373 ******************************************************************/
376 + * QCMS, in general, is not threadsafe. However, it should be safe to create
377 + * profile and transformation objects on different threads, so long as you
378 + * don't use the same objects on different threads at the same time.
382 * Color Space Signatures
383 * Note that only icSigXYZData and icSigLabData are valid
384 @@ -102,6 +108,12 @@ typedef enum {
388 +/* Format of the output data for qcms_transform_data_type() */
394 /* the names for the following two types are sort of ugly */
397 @@ -136,6 +148,9 @@ qcms_bool qcms_profile_is_bogus(qcms_profile *profile);
398 qcms_intent qcms_profile_get_rendering_intent(qcms_profile *profile);
399 icColorSpaceSignature qcms_profile_get_color_space(qcms_profile *profile);
401 +qcms_bool qcms_profile_match(qcms_profile *p1, qcms_profile *p2);
402 +const char* qcms_profile_get_description(qcms_profile *profile);
404 void qcms_profile_precache_output_transform(qcms_profile *profile);
406 qcms_transform* qcms_transform_create(
407 @@ -143,9 +158,14 @@ qcms_transform* qcms_transform_create(
408 qcms_profile* out, qcms_data_type out_type,
411 -void qcms_transform_release(qcms_transform *);
412 +qcms_bool qcms_transform_create_LUT_zyx_bgra(
413 + qcms_profile *in, qcms_profile* out, qcms_intent intent,
414 + int samples, unsigned char* lut);
416 void qcms_transform_data(qcms_transform *transform, void *src, void *dest, size_t length);
417 +void qcms_transform_data_type(qcms_transform *transform, void *src, void *dest, size_t length, qcms_output_type type);
419 +void qcms_transform_release(qcms_transform *);
421 void qcms_enable_iccv4();
423 diff --git a/third_party/qcms/src/qcmsint.h b/third_party/qcms/src/qcmsint.h
424 index 53a3420..4116ed5 100644
425 --- a/third_party/qcms/src/qcmsint.h
426 +++ b/third_party/qcms/src/qcmsint.h
427 @@ -45,6 +45,11 @@ struct precache_output
428 #define ALIGN __attribute__(( aligned (16) ))
431 +typedef struct _qcms_format_type {
436 struct _qcms_transform {
437 float ALIGN matrix[3][4];
438 float *input_gamma_table_r;
439 @@ -88,7 +93,7 @@ struct _qcms_transform {
440 struct precache_output *output_table_g;
441 struct precache_output *output_table_b;
443 - void (*transform_fn)(struct _qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length);
444 + void (*transform_fn)(struct _qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, struct _qcms_format_type output_format);
448 @@ -225,6 +230,7 @@ struct tag_value {
449 #define LAB_SIGNATURE 0x4C616220
451 struct _qcms_profile {
452 + char description[64];
454 uint32_t color_space;
456 @@ -280,18 +286,40 @@ qcms_bool set_rgb_colorants(qcms_profile *profile, qcms_CIE_xyY white_point, qcm
457 void qcms_transform_data_rgb_out_lut_sse2(qcms_transform *transform,
462 + qcms_format_type output_format);
463 void qcms_transform_data_rgba_out_lut_sse2(qcms_transform *transform,
468 + qcms_format_type output_format);
469 void qcms_transform_data_rgb_out_lut_sse1(qcms_transform *transform,
474 + qcms_format_type output_format);
475 void qcms_transform_data_rgba_out_lut_sse1(qcms_transform *transform,
480 + qcms_format_type output_format);
482 extern qcms_bool qcms_supports_iccv4;
487 +long __cdecl _InterlockedIncrement(long volatile *);
488 +long __cdecl _InterlockedDecrement(long volatile *);
489 +#pragma intrinsic(_InterlockedIncrement)
490 +#pragma intrinsic(_InterlockedDecrement)
492 +#define qcms_atomic_increment(x) _InterlockedIncrement((long volatile *)&x)
493 +#define qcms_atomic_decrement(x) _InterlockedDecrement((long volatile*)&x)
497 +#define qcms_atomic_increment(x) __sync_add_and_fetch(&x, 1)
498 +#define qcms_atomic_decrement(x) __sync_sub_and_fetch(&x, 1)
501 diff --git a/third_party/qcms/src/qcmstypes.h b/third_party/qcms/src/qcmstypes.h
502 index 56d8de3..d58f691 100644
503 --- a/third_party/qcms/src/qcmstypes.h
504 +++ b/third_party/qcms/src/qcmstypes.h
511 -#include "prtypes.h"
513 -/* prtypes.h defines IS_LITTLE_ENDIAN and IS_BIG ENDIAN */
515 -#if defined (__SVR4) && defined (__sun)
516 -/* int_types.h gets included somehow, so avoid redefining the types differently */
517 -#include <sys/int_types.h>
518 -#elif defined (_AIX)
519 -#include <sys/types.h>
520 -#elif !defined(ANDROID) && !defined(__OpenBSD__)
521 -typedef PRInt8 int8_t;
522 -typedef PRUint8 uint8_t;
523 -typedef PRInt16 int16_t;
524 -typedef PRUint16 uint16_t;
525 -typedef PRInt32 int32_t;
526 -typedef PRUint32 uint32_t;
527 -typedef PRInt64 int64_t;
528 -typedef PRUint64 uint64_t;
531 -/* OS/2's stdlib typdefs uintptr_t. So we'll just include that so we don't collide */
533 -#elif !defined(__intptr_t_defined) && !defined(_UINTPTR_T_DEFINED)
534 -typedef PRUptrdiff uintptr_t;
540 #if BYTE_ORDER == LITTLE_ENDIAN
541 #define IS_LITTLE_ENDIAN
542 #elif BYTE_ORDER == BIG_ENDIAN
543 @@ -75,7 +44,7 @@ typedef PRUptrdiff uintptr_t;
545 #if defined (_SVR4) || defined (SVR4) || defined (__OpenBSD__) || defined (_sgi) || defined (__sun) || defined (sun) || defined (__digital__)
546 # include <inttypes.h>
547 -#elif defined (_MSC_VER)
548 +#elif defined (_MSC_VER) && _MSC_VER < 1600
549 typedef __int8 int8_t;
550 typedef unsigned __int8 uint8_t;
551 typedef __int16 int16_t;
552 @@ -87,7 +56,12 @@ typedef unsigned __int64 uint64_t;
554 typedef unsigned __int64 uintptr_t;
556 +#pragma warning(push)
557 +/* Disable benign redefinition of type warning 4142 */
558 +#pragma warning(disable:4142)
559 typedef unsigned long uintptr_t;
560 +/* Restore warnings */
561 +#pragma warning(pop)
565 @@ -96,8 +70,6 @@ typedef unsigned long uintptr_t;
571 typedef qcms_bool bool;
574 diff --git a/third_party/qcms/src/transform-sse1.c b/third_party/qcms/src/transform-sse1.c
575 index 2f34db5..aaee1bf 100644
576 --- a/third_party/qcms/src/transform-sse1.c
577 +++ b/third_party/qcms/src/transform-sse1.c
578 @@ -34,7 +34,8 @@ static const ALIGN float clampMaxValueX4[4] =
579 void qcms_transform_data_rgb_out_lut_sse1(qcms_transform *transform,
584 + qcms_format_type output_format)
587 float (*mat)[4] = transform->matrix;
588 @@ -70,6 +71,8 @@ void qcms_transform_data_rgb_out_lut_sse1(qcms_transform *transform,
590 /* working variables */
591 __m128 vec_r, vec_g, vec_b, result;
592 + const int r_out = output_format.r;
593 + const int b_out = output_format.b;
597 @@ -116,9 +119,9 @@ void qcms_transform_data_rgb_out_lut_sse1(qcms_transform *transform,
600 /* use calc'd indices to output RGB values */
601 - dest[0] = otdata_r[output[0]];
602 - dest[1] = otdata_g[output[1]];
603 - dest[2] = otdata_b[output[2]];
604 + dest[r_out] = otdata_r[output[0]];
605 + dest[1] = otdata_g[output[1]];
606 + dest[b_out] = otdata_b[output[2]];
610 @@ -141,9 +144,9 @@ void qcms_transform_data_rgb_out_lut_sse1(qcms_transform *transform,
611 result = _mm_movehl_ps(result, result);
612 *((__m64 *)&output[2]) = _mm_cvtps_pi32(result);
614 - dest[0] = otdata_r[output[0]];
615 - dest[1] = otdata_g[output[1]];
616 - dest[2] = otdata_b[output[2]];
617 + dest[r_out] = otdata_r[output[0]];
618 + dest[1] = otdata_g[output[1]];
619 + dest[b_out] = otdata_b[output[2]];
623 @@ -151,7 +154,8 @@ void qcms_transform_data_rgb_out_lut_sse1(qcms_transform *transform,
624 void qcms_transform_data_rgba_out_lut_sse1(qcms_transform *transform,
629 + qcms_format_type output_format)
632 float (*mat)[4] = transform->matrix;
633 @@ -187,6 +191,8 @@ void qcms_transform_data_rgba_out_lut_sse1(qcms_transform *transform,
635 /* working variables */
636 __m128 vec_r, vec_g, vec_b, result;
637 + const int r_out = output_format.r;
638 + const int b_out = output_format.b;
642 @@ -239,9 +245,9 @@ void qcms_transform_data_rgba_out_lut_sse1(qcms_transform *transform,
645 /* use calc'd indices to output RGB values */
646 - dest[0] = otdata_r[output[0]];
647 - dest[1] = otdata_g[output[1]];
648 - dest[2] = otdata_b[output[2]];
649 + dest[r_out] = otdata_r[output[0]];
650 + dest[1] = otdata_g[output[1]];
651 + dest[b_out] = otdata_b[output[2]];
655 @@ -266,9 +272,9 @@ void qcms_transform_data_rgba_out_lut_sse1(qcms_transform *transform,
656 result = _mm_movehl_ps(result, result);
657 *((__m64 *)&output[2]) = _mm_cvtps_pi32(result);
659 - dest[0] = otdata_r[output[0]];
660 - dest[1] = otdata_g[output[1]];
661 - dest[2] = otdata_b[output[2]];
662 + dest[r_out] = otdata_r[output[0]];
663 + dest[1] = otdata_g[output[1]];
664 + dest[b_out] = otdata_b[output[2]];
668 diff --git a/third_party/qcms/src/transform-sse2.c b/third_party/qcms/src/transform-sse2.c
669 index 6a5faf9..fa7f2d1 100644
670 --- a/third_party/qcms/src/transform-sse2.c
671 +++ b/third_party/qcms/src/transform-sse2.c
672 @@ -34,7 +34,8 @@ static const ALIGN float clampMaxValueX4[4] =
673 void qcms_transform_data_rgb_out_lut_sse2(qcms_transform *transform,
678 + qcms_format_type output_format)
681 float (*mat)[4] = transform->matrix;
682 @@ -70,6 +71,8 @@ void qcms_transform_data_rgb_out_lut_sse2(qcms_transform *transform,
684 /* working variables */
685 __m128 vec_r, vec_g, vec_b, result;
686 + const int r_out = output_format.r;
687 + const int b_out = output_format.b;
691 @@ -114,9 +117,9 @@ void qcms_transform_data_rgb_out_lut_sse2(qcms_transform *transform,
694 /* use calc'd indices to output RGB values */
695 - dest[0] = otdata_r[output[0]];
696 - dest[1] = otdata_g[output[1]];
697 - dest[2] = otdata_b[output[2]];
698 + dest[r_out] = otdata_r[output[0]];
699 + dest[1] = otdata_g[output[1]];
700 + dest[b_out] = otdata_b[output[2]];
704 @@ -137,15 +140,16 @@ void qcms_transform_data_rgb_out_lut_sse2(qcms_transform *transform,
706 _mm_store_si128((__m128i*)output, _mm_cvtps_epi32(result));
708 - dest[0] = otdata_r[output[0]];
709 - dest[1] = otdata_g[output[1]];
710 - dest[2] = otdata_b[output[2]];
711 + dest[r_out] = otdata_r[output[0]];
712 + dest[1] = otdata_g[output[1]];
713 + dest[b_out] = otdata_b[output[2]];
716 void qcms_transform_data_rgba_out_lut_sse2(qcms_transform *transform,
721 + qcms_format_type output_format)
724 float (*mat)[4] = transform->matrix;
725 @@ -181,6 +185,8 @@ void qcms_transform_data_rgba_out_lut_sse2(qcms_transform *transform,
727 /* working variables */
728 __m128 vec_r, vec_g, vec_b, result;
729 + const int r_out = output_format.r;
730 + const int b_out = output_format.b;
734 @@ -231,9 +237,9 @@ void qcms_transform_data_rgba_out_lut_sse2(qcms_transform *transform,
737 /* use calc'd indices to output RGB values */
738 - dest[0] = otdata_r[output[0]];
739 - dest[1] = otdata_g[output[1]];
740 - dest[2] = otdata_b[output[2]];
741 + dest[r_out] = otdata_r[output[0]];
742 + dest[1] = otdata_g[output[1]];
743 + dest[b_out] = otdata_b[output[2]];
747 @@ -256,7 +262,7 @@ void qcms_transform_data_rgba_out_lut_sse2(qcms_transform *transform,
749 _mm_store_si128((__m128i*)output, _mm_cvtps_epi32(result));
751 - dest[0] = otdata_r[output[0]];
752 - dest[1] = otdata_g[output[1]];
753 - dest[2] = otdata_b[output[2]];
754 + dest[r_out] = otdata_r[output[0]];
755 + dest[1] = otdata_g[output[1]];
756 + dest[b_out] = otdata_b[output[2]];
758 diff --git a/third_party/qcms/src/transform.c b/third_party/qcms/src/transform.c
759 index 9a6562b..f669a6b 100644
760 --- a/third_party/qcms/src/transform.c
761 +++ b/third_party/qcms/src/transform.c
762 @@ -181,11 +181,20 @@ compute_chromatic_adaption(struct CIE_XYZ source_white_point,
764 adaption_matrix(struct CIE_XYZ source_illumination, struct CIE_XYZ target_illumination)
766 +#if defined (_MSC_VER)
767 +#pragma warning(push)
768 +/* Disable double to float truncation warning 4305 */
769 +#pragma warning(disable:4305)
771 struct matrix lam_rigg = {{ // Bradford matrix
772 { 0.8951, 0.2664, -0.1614 },
773 { -0.7502, 1.7135, 0.0367 },
774 { 0.0389, -0.0685, 1.0296 }
776 +#if defined (_MSC_VER)
777 +/* Restore warnings */
778 +#pragma warning(pop)
780 return compute_chromatic_adaption(source_illumination, target_illumination, lam_rigg);
783 @@ -230,8 +239,11 @@ qcms_bool set_rgb_colorants(qcms_profile *profile, qcms_CIE_xyY white_point, qcm
787 -static void qcms_transform_data_rgb_out_pow(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
788 +static void qcms_transform_data_rgb_out_pow(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
790 + const int r_out = output_format.r;
791 + const int b_out = output_format.b;
794 float (*mat)[4] = transform->matrix;
795 for (i=0; i<length; i++) {
796 @@ -251,15 +263,19 @@ static void qcms_transform_data_rgb_out_pow(qcms_transform *transform, unsigned
797 float out_device_g = pow(out_linear_g, transform->out_gamma_g);
798 float out_device_b = pow(out_linear_b, transform->out_gamma_b);
800 - *dest++ = clamp_u8(255*out_device_r);
801 - *dest++ = clamp_u8(255*out_device_g);
802 - *dest++ = clamp_u8(255*out_device_b);
803 + dest[r_out] = clamp_u8(out_device_r*255);
804 + dest[1] = clamp_u8(out_device_g*255);
805 + dest[b_out] = clamp_u8(out_device_b*255);
811 -static void qcms_transform_data_gray_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
812 +static void qcms_transform_data_gray_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
814 + const int r_out = output_format.r;
815 + const int b_out = output_format.b;
818 for (i = 0; i < length; i++) {
819 float out_device_r, out_device_g, out_device_b;
820 @@ -267,13 +283,14 @@ static void qcms_transform_data_gray_out_lut(qcms_transform *transform, unsigned
822 float linear = transform->input_gamma_table_gray[device];
824 - out_device_r = lut_interp_linear(linear, transform->output_gamma_lut_r, transform->output_gamma_lut_r_length);
825 + out_device_r = lut_interp_linear(linear, transform->output_gamma_lut_r, transform->output_gamma_lut_r_length);
826 out_device_g = lut_interp_linear(linear, transform->output_gamma_lut_g, transform->output_gamma_lut_g_length);
827 out_device_b = lut_interp_linear(linear, transform->output_gamma_lut_b, transform->output_gamma_lut_b_length);
829 - *dest++ = clamp_u8(out_device_r*255);
830 - *dest++ = clamp_u8(out_device_g*255);
831 - *dest++ = clamp_u8(out_device_b*255);
832 + dest[r_out] = clamp_u8(out_device_r*255);
833 + dest[1] = clamp_u8(out_device_g*255);
834 + dest[b_out] = clamp_u8(out_device_b*255);
839 @@ -283,8 +300,11 @@ static void qcms_transform_data_gray_out_lut(qcms_transform *transform, unsigned
840 See: ftp://ftp.alvyray.com/Acrobat/17_Nonln.pdf
843 -static void qcms_transform_data_graya_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
844 +static void qcms_transform_data_graya_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
846 + const int r_out = output_format.r;
847 + const int b_out = output_format.b;
850 for (i = 0; i < length; i++) {
851 float out_device_r, out_device_g, out_device_b;
852 @@ -293,20 +313,24 @@ static void qcms_transform_data_graya_out_lut(qcms_transform *transform, unsigne
854 float linear = transform->input_gamma_table_gray[device];
856 - out_device_r = lut_interp_linear(linear, transform->output_gamma_lut_r, transform->output_gamma_lut_r_length);
857 + out_device_r = lut_interp_linear(linear, transform->output_gamma_lut_r, transform->output_gamma_lut_r_length);
858 out_device_g = lut_interp_linear(linear, transform->output_gamma_lut_g, transform->output_gamma_lut_g_length);
859 out_device_b = lut_interp_linear(linear, transform->output_gamma_lut_b, transform->output_gamma_lut_b_length);
861 - *dest++ = clamp_u8(out_device_r*255);
862 - *dest++ = clamp_u8(out_device_g*255);
863 - *dest++ = clamp_u8(out_device_b*255);
865 + dest[r_out] = clamp_u8(out_device_r*255);
866 + dest[1] = clamp_u8(out_device_g*255);
867 + dest[b_out] = clamp_u8(out_device_b*255);
874 -static void qcms_transform_data_gray_out_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
875 +static void qcms_transform_data_gray_out_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
877 + const int r_out = output_format.r;
878 + const int b_out = output_format.b;
881 for (i = 0; i < length; i++) {
882 unsigned char device = *src++;
883 @@ -317,14 +341,19 @@ static void qcms_transform_data_gray_out_precache(qcms_transform *transform, uns
884 /* we could round here... */
885 gray = linear * PRECACHE_OUTPUT_MAX;
887 - *dest++ = transform->output_table_r->data[gray];
888 - *dest++ = transform->output_table_g->data[gray];
889 - *dest++ = transform->output_table_b->data[gray];
890 + dest[r_out] = transform->output_table_r->data[gray];
891 + dest[1] = transform->output_table_g->data[gray];
892 + dest[b_out] = transform->output_table_b->data[gray];
897 -static void qcms_transform_data_graya_out_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
899 +static void qcms_transform_data_graya_out_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
901 + const int r_out = output_format.r;
902 + const int b_out = output_format.b;
905 for (i = 0; i < length; i++) {
906 unsigned char device = *src++;
907 @@ -336,15 +365,19 @@ static void qcms_transform_data_graya_out_precache(qcms_transform *transform, un
908 /* we could round here... */
909 gray = linear * PRECACHE_OUTPUT_MAX;
911 - *dest++ = transform->output_table_r->data[gray];
912 - *dest++ = transform->output_table_g->data[gray];
913 - *dest++ = transform->output_table_b->data[gray];
915 + dest[r_out] = transform->output_table_r->data[gray];
916 + dest[1] = transform->output_table_g->data[gray];
917 + dest[b_out] = transform->output_table_b->data[gray];
923 -static void qcms_transform_data_rgb_out_lut_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
924 +static void qcms_transform_data_rgb_out_lut_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
926 + const int r_out = output_format.r;
927 + const int b_out = output_format.b;
930 float (*mat)[4] = transform->matrix;
931 for (i = 0; i < length; i++) {
932 @@ -370,14 +403,18 @@ static void qcms_transform_data_rgb_out_lut_precache(qcms_transform *transform,
933 g = out_linear_g * PRECACHE_OUTPUT_MAX;
934 b = out_linear_b * PRECACHE_OUTPUT_MAX;
936 - *dest++ = transform->output_table_r->data[r];
937 - *dest++ = transform->output_table_g->data[g];
938 - *dest++ = transform->output_table_b->data[b];
939 + dest[r_out] = transform->output_table_r->data[r];
940 + dest[1] = transform->output_table_g->data[g];
941 + dest[b_out] = transform->output_table_b->data[b];
946 -static void qcms_transform_data_rgba_out_lut_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
947 +static void qcms_transform_data_rgba_out_lut_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
949 + const int r_out = output_format.r;
950 + const int b_out = output_format.b;
953 float (*mat)[4] = transform->matrix;
954 for (i = 0; i < length; i++) {
955 @@ -404,16 +441,21 @@ static void qcms_transform_data_rgba_out_lut_precache(qcms_transform *transform,
956 g = out_linear_g * PRECACHE_OUTPUT_MAX;
957 b = out_linear_b * PRECACHE_OUTPUT_MAX;
959 - *dest++ = transform->output_table_r->data[r];
960 - *dest++ = transform->output_table_g->data[g];
961 - *dest++ = transform->output_table_b->data[b];
963 + dest[r_out] = transform->output_table_r->data[r];
964 + dest[1] = transform->output_table_g->data[g];
965 + dest[b_out] = transform->output_table_b->data[b];
973 -static void qcms_transform_data_clut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length) {
974 +static void qcms_transform_data_clut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
976 + const int r_out = output_format.r;
977 + const int b_out = output_format.b;
981 int x_len = transform->grid_size;
982 @@ -462,15 +504,20 @@ static void qcms_transform_data_clut(qcms_transform *transform, unsigned char *s
983 float b_y2 = lerp(b_x3, b_x4, y_d);
984 float clut_b = lerp(b_y1, b_y2, z_d);
986 - *dest++ = clamp_u8(clut_r*255.0f);
987 - *dest++ = clamp_u8(clut_g*255.0f);
988 - *dest++ = clamp_u8(clut_b*255.0f);
990 + dest[r_out] = clamp_u8(clut_r*255.0f);
991 + dest[1] = clamp_u8(clut_g*255.0f);
992 + dest[b_out] = clamp_u8(clut_b*255.0f);
998 // Using lcms' tetra interpolation algorithm.
999 -static void qcms_transform_data_tetra_clut_rgba(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length) {
1000 +static void qcms_transform_data_tetra_clut_rgba(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
1002 + const int r_out = output_format.r;
1003 + const int b_out = output_format.b;
1007 int x_len = transform->grid_size;
1008 @@ -577,15 +624,20 @@ static void qcms_transform_data_tetra_clut_rgba(qcms_transform *transform, unsig
1009 clut_g = c0_g + c1_g*rx + c2_g*ry + c3_g*rz;
1010 clut_b = c0_b + c1_b*rx + c2_b*ry + c3_b*rz;
1012 - *dest++ = clamp_u8(clut_r*255.0f);
1013 - *dest++ = clamp_u8(clut_g*255.0f);
1014 - *dest++ = clamp_u8(clut_b*255.0f);
1017 + dest[r_out] = clamp_u8(clut_r*255.0f);
1018 + dest[1] = clamp_u8(clut_g*255.0f);
1019 + dest[b_out] = clamp_u8(clut_b*255.0f);
1025 // Using lcms' tetra interpolation code.
1026 -static void qcms_transform_data_tetra_clut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length) {
1027 +static void qcms_transform_data_tetra_clut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
1029 + const int r_out = output_format.r;
1030 + const int b_out = output_format.b;
1034 int x_len = transform->grid_size;
1035 @@ -691,14 +743,18 @@ static void qcms_transform_data_tetra_clut(qcms_transform *transform, unsigned c
1036 clut_g = c0_g + c1_g*rx + c2_g*ry + c3_g*rz;
1037 clut_b = c0_b + c1_b*rx + c2_b*ry + c3_b*rz;
1039 - *dest++ = clamp_u8(clut_r*255.0f);
1040 - *dest++ = clamp_u8(clut_g*255.0f);
1041 - *dest++ = clamp_u8(clut_b*255.0f);
1043 + dest[r_out] = clamp_u8(clut_r*255.0f);
1044 + dest[1] = clamp_u8(clut_g*255.0f);
1045 + dest[b_out] = clamp_u8(clut_b*255.0f);
1050 -static void qcms_transform_data_rgb_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
1051 +static void qcms_transform_data_rgb_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
1053 + const int r_out = output_format.r;
1054 + const int b_out = output_format.b;
1057 float (*mat)[4] = transform->matrix;
1058 for (i = 0; i < length; i++) {
1059 @@ -726,14 +782,18 @@ static void qcms_transform_data_rgb_out_lut(qcms_transform *transform, unsigned
1060 out_device_b = lut_interp_linear(out_linear_b,
1061 transform->output_gamma_lut_b, transform->output_gamma_lut_b_length);
1063 - *dest++ = clamp_u8(out_device_r*255);
1064 - *dest++ = clamp_u8(out_device_g*255);
1065 - *dest++ = clamp_u8(out_device_b*255);
1066 + dest[r_out] = clamp_u8(out_device_r*255);
1067 + dest[1] = clamp_u8(out_device_g*255);
1068 + dest[b_out] = clamp_u8(out_device_b*255);
1073 -static void qcms_transform_data_rgba_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
1074 +static void qcms_transform_data_rgba_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
1076 + const int r_out = output_format.r;
1077 + const int b_out = output_format.b;
1080 float (*mat)[4] = transform->matrix;
1081 for (i = 0; i < length; i++) {
1082 @@ -762,16 +822,20 @@ static void qcms_transform_data_rgba_out_lut(qcms_transform *transform, unsigned
1083 out_device_b = lut_interp_linear(out_linear_b,
1084 transform->output_gamma_lut_b, transform->output_gamma_lut_b_length);
1086 - *dest++ = clamp_u8(out_device_r*255);
1087 - *dest++ = clamp_u8(out_device_g*255);
1088 - *dest++ = clamp_u8(out_device_b*255);
1090 + dest[r_out] = clamp_u8(out_device_r*255);
1091 + dest[1] = clamp_u8(out_device_g*255);
1092 + dest[b_out] = clamp_u8(out_device_b*255);
1099 -static void qcms_transform_data_rgb_out_linear(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
1100 +static void qcms_transform_data_rgb_out_linear(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
1102 + const int r_out = output_format.r;
1103 + const int b_out = output_format.b;
1106 float (*mat)[4] = transform->matrix;
1107 for (i = 0; i < length; i++) {
1108 @@ -787,16 +851,25 @@ static void qcms_transform_data_rgb_out_linear(qcms_transform *transform, unsign
1109 float out_linear_g = mat[0][1]*linear_r + mat[1][1]*linear_g + mat[2][1]*linear_b;
1110 float out_linear_b = mat[0][2]*linear_r + mat[1][2]*linear_g + mat[2][2]*linear_b;
1112 - *dest++ = clamp_u8(out_linear_r*255);
1113 - *dest++ = clamp_u8(out_linear_g*255);
1114 - *dest++ = clamp_u8(out_linear_b*255);
1115 + dest[r_out] = clamp_u8(out_linear_r*255);
1116 + dest[1] = clamp_u8(out_linear_g*255);
1117 + dest[b_out] = clamp_u8(out_linear_b*255);
1124 + * If users create and destroy objects on different threads, even if the same
1125 + * objects aren't used on different threads at the same time, we can still run
1126 + * into trouble with refcounts if they aren't atomic.
1128 + * This can lead to us prematurely deleting the precache if threads get unlucky
1129 + * and write the wrong value to the ref count.
1131 static struct precache_output *precache_reference(struct precache_output *p)
1134 + qcms_atomic_increment(p->ref_count);
1138 @@ -810,12 +883,12 @@ static struct precache_output *precache_create()
1140 void precache_release(struct precache_output *p)
1142 - if (--p->ref_count == 0) {
1143 + if (qcms_atomic_decrement(p->ref_count) == 0) {
1148 -#ifdef HAS_POSIX_MEMALIGN
1149 +#ifdef HAVE_POSIX_MEMALIGN
1150 static qcms_transform *transform_alloc(void)
1153 @@ -994,13 +1067,15 @@ void qcms_profile_precache_output_transform(qcms_profile *profile)
1154 if (profile->color_space != RGB_SIGNATURE)
1157 - /* don't precache since we will use the B2A LUT */
1158 - if (profile->B2A0)
1160 + if (qcms_supports_iccv4) {
1161 + /* don't precache since we will use the B2A LUT */
1162 + if (profile->B2A0)
1165 - /* don't precache since we will use the mBA LUT */
1168 + /* don't precache since we will use the mBA LUT */
1173 /* don't precache if we do not have the TRC curves */
1174 if (!profile->redTRC || !profile->greenTRC || !profile->blueTRC)
1175 @@ -1043,28 +1118,31 @@ qcms_transform* qcms_transform_precacheLUT_float(qcms_transform *transform, qcms
1181 src = malloc(lutSize*sizeof(float));
1182 dest = malloc(lutSize*sizeof(float));
1185 - /* Prepare a list of points we want to sample */
1186 + /* Prepare a list of points we want to sample: x, y, z order */
1188 + inverse = 1 / (float)(samples-1);
1189 for (x = 0; x < samples; x++) {
1190 for (y = 0; y < samples; y++) {
1191 for (z = 0; z < samples; z++) {
1192 - src[l++] = x / (float)(samples-1);
1193 - src[l++] = y / (float)(samples-1);
1194 - src[l++] = z / (float)(samples-1);
1195 + src[l++] = x * inverse; // r
1196 + src[l++] = y * inverse; // g
1197 + src[l++] = z * inverse; // b
1202 lut = qcms_chain_transform(in, out, src, dest, lutSize);
1205 - transform->r_clut = &lut[0];
1206 - transform->g_clut = &lut[1];
1207 - transform->b_clut = &lut[2];
1208 + transform->r_clut = &lut[0]; // r
1209 + transform->g_clut = &lut[1]; // g
1210 + transform->b_clut = &lut[2]; // b
1211 transform->grid_size = samples;
1212 if (in_type == QCMS_DATA_RGBA_8) {
1213 transform->transform_fn = qcms_transform_data_tetra_clut_rgba;
1214 @@ -1074,11 +1152,12 @@ qcms_transform* qcms_transform_precacheLUT_float(qcms_transform *transform, qcms
1219 - //XXX: qcms_modular_transform_data may return either the src or dest buffer. If so it must not be free-ed
1220 + // XXX: qcms_modular_transform_data may return the lut in either the src or the
1221 + // dest buffer. If so, that buffer must not be freed.
1222 if (src && lut != src) {
1224 - } else if (dest && lut != src) {
1226 + if (dest && lut != dest) {
1230 @@ -1088,6 +1167,71 @@ qcms_transform* qcms_transform_precacheLUT_float(qcms_transform *transform, qcms
1234 +/* Create a transform LUT using the given number of sample points. The transform LUT data is stored
1235 + in the output (cube) in bgra format in zyx sample order. */
1236 +qcms_bool qcms_transform_create_LUT_zyx_bgra(qcms_profile *in, qcms_profile *out, qcms_intent intent,
1237 + int samples, unsigned char* cube)
1241 + uint32_t lutSize = 3 * samples * samples * samples;
1243 + float* src = NULL;
1244 + float* dest = NULL;
1245 + float* lut = NULL;
1248 + src = malloc(lutSize*sizeof(float));
1249 + dest = malloc(lutSize*sizeof(float));
1251 + if (src && dest) {
1252 + /* Prepare a list of points we want to sample: z, y, x order */
1254 + inverse = 1 / (float)(samples-1);
1255 + for (z = 0; z < samples; z++) {
1256 + for (y = 0; y < samples; y++) {
1257 + for (x = 0; x < samples; x++) {
1258 + src[l++] = x * inverse; // r
1259 + src[l++] = y * inverse; // g
1260 + src[l++] = z * inverse; // b
1265 + lut = qcms_chain_transform(in, out, src, dest, lutSize);
1269 + for (z = 0; z < samples; z++) {
1270 + for (y = 0; y < samples; y++) {
1271 + for (x = 0; x < samples; x++) {
1272 + cube[index++] = (int)floorf(lut[l + 2] * 255.0f + 0.5f); // b
1273 + cube[index++] = (int)floorf(lut[l + 1] * 255.0f + 0.5f); // g
1274 + cube[index++] = (int)floorf(lut[l + 0] * 255.0f + 0.5f); // r
1275 + cube[index++] = 255; // a
1283 + // XXX: qcms_modular_transform_data may return the lut data in either the src or
1284 + // dest buffer so free src, dest, and lut with care.
1286 + if (src && lut != src)
1288 + if (dest && lut != dest)
1299 #define NO_MEM_TRANSFORM NULL
1301 qcms_transform* qcms_transform_create(
1302 @@ -1157,14 +1301,14 @@ qcms_transform* qcms_transform_create(
1307 +#if defined(SSE2_ENABLE) && defined(X86)
1308 if (sse_version_available() >= 2) {
1309 if (in_type == QCMS_DATA_RGB_8)
1310 transform->transform_fn = qcms_transform_data_rgb_out_lut_sse2;
1312 transform->transform_fn = qcms_transform_data_rgba_out_lut_sse2;
1314 -#if !(defined(_MSC_VER) && defined(_M_AMD64))
1315 +#if defined(SSE2_ENABLE) && !(defined(_MSC_VER) && defined(_M_AMD64))
1316 /* Microsoft Compiler for x64 doesn't support MMX.
1317 * SSE code uses MMX so that we disable on x64 */
1319 @@ -1256,13 +1400,34 @@ qcms_transform* qcms_transform_create(
1323 -#if defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__)
1324 +/* __force_align_arg_pointer__ is an x86-only attribute, and gcc/clang warn on unused
1325 + * attributes. Don't use this on ARM or AMD64. __has_attribute can detect the presence
1326 + * of the attribute but is currently only supported by clang */
1327 +#if defined(__has_attribute)
1328 +#define HAS_FORCE_ALIGN_ARG_POINTER __has_attribute(__force_align_arg_pointer__)
1329 +#elif defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__) && !defined(__arm__) && !defined(__mips__)
1330 +#define HAS_FORCE_ALIGN_ARG_POINTER 1
1332 +#define HAS_FORCE_ALIGN_ARG_POINTER 0
1335 +#if HAS_FORCE_ALIGN_ARG_POINTER
1336 /* we need this to avoid crashes when gcc assumes the stack is 128bit aligned */
1337 __attribute__((__force_align_arg_pointer__))
1339 void qcms_transform_data(qcms_transform *transform, void *src, void *dest, size_t length)
1341 - transform->transform_fn(transform, src, dest, length);
1342 + static const struct _qcms_format_type output_rgbx = { 0, 2 };
1344 + transform->transform_fn(transform, src, dest, length, output_rgbx);
1347 +void qcms_transform_data_type(qcms_transform *transform, void *src, void *dest, size_t length, qcms_output_type type)
1349 + static const struct _qcms_format_type output_rgbx = { 0, 2 };
1350 + static const struct _qcms_format_type output_bgrx = { 2, 0 };
1352 + transform->transform_fn(transform, src, dest, length, type == QCMS_OUTPUT_BGRX ? output_bgrx : output_rgbx);
1355 qcms_bool qcms_supports_iccv4;
1356 diff --git a/third_party/qcms/src/transform_util.c b/third_party/qcms/src/transform_util.c
1357 index e8447e5..f616c3f 100644
1358 --- a/third_party/qcms/src/transform_util.c
1359 +++ b/third_party/qcms/src/transform_util.c
1362 /* value must be a value between 0 and 1 */
1363 //XXX: is the above a good restriction to have?
1364 -float lut_interp_linear(double value, uint16_t *table, int length)
1365 +float lut_interp_linear(double value, uint16_t *table, size_t length)
1368 value = value * (length - 1); // scale to length of the array
1369 @@ -49,11 +49,11 @@ float lut_interp_linear(double value, uint16_t *table, int length)
1372 /* same as above but takes and returns a uint16_t value representing a range from 0..1 */
1373 -uint16_t lut_interp_linear16(uint16_t input_value, uint16_t *table, int length)
1374 +uint16_t lut_interp_linear16(uint16_t input_value, uint16_t *table, size_t length)
1376 /* Start scaling input_value to the length of the array: 65535*(length-1).
1377 * We'll divide out the 65535 next */
1378 - uint32_t value = (input_value * (length - 1));
1379 + uintptr_t value = (input_value * (length - 1));
1380 uint32_t upper = (value + 65534) / 65535; /* equivalent to ceil(value/65535) */
1381 uint32_t lower = value / 65535; /* equivalent to floor(value/65535) */
1382 /* interp is the distance from upper to value scaled to 0..65535 */
1383 @@ -67,11 +67,11 @@ uint16_t lut_interp_linear16(uint16_t input_value, uint16_t *table, int length)
1384 /* same as above but takes an input_value from 0..PRECACHE_OUTPUT_MAX
1385 * and returns a uint8_t value representing a range from 0..1 */
1387 -uint8_t lut_interp_linear_precache_output(uint32_t input_value, uint16_t *table, int length)
1388 +uint8_t lut_interp_linear_precache_output(uint32_t input_value, uint16_t *table, size_t length)
1390 /* Start scaling input_value to the length of the array: PRECACHE_OUTPUT_MAX*(length-1).
1391 * We'll divide out the PRECACHE_OUTPUT_MAX next */
1392 - uint32_t value = (input_value * (length - 1));
1393 + uintptr_t value = (input_value * (length - 1));
1395 /* equivalent to ceil(value/PRECACHE_OUTPUT_MAX) */
1396 uint32_t upper = (value + PRECACHE_OUTPUT_MAX-1) / PRECACHE_OUTPUT_MAX;
1397 @@ -91,7 +91,7 @@ uint8_t lut_interp_linear_precache_output(uint32_t input_value, uint16_t *table,
1399 /* value must be a value between 0 and 1 */
1400 //XXX: is the above a good restriction to have?
1401 -float lut_interp_linear_float(float value, float *table, int length)
1402 +float lut_interp_linear_float(float value, float *table, size_t length)
1405 value = value * (length - 1);
1406 @@ -235,6 +235,21 @@ float u8Fixed8Number_to_float(uint16_t x)
1410 +/* The SSE2 code uses min & max which let NaNs pass through.
1411 + We want to try to prevent that here by ensuring that
1412 + the gamma table is within expected values. */
1413 +void validate_gamma_table(float gamma_table[256])
1416 + for (i = 0; i < 256; i++) {
1417 + // Note: we test for "not in range" rather than "out of range"
1418 + // so that NaNs, which fail every comparison, are caught too
1419 + if (!(gamma_table[i] >= 0.f && gamma_table[i] <= 1.f)) {
1420 + gamma_table[i] = 0.f;
1425 float *build_input_gamma_table(struct curveType *TRC)
1428 @@ -254,7 +269,10 @@ float *build_input_gamma_table(struct curveType *TRC)
1432 - return gamma_table;
1434 + validate_gamma_table(gamma_table);
1436 + return gamma_table;
1439 struct matrix build_colorant_matrix(qcms_profile *p)
1440 @@ -295,7 +313,7 @@ uint16_fract_t lut_inverse_interp16(uint16_t Value, uint16_t LutTable[], int len
1443 while (LutTable[NumZeroes] == 0 && NumZeroes < length-1)
1447 // There are no zeros at the beginning and we are trying to find a zero, so
1448 // return anything. It seems zero would be the less destructive choice
1449 @@ -305,22 +323,22 @@ uint16_fract_t lut_inverse_interp16(uint16_t Value, uint16_t LutTable[], int len
1452 while (LutTable[length-1- NumPoles] == 0xFFFF && NumPoles < length-1)
1456 // Does the curve belong to this case?
1457 if (NumZeroes > 1 || NumPoles > 1)
1462 - // Identify if value fall downto 0 or FFFF zone
1463 + // Identify if value fall downto 0 or FFFF zone
1464 if (Value == 0) return 0;
1465 // if (Value == 0xFFFF) return 0xFFFF;
1467 // else restrict to valid zone
1469 - a = ((NumZeroes-1) * 0xFFFF) / (length-1);
1470 + a = ((NumZeroes-1) * 0xFFFF) / (length-1);
1471 b = ((length-1 - NumPoles) * 0xFFFF) / (length-1);
1477 @@ -332,12 +350,12 @@ uint16_fract_t lut_inverse_interp16(uint16_t Value, uint16_t LutTable[], int len
1481 - res = (int) lut_interp_linear16((uint16_fract_t) (x-1), LutTable, length);
1482 + res = (int) lut_interp_linear16((uint16_fract_t) (x-1), LutTable, length);
1486 - // Found exact match.
1488 + // Found exact match.
1490 return (uint16_fract_t) (x - 1);
1493 @@ -347,14 +365,14 @@ uint16_fract_t lut_inverse_interp16(uint16_t Value, uint16_t LutTable[], int len
1495 // Not found, should we interpolate?
1499 // Get surrounding nodes
1502 val2 = (length-1) * ((double) (x - 1) / 65535.0);
1504 cell0 = (int) floor(val2);
1505 cell1 = (int) ceil(val2);
1508 if (cell0 == cell1) return (uint16_fract_t) x;
1510 y0 = LutTable[cell0] ;
1511 @@ -373,8 +391,7 @@ uint16_fract_t lut_inverse_interp16(uint16_t Value, uint16_t LutTable[], int len
1512 if (f < 0.0) return (uint16_fract_t) 0;
1513 if (f >= 65535.0) return (uint16_fract_t) 0xFFFF;
1515 - return (uint16_fract_t) floor(f + 0.5);
1517 + return (uint16_fract_t) floor(f + 0.5);
1521 @@ -390,7 +407,7 @@ uint16_fract_t lut_inverse_interp16(uint16_t Value, uint16_t LutTable[], int len
1522 which has an maximum error of about 9855 (pixel difference of ~38.346)
1524 For now, we punt the decision of output size to the caller. */
1525 -static uint16_t *invert_lut(uint16_t *table, int length, int out_length)
1526 +static uint16_t *invert_lut(uint16_t *table, int length, size_t out_length)
1529 /* for now we invert the lut by creating a lut of size out_length
1530 diff --git a/third_party/qcms/src/transform_util.h b/third_party/qcms/src/transform_util.h
1531 index 8f358a8..de465f4 100644
1532 --- a/third_party/qcms/src/transform_util.h
1533 +++ b/third_party/qcms/src/transform_util.h
1535 //XXX: could use a bettername
1536 typedef uint16_t uint16_fract_t;
1538 -float lut_interp_linear(double value, uint16_t *table, int length);
1539 -float lut_interp_linear_float(float value, float *table, int length);
1540 -uint16_t lut_interp_linear16(uint16_t input_value, uint16_t *table, int length);
1541 +float lut_interp_linear(double value, uint16_t *table, size_t length);
1542 +float lut_interp_linear_float(float value, float *table, size_t length);
1543 +uint16_t lut_interp_linear16(uint16_t input_value, uint16_t *table, size_t length);
1546 static inline float lerp(float a, float b, float t)