1 /* vim: set ts=8 sw=8 noexpandtab: */
3 // Copyright (C) 2009 Mozilla Corporation
4 // Copyright (C) 1998-2007 Marti Maria
6 // Permission is hereby granted, free of charge, to any person obtaining
7 // a copy of this software and associated documentation files (the "Software"),
8 // to deal in the Software without restriction, including without limitation
9 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 // and/or sell copies of the Software, and to permit persons to whom the Software
11 // is furnished to do so, subject to the following conditions:
13 // The above copyright notice and this permission notice shall be included in
14 // all copies or substantial portions of the Software.
16 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
18 // THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 #include <string.h> //memcpy
30 #include "halffloat.h"
32 #include "transform_util.h"
34 /* for MSVC, GCC, Intel, and Sun compilers */
35 #if defined(_M_IX86) || defined(__i386__) || defined(__i386) || defined(_M_AMD64) || defined(__x86_64__) || defined(__x86_64)
37 #endif /* _M_IX86 || __i386__ || __i386 || _M_AMD64 || __x86_64__ || __x86_64 */
39 // Build a White point, primary chromas transfer matrix from RGB to CIE XYZ
40 // This is just an approximation, I am not handling all the non-linear
41 // aspects of the RGB to XYZ process, and assumming that the gamma correction
42 // has transitive property in the tranformation chain.
46 // - First I build the absolute conversion matrix using
47 // primaries in XYZ. This matrix is next inverted
48 // - Then I eval the source white point across this matrix
49 // obtaining the coeficients of the transformation
50 // - Then, I apply these coeficients to the original matrix
51 static struct matrix
build_RGB_to_XYZ_transfer_matrix(qcms_CIE_xyY white
, qcms_CIE_xyYTRIPLE primrs
)
53 struct matrix primaries
;
54 struct matrix primaries_invert
;
56 struct vector white_point
;
68 return matrix_invalid();
77 primaries
.m
[0][0] = xr
;
78 primaries
.m
[0][1] = xg
;
79 primaries
.m
[0][2] = xb
;
81 primaries
.m
[1][0] = yr
;
82 primaries
.m
[1][1] = yg
;
83 primaries
.m
[1][2] = yb
;
85 primaries
.m
[2][0] = 1 - xr
- yr
;
86 primaries
.m
[2][1] = 1 - xg
- yg
;
87 primaries
.m
[2][2] = 1 - xb
- yb
;
88 primaries
.invalid
= false;
90 white_point
.v
[0] = xn
/yn
;
91 white_point
.v
[1] = 1.;
92 white_point
.v
[2] = (1.0-xn
-yn
)/yn
;
94 primaries_invert
= matrix_invert(primaries
);
96 coefs
= matrix_eval(primaries_invert
, white_point
);
98 result
.m
[0][0] = coefs
.v
[0]*xr
;
99 result
.m
[0][1] = coefs
.v
[1]*xg
;
100 result
.m
[0][2] = coefs
.v
[2]*xb
;
102 result
.m
[1][0] = coefs
.v
[0]*yr
;
103 result
.m
[1][1] = coefs
.v
[1]*yg
;
104 result
.m
[1][2] = coefs
.v
[2]*yb
;
106 result
.m
[2][0] = coefs
.v
[0]*(1.-xr
-yr
);
107 result
.m
[2][1] = coefs
.v
[1]*(1.-xg
-yg
);
108 result
.m
[2][2] = coefs
.v
[2]*(1.-xb
-yb
);
109 result
.invalid
= primaries_invert
.invalid
;
120 /* CIE Illuminant D50 */
121 static const struct CIE_XYZ D50_XYZ
= {
127 /* from lcms: xyY2XYZ()
128 * corresponds to argyll: icmYxy2XYZ() */
129 static struct CIE_XYZ
xyY2XYZ(qcms_CIE_xyY source
)
132 dest
.X
= (source
.x
/ source
.y
) * source
.Y
;
134 dest
.Z
= ((1 - source
.x
- source
.y
) / source
.y
) * source
.Y
;
138 /* from lcms: ComputeChromaticAdaption */
139 // Compute chromatic adaption matrix using chad as cone matrix
141 compute_chromatic_adaption(struct CIE_XYZ source_white_point
,
142 struct CIE_XYZ dest_white_point
,
145 struct matrix chad_inv
;
146 struct vector cone_source_XYZ
, cone_source_rgb
;
147 struct vector cone_dest_XYZ
, cone_dest_rgb
;
148 struct matrix cone
, tmp
;
151 chad_inv
= matrix_invert(tmp
);
153 cone_source_XYZ
.v
[0] = source_white_point
.X
;
154 cone_source_XYZ
.v
[1] = source_white_point
.Y
;
155 cone_source_XYZ
.v
[2] = source_white_point
.Z
;
157 cone_dest_XYZ
.v
[0] = dest_white_point
.X
;
158 cone_dest_XYZ
.v
[1] = dest_white_point
.Y
;
159 cone_dest_XYZ
.v
[2] = dest_white_point
.Z
;
161 cone_source_rgb
= matrix_eval(chad
, cone_source_XYZ
);
162 cone_dest_rgb
= matrix_eval(chad
, cone_dest_XYZ
);
164 cone
.m
[0][0] = cone_dest_rgb
.v
[0]/cone_source_rgb
.v
[0];
168 cone
.m
[1][1] = cone_dest_rgb
.v
[1]/cone_source_rgb
.v
[1];
172 cone
.m
[2][2] = cone_dest_rgb
.v
[2]/cone_source_rgb
.v
[2];
173 cone
.invalid
= false;
176 return matrix_multiply(chad_inv
, matrix_multiply(cone
, chad
));
179 /* from lcms: cmsAdaptionMatrix */
180 // Returns the final chrmatic adaptation from illuminant FromIll to Illuminant ToIll
181 // Bradford is assumed
183 adaption_matrix(struct CIE_XYZ source_illumination
, struct CIE_XYZ target_illumination
)
185 #if defined (_MSC_VER)
186 #pragma warning(push)
187 /* Disable double to float truncation warning 4305 */
188 #pragma warning(disable:4305)
190 struct matrix lam_rigg
= {{ // Bradford matrix
191 { 0.8951, 0.2664, -0.1614 },
192 { -0.7502, 1.7135, 0.0367 },
193 { 0.0389, -0.0685, 1.0296 }
195 #if defined (_MSC_VER)
196 /* Restore warnings */
199 return compute_chromatic_adaption(source_illumination
, target_illumination
, lam_rigg
);
202 /* from lcms: cmsAdaptMatrixToD50 */
203 static struct matrix
adapt_matrix_to_D50(struct matrix r
, qcms_CIE_xyY source_white_pt
)
206 struct matrix Bradford
;
208 if (source_white_pt
.y
== 0.0)
209 return matrix_invalid();
211 Dn
= xyY2XYZ(source_white_pt
);
213 Bradford
= adaption_matrix(Dn
, D50_XYZ
);
214 return matrix_multiply(Bradford
, r
);
217 qcms_bool
set_rgb_colorants(qcms_profile
*profile
, qcms_CIE_xyY white_point
, qcms_CIE_xyYTRIPLE primaries
)
219 struct matrix colorants
;
220 colorants
= build_RGB_to_XYZ_transfer_matrix(white_point
, primaries
);
221 colorants
= adapt_matrix_to_D50(colorants
, white_point
);
223 if (colorants
.invalid
)
226 /* note: there's a transpose type of operation going on here */
227 profile
->redColorant
.X
= double_to_s15Fixed16Number(colorants
.m
[0][0]);
228 profile
->redColorant
.Y
= double_to_s15Fixed16Number(colorants
.m
[1][0]);
229 profile
->redColorant
.Z
= double_to_s15Fixed16Number(colorants
.m
[2][0]);
231 profile
->greenColorant
.X
= double_to_s15Fixed16Number(colorants
.m
[0][1]);
232 profile
->greenColorant
.Y
= double_to_s15Fixed16Number(colorants
.m
[1][1]);
233 profile
->greenColorant
.Z
= double_to_s15Fixed16Number(colorants
.m
[2][1]);
235 profile
->blueColorant
.X
= double_to_s15Fixed16Number(colorants
.m
[0][2]);
236 profile
->blueColorant
.Y
= double_to_s15Fixed16Number(colorants
.m
[1][2]);
237 profile
->blueColorant
.Z
= double_to_s15Fixed16Number(colorants
.m
[2][2]);
243 static void qcms_transform_data_rgb_out_pow(qcms_transform
*transform
, unsigned char *src
, unsigned char *dest
, size_t length
, qcms_format_type output_format
)
245 const int r_out
= output_format
.r
;
246 const int b_out
= output_format
.b
;
249 float (*mat
)[4] = transform
->matrix
;
250 for (i
=0; i
<length
; i
++) {
251 unsigned char device_r
= *src
++;
252 unsigned char device_g
= *src
++;
253 unsigned char device_b
= *src
++;
255 float linear_r
= transform
->input_gamma_table_r
[device_r
];
256 float linear_g
= transform
->input_gamma_table_g
[device_g
];
257 float linear_b
= transform
->input_gamma_table_b
[device_b
];
259 float out_linear_r
= mat
[0][0]*linear_r
+ mat
[1][0]*linear_g
+ mat
[2][0]*linear_b
;
260 float out_linear_g
= mat
[0][1]*linear_r
+ mat
[1][1]*linear_g
+ mat
[2][1]*linear_b
;
261 float out_linear_b
= mat
[0][2]*linear_r
+ mat
[1][2]*linear_g
+ mat
[2][2]*linear_b
;
263 float out_device_r
= pow(out_linear_r
, transform
->out_gamma_r
);
264 float out_device_g
= pow(out_linear_g
, transform
->out_gamma_g
);
265 float out_device_b
= pow(out_linear_b
, transform
->out_gamma_b
);
267 dest
[r_out
] = clamp_u8(out_device_r
*255);
268 dest
[1] = clamp_u8(out_device_g
*255);
269 dest
[b_out
] = clamp_u8(out_device_b
*255);
275 static void qcms_transform_data_gray_out_lut(qcms_transform
*transform
, unsigned char *src
, unsigned char *dest
, size_t length
, qcms_format_type output_format
)
277 const int r_out
= output_format
.r
;
278 const int b_out
= output_format
.b
;
281 for (i
= 0; i
< length
; i
++) {
282 float out_device_r
, out_device_g
, out_device_b
;
283 unsigned char device
= *src
++;
285 float linear
= transform
->input_gamma_table_gray
[device
];
287 out_device_r
= lut_interp_linear(linear
, transform
->output_gamma_lut_r
, transform
->output_gamma_lut_r_length
);
288 out_device_g
= lut_interp_linear(linear
, transform
->output_gamma_lut_g
, transform
->output_gamma_lut_g_length
);
289 out_device_b
= lut_interp_linear(linear
, transform
->output_gamma_lut_b
, transform
->output_gamma_lut_b_length
);
291 dest
[r_out
] = clamp_u8(out_device_r
*255);
292 dest
[1] = clamp_u8(out_device_g
*255);
293 dest
[b_out
] = clamp_u8(out_device_b
*255);
298 /* Alpha is not corrected.
299 A rationale for this is found in Alvy Ray's "Should Alpha Be Nonlinear If
300 RGB Is?" Tech Memo 17 (December 14, 1998).
301 See: ftp://ftp.alvyray.com/Acrobat/17_Nonln.pdf
304 static void qcms_transform_data_graya_out_lut(qcms_transform
*transform
, unsigned char *src
, unsigned char *dest
, size_t length
, qcms_format_type output_format
)
306 const int r_out
= output_format
.r
;
307 const int b_out
= output_format
.b
;
310 for (i
= 0; i
< length
; i
++) {
311 float out_device_r
, out_device_g
, out_device_b
;
312 unsigned char device
= *src
++;
313 unsigned char alpha
= *src
++;
315 float linear
= transform
->input_gamma_table_gray
[device
];
317 out_device_r
= lut_interp_linear(linear
, transform
->output_gamma_lut_r
, transform
->output_gamma_lut_r_length
);
318 out_device_g
= lut_interp_linear(linear
, transform
->output_gamma_lut_g
, transform
->output_gamma_lut_g_length
);
319 out_device_b
= lut_interp_linear(linear
, transform
->output_gamma_lut_b
, transform
->output_gamma_lut_b_length
);
321 dest
[r_out
] = clamp_u8(out_device_r
*255);
322 dest
[1] = clamp_u8(out_device_g
*255);
323 dest
[b_out
] = clamp_u8(out_device_b
*255);
330 static void qcms_transform_data_gray_out_precache(qcms_transform
*transform
, unsigned char *src
, unsigned char *dest
, size_t length
, qcms_format_type output_format
)
332 const int r_out
= output_format
.r
;
333 const int b_out
= output_format
.b
;
336 for (i
= 0; i
< length
; i
++) {
337 unsigned char device
= *src
++;
340 float linear
= transform
->input_gamma_table_gray
[device
];
342 /* we could round here... */
343 gray
= linear
* PRECACHE_OUTPUT_MAX
;
345 dest
[r_out
] = transform
->output_table_r
->data
[gray
];
346 dest
[1] = transform
->output_table_g
->data
[gray
];
347 dest
[b_out
] = transform
->output_table_b
->data
[gray
];
353 static void qcms_transform_data_graya_out_precache(qcms_transform
*transform
, unsigned char *src
, unsigned char *dest
, size_t length
, qcms_format_type output_format
)
355 const int r_out
= output_format
.r
;
356 const int b_out
= output_format
.b
;
359 for (i
= 0; i
< length
; i
++) {
360 unsigned char device
= *src
++;
361 unsigned char alpha
= *src
++;
364 float linear
= transform
->input_gamma_table_gray
[device
];
366 /* we could round here... */
367 gray
= linear
* PRECACHE_OUTPUT_MAX
;
369 dest
[r_out
] = transform
->output_table_r
->data
[gray
];
370 dest
[1] = transform
->output_table_g
->data
[gray
];
371 dest
[b_out
] = transform
->output_table_b
->data
[gray
];
377 static void qcms_transform_data_rgb_out_lut_precache(qcms_transform
*transform
, unsigned char *src
, unsigned char *dest
, size_t length
, qcms_format_type output_format
)
379 const int r_out
= output_format
.r
;
380 const int b_out
= output_format
.b
;
383 float (*mat
)[4] = transform
->matrix
;
384 for (i
= 0; i
< length
; i
++) {
385 unsigned char device_r
= *src
++;
386 unsigned char device_g
= *src
++;
387 unsigned char device_b
= *src
++;
390 float linear_r
= transform
->input_gamma_table_r
[device_r
];
391 float linear_g
= transform
->input_gamma_table_g
[device_g
];
392 float linear_b
= transform
->input_gamma_table_b
[device_b
];
394 float out_linear_r
= mat
[0][0]*linear_r
+ mat
[1][0]*linear_g
+ mat
[2][0]*linear_b
;
395 float out_linear_g
= mat
[0][1]*linear_r
+ mat
[1][1]*linear_g
+ mat
[2][1]*linear_b
;
396 float out_linear_b
= mat
[0][2]*linear_r
+ mat
[1][2]*linear_g
+ mat
[2][2]*linear_b
;
398 out_linear_r
= clamp_float(out_linear_r
);
399 out_linear_g
= clamp_float(out_linear_g
);
400 out_linear_b
= clamp_float(out_linear_b
);
402 /* we could round here... */
403 r
= out_linear_r
* PRECACHE_OUTPUT_MAX
;
404 g
= out_linear_g
* PRECACHE_OUTPUT_MAX
;
405 b
= out_linear_b
* PRECACHE_OUTPUT_MAX
;
407 dest
[r_out
] = transform
->output_table_r
->data
[r
];
408 dest
[1] = transform
->output_table_g
->data
[g
];
409 dest
[b_out
] = transform
->output_table_b
->data
[b
];
414 static void qcms_transform_data_rgba_out_lut_precache(qcms_transform
*transform
, unsigned char *src
, unsigned char *dest
, size_t length
, qcms_format_type output_format
)
416 const int r_out
= output_format
.r
;
417 const int b_out
= output_format
.b
;
420 float (*mat
)[4] = transform
->matrix
;
421 for (i
= 0; i
< length
; i
++) {
422 unsigned char device_r
= *src
++;
423 unsigned char device_g
= *src
++;
424 unsigned char device_b
= *src
++;
425 unsigned char alpha
= *src
++;
428 float linear_r
= transform
->input_gamma_table_r
[device_r
];
429 float linear_g
= transform
->input_gamma_table_g
[device_g
];
430 float linear_b
= transform
->input_gamma_table_b
[device_b
];
432 float out_linear_r
= mat
[0][0]*linear_r
+ mat
[1][0]*linear_g
+ mat
[2][0]*linear_b
;
433 float out_linear_g
= mat
[0][1]*linear_r
+ mat
[1][1]*linear_g
+ mat
[2][1]*linear_b
;
434 float out_linear_b
= mat
[0][2]*linear_r
+ mat
[1][2]*linear_g
+ mat
[2][2]*linear_b
;
436 out_linear_r
= clamp_float(out_linear_r
);
437 out_linear_g
= clamp_float(out_linear_g
);
438 out_linear_b
= clamp_float(out_linear_b
);
440 /* we could round here... */
441 r
= out_linear_r
* PRECACHE_OUTPUT_MAX
;
442 g
= out_linear_g
* PRECACHE_OUTPUT_MAX
;
443 b
= out_linear_b
* PRECACHE_OUTPUT_MAX
;
445 dest
[r_out
] = transform
->output_table_r
->data
[r
];
446 dest
[1] = transform
->output_table_g
->data
[g
];
447 dest
[b_out
] = transform
->output_table_b
->data
[b
];
455 static void qcms_transform_data_clut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
457 const int r_out = output_format.r;
458 const int b_out = output_format.b;
462 int x_len = transform->grid_size;
463 int len = x_len * x_len;
464 float* r_table = transform->r_clut;
465 float* g_table = transform->g_clut;
466 float* b_table = transform->b_clut;
468 for (i = 0; i < length; i++) {
469 unsigned char in_r = *src++;
470 unsigned char in_g = *src++;
471 unsigned char in_b = *src++;
472 float linear_r = in_r/255.0f, linear_g=in_g/255.0f, linear_b = in_b/255.0f;
474 int x = floor(linear_r * (transform->grid_size-1));
475 int y = floor(linear_g * (transform->grid_size-1));
476 int z = floor(linear_b * (transform->grid_size-1));
477 int x_n = ceil(linear_r * (transform->grid_size-1));
478 int y_n = ceil(linear_g * (transform->grid_size-1));
479 int z_n = ceil(linear_b * (transform->grid_size-1));
480 float x_d = linear_r * (transform->grid_size-1) - x;
481 float y_d = linear_g * (transform->grid_size-1) - y;
482 float z_d = linear_b * (transform->grid_size-1) - z;
484 float r_x1 = lerp(CLU(r_table,x,y,z), CLU(r_table,x_n,y,z), x_d);
485 float r_x2 = lerp(CLU(r_table,x,y_n,z), CLU(r_table,x_n,y_n,z), x_d);
486 float r_y1 = lerp(r_x1, r_x2, y_d);
487 float r_x3 = lerp(CLU(r_table,x,y,z_n), CLU(r_table,x_n,y,z_n), x_d);
488 float r_x4 = lerp(CLU(r_table,x,y_n,z_n), CLU(r_table,x_n,y_n,z_n), x_d);
489 float r_y2 = lerp(r_x3, r_x4, y_d);
490 float clut_r = lerp(r_y1, r_y2, z_d);
492 float g_x1 = lerp(CLU(g_table,x,y,z), CLU(g_table,x_n,y,z), x_d);
493 float g_x2 = lerp(CLU(g_table,x,y_n,z), CLU(g_table,x_n,y_n,z), x_d);
494 float g_y1 = lerp(g_x1, g_x2, y_d);
495 float g_x3 = lerp(CLU(g_table,x,y,z_n), CLU(g_table,x_n,y,z_n), x_d);
496 float g_x4 = lerp(CLU(g_table,x,y_n,z_n), CLU(g_table,x_n,y_n,z_n), x_d);
497 float g_y2 = lerp(g_x3, g_x4, y_d);
498 float clut_g = lerp(g_y1, g_y2, z_d);
500 float b_x1 = lerp(CLU(b_table,x,y,z), CLU(b_table,x_n,y,z), x_d);
501 float b_x2 = lerp(CLU(b_table,x,y_n,z), CLU(b_table,x_n,y_n,z), x_d);
502 float b_y1 = lerp(b_x1, b_x2, y_d);
503 float b_x3 = lerp(CLU(b_table,x,y,z_n), CLU(b_table,x_n,y,z_n), x_d);
504 float b_x4 = lerp(CLU(b_table,x,y_n,z_n), CLU(b_table,x_n,y_n,z_n), x_d);
505 float b_y2 = lerp(b_x3, b_x4, y_d);
506 float clut_b = lerp(b_y1, b_y2, z_d);
508 dest[r_out] = clamp_u8(clut_r*255.0f);
509 dest[1] = clamp_u8(clut_g*255.0f);
510 dest[b_out] = clamp_u8(clut_b*255.0f);
516 // Using lcms' tetra interpolation algorithm.
517 void qcms_transform_data_tetra_clut_rgba(qcms_transform
*transform
, unsigned char *src
, unsigned char *dest
, size_t length
, qcms_format_type output_format
)
519 const int r_out
= output_format
.r
;
520 const int b_out
= output_format
.b
;
524 int x_len
= transform
->grid_size
;
525 int len
= x_len
* x_len
;
526 float* r_table
= transform
->r_clut
;
527 float* g_table
= transform
->g_clut
;
528 float* b_table
= transform
->b_clut
;
529 float c0_r
, c1_r
, c2_r
, c3_r
;
530 float c0_g
, c1_g
, c2_g
, c3_g
;
531 float c0_b
, c1_b
, c2_b
, c3_b
;
532 float clut_r
, clut_g
, clut_b
;
533 for (i
= 0; i
< length
; i
++) {
534 unsigned char in_r
= *src
++;
535 unsigned char in_g
= *src
++;
536 unsigned char in_b
= *src
++;
537 unsigned char in_a
= *src
++;
538 float linear_r
= in_r
/255.0f
, linear_g
=in_g
/255.0f
, linear_b
= in_b
/255.0f
;
540 int x
= floor(linear_r
* (transform
->grid_size
-1));
541 int y
= floor(linear_g
* (transform
->grid_size
-1));
542 int z
= floor(linear_b
* (transform
->grid_size
-1));
543 int x_n
= ceil(linear_r
* (transform
->grid_size
-1));
544 int y_n
= ceil(linear_g
* (transform
->grid_size
-1));
545 int z_n
= ceil(linear_b
* (transform
->grid_size
-1));
546 float rx
= linear_r
* (transform
->grid_size
-1) - x
;
547 float ry
= linear_g
* (transform
->grid_size
-1) - y
;
548 float rz
= linear_b
* (transform
->grid_size
-1) - z
;
550 c0_r
= CLU(r_table
, x
, y
, z
);
551 c0_g
= CLU(g_table
, x
, y
, z
);
552 c0_b
= CLU(b_table
, x
, y
, z
);
555 if (ry
>= rz
) { //rx >= ry && ry >= rz
556 c1_r
= CLU(r_table
, x_n
, y
, z
) - c0_r
;
557 c2_r
= CLU(r_table
, x_n
, y_n
, z
) - CLU(r_table
, x_n
, y
, z
);
558 c3_r
= CLU(r_table
, x_n
, y_n
, z_n
) - CLU(r_table
, x_n
, y_n
, z
);
559 c1_g
= CLU(g_table
, x_n
, y
, z
) - c0_g
;
560 c2_g
= CLU(g_table
, x_n
, y_n
, z
) - CLU(g_table
, x_n
, y
, z
);
561 c3_g
= CLU(g_table
, x_n
, y_n
, z_n
) - CLU(g_table
, x_n
, y_n
, z
);
562 c1_b
= CLU(b_table
, x_n
, y
, z
) - c0_b
;
563 c2_b
= CLU(b_table
, x_n
, y_n
, z
) - CLU(b_table
, x_n
, y
, z
);
564 c3_b
= CLU(b_table
, x_n
, y_n
, z_n
) - CLU(b_table
, x_n
, y_n
, z
);
566 if (rx
>= rz
) { //rx >= rz && rz >= ry
567 c1_r
= CLU(r_table
, x_n
, y
, z
) - c0_r
;
568 c2_r
= CLU(r_table
, x_n
, y_n
, z_n
) - CLU(r_table
, x_n
, y
, z_n
);
569 c3_r
= CLU(r_table
, x_n
, y
, z_n
) - CLU(r_table
, x_n
, y
, z
);
570 c1_g
= CLU(g_table
, x_n
, y
, z
) - c0_g
;
571 c2_g
= CLU(g_table
, x_n
, y_n
, z_n
) - CLU(g_table
, x_n
, y
, z_n
);
572 c3_g
= CLU(g_table
, x_n
, y
, z_n
) - CLU(g_table
, x_n
, y
, z
);
573 c1_b
= CLU(b_table
, x_n
, y
, z
) - c0_b
;
574 c2_b
= CLU(b_table
, x_n
, y_n
, z_n
) - CLU(b_table
, x_n
, y
, z_n
);
575 c3_b
= CLU(b_table
, x_n
, y
, z_n
) - CLU(b_table
, x_n
, y
, z
);
576 } else { //rz > rx && rx >= ry
577 c1_r
= CLU(r_table
, x_n
, y
, z_n
) - CLU(r_table
, x
, y
, z_n
);
578 c2_r
= CLU(r_table
, x_n
, y_n
, z_n
) - CLU(r_table
, x_n
, y
, z_n
);
579 c3_r
= CLU(r_table
, x
, y
, z_n
) - c0_r
;
580 c1_g
= CLU(g_table
, x_n
, y
, z_n
) - CLU(g_table
, x
, y
, z_n
);
581 c2_g
= CLU(g_table
, x_n
, y_n
, z_n
) - CLU(g_table
, x_n
, y
, z_n
);
582 c3_g
= CLU(g_table
, x
, y
, z_n
) - c0_g
;
583 c1_b
= CLU(b_table
, x_n
, y
, z_n
) - CLU(b_table
, x
, y
, z_n
);
584 c2_b
= CLU(b_table
, x_n
, y_n
, z_n
) - CLU(b_table
, x_n
, y
, z_n
);
585 c3_b
= CLU(b_table
, x
, y
, z_n
) - c0_b
;
589 if (rx
>= rz
) { //ry > rx && rx >= rz
590 c1_r
= CLU(r_table
, x_n
, y_n
, z
) - CLU(r_table
, x
, y_n
, z
);
591 c2_r
= CLU(r_table
, x
, y_n
, z
) - c0_r
;
592 c3_r
= CLU(r_table
, x_n
, y_n
, z_n
) - CLU(r_table
, x_n
, y_n
, z
);
593 c1_g
= CLU(g_table
, x_n
, y_n
, z
) - CLU(g_table
, x
, y_n
, z
);
594 c2_g
= CLU(g_table
, x
, y_n
, z
) - c0_g
;
595 c3_g
= CLU(g_table
, x_n
, y_n
, z_n
) - CLU(g_table
, x_n
, y_n
, z
);
596 c1_b
= CLU(b_table
, x_n
, y_n
, z
) - CLU(b_table
, x
, y_n
, z
);
597 c2_b
= CLU(b_table
, x
, y_n
, z
) - c0_b
;
598 c3_b
= CLU(b_table
, x_n
, y_n
, z_n
) - CLU(b_table
, x_n
, y_n
, z
);
600 if (ry
>= rz
) { //ry >= rz && rz > rx
601 c1_r
= CLU(r_table
, x_n
, y_n
, z_n
) - CLU(r_table
, x
, y_n
, z_n
);
602 c2_r
= CLU(r_table
, x
, y_n
, z
) - c0_r
;
603 c3_r
= CLU(r_table
, x
, y_n
, z_n
) - CLU(r_table
, x
, y_n
, z
);
604 c1_g
= CLU(g_table
, x_n
, y_n
, z_n
) - CLU(g_table
, x
, y_n
, z_n
);
605 c2_g
= CLU(g_table
, x
, y_n
, z
) - c0_g
;
606 c3_g
= CLU(g_table
, x
, y_n
, z_n
) - CLU(g_table
, x
, y_n
, z
);
607 c1_b
= CLU(b_table
, x_n
, y_n
, z_n
) - CLU(b_table
, x
, y_n
, z_n
);
608 c2_b
= CLU(b_table
, x
, y_n
, z
) - c0_b
;
609 c3_b
= CLU(b_table
, x
, y_n
, z_n
) - CLU(b_table
, x
, y_n
, z
);
610 } else { //rz > ry && ry > rx
611 c1_r
= CLU(r_table
, x_n
, y_n
, z_n
) - CLU(r_table
, x
, y_n
, z_n
);
612 c2_r
= CLU(r_table
, x
, y_n
, z_n
) - CLU(r_table
, x
, y
, z_n
);
613 c3_r
= CLU(r_table
, x
, y
, z_n
) - c0_r
;
614 c1_g
= CLU(g_table
, x_n
, y_n
, z_n
) - CLU(g_table
, x
, y_n
, z_n
);
615 c2_g
= CLU(g_table
, x
, y_n
, z_n
) - CLU(g_table
, x
, y
, z_n
);
616 c3_g
= CLU(g_table
, x
, y
, z_n
) - c0_g
;
617 c1_b
= CLU(b_table
, x_n
, y_n
, z_n
) - CLU(b_table
, x
, y_n
, z_n
);
618 c2_b
= CLU(b_table
, x
, y_n
, z_n
) - CLU(b_table
, x
, y
, z_n
);
619 c3_b
= CLU(b_table
, x
, y
, z_n
) - c0_b
;
624 clut_r
= c0_r
+ c1_r
*rx
+ c2_r
*ry
+ c3_r
*rz
;
625 clut_g
= c0_g
+ c1_g
*rx
+ c2_g
*ry
+ c3_g
*rz
;
626 clut_b
= c0_b
+ c1_b
*rx
+ c2_b
*ry
+ c3_b
*rz
;
628 dest
[r_out
] = clamp_u8(clut_r
*255.0f
);
629 dest
[1] = clamp_u8(clut_g
*255.0f
);
630 dest
[b_out
] = clamp_u8(clut_b
*255.0f
);
636 // Using lcms' tetra interpolation code.
637 static void qcms_transform_data_tetra_clut(qcms_transform
*transform
, unsigned char *src
, unsigned char *dest
, size_t length
, qcms_format_type output_format
)
639 const int r_out
= output_format
.r
;
640 const int b_out
= output_format
.b
;
644 int x_len
= transform
->grid_size
;
645 int len
= x_len
* x_len
;
646 float* r_table
= transform
->r_clut
;
647 float* g_table
= transform
->g_clut
;
648 float* b_table
= transform
->b_clut
;
649 float c0_r
, c1_r
, c2_r
, c3_r
;
650 float c0_g
, c1_g
, c2_g
, c3_g
;
651 float c0_b
, c1_b
, c2_b
, c3_b
;
652 float clut_r
, clut_g
, clut_b
;
653 for (i
= 0; i
< length
; i
++) {
654 unsigned char in_r
= *src
++;
655 unsigned char in_g
= *src
++;
656 unsigned char in_b
= *src
++;
657 float linear_r
= in_r
/255.0f
, linear_g
=in_g
/255.0f
, linear_b
= in_b
/255.0f
;
659 int x
= floor(linear_r
* (transform
->grid_size
-1));
660 int y
= floor(linear_g
* (transform
->grid_size
-1));
661 int z
= floor(linear_b
* (transform
->grid_size
-1));
662 int x_n
= ceil(linear_r
* (transform
->grid_size
-1));
663 int y_n
= ceil(linear_g
* (transform
->grid_size
-1));
664 int z_n
= ceil(linear_b
* (transform
->grid_size
-1));
665 float rx
= linear_r
* (transform
->grid_size
-1) - x
;
666 float ry
= linear_g
* (transform
->grid_size
-1) - y
;
667 float rz
= linear_b
* (transform
->grid_size
-1) - z
;
669 c0_r
= CLU(r_table
, x
, y
, z
);
670 c0_g
= CLU(g_table
, x
, y
, z
);
671 c0_b
= CLU(b_table
, x
, y
, z
);
674 if (ry
>= rz
) { //rx >= ry && ry >= rz
675 c1_r
= CLU(r_table
, x_n
, y
, z
) - c0_r
;
676 c2_r
= CLU(r_table
, x_n
, y_n
, z
) - CLU(r_table
, x_n
, y
, z
);
677 c3_r
= CLU(r_table
, x_n
, y_n
, z_n
) - CLU(r_table
, x_n
, y_n
, z
);
678 c1_g
= CLU(g_table
, x_n
, y
, z
) - c0_g
;
679 c2_g
= CLU(g_table
, x_n
, y_n
, z
) - CLU(g_table
, x_n
, y
, z
);
680 c3_g
= CLU(g_table
, x_n
, y_n
, z_n
) - CLU(g_table
, x_n
, y_n
, z
);
681 c1_b
= CLU(b_table
, x_n
, y
, z
) - c0_b
;
682 c2_b
= CLU(b_table
, x_n
, y_n
, z
) - CLU(b_table
, x_n
, y
, z
);
683 c3_b
= CLU(b_table
, x_n
, y_n
, z_n
) - CLU(b_table
, x_n
, y_n
, z
);
685 if (rx
>= rz
) { //rx >= rz && rz >= ry
686 c1_r
= CLU(r_table
, x_n
, y
, z
) - c0_r
;
687 c2_r
= CLU(r_table
, x_n
, y_n
, z_n
) - CLU(r_table
, x_n
, y
, z_n
);
688 c3_r
= CLU(r_table
, x_n
, y
, z_n
) - CLU(r_table
, x_n
, y
, z
);
689 c1_g
= CLU(g_table
, x_n
, y
, z
) - c0_g
;
690 c2_g
= CLU(g_table
, x_n
, y_n
, z_n
) - CLU(g_table
, x_n
, y
, z_n
);
691 c3_g
= CLU(g_table
, x_n
, y
, z_n
) - CLU(g_table
, x_n
, y
, z
);
692 c1_b
= CLU(b_table
, x_n
, y
, z
) - c0_b
;
693 c2_b
= CLU(b_table
, x_n
, y_n
, z_n
) - CLU(b_table
, x_n
, y
, z_n
);
694 c3_b
= CLU(b_table
, x_n
, y
, z_n
) - CLU(b_table
, x_n
, y
, z
);
695 } else { //rz > rx && rx >= ry
696 c1_r
= CLU(r_table
, x_n
, y
, z_n
) - CLU(r_table
, x
, y
, z_n
);
697 c2_r
= CLU(r_table
, x_n
, y_n
, z_n
) - CLU(r_table
, x_n
, y
, z_n
);
698 c3_r
= CLU(r_table
, x
, y
, z_n
) - c0_r
;
699 c1_g
= CLU(g_table
, x_n
, y
, z_n
) - CLU(g_table
, x
, y
, z_n
);
700 c2_g
= CLU(g_table
, x_n
, y_n
, z_n
) - CLU(g_table
, x_n
, y
, z_n
);
701 c3_g
= CLU(g_table
, x
, y
, z_n
) - c0_g
;
702 c1_b
= CLU(b_table
, x_n
, y
, z_n
) - CLU(b_table
, x
, y
, z_n
);
703 c2_b
= CLU(b_table
, x_n
, y_n
, z_n
) - CLU(b_table
, x_n
, y
, z_n
);
704 c3_b
= CLU(b_table
, x
, y
, z_n
) - c0_b
;
708 if (rx
>= rz
) { //ry > rx && rx >= rz
709 c1_r
= CLU(r_table
, x_n
, y_n
, z
) - CLU(r_table
, x
, y_n
, z
);
710 c2_r
= CLU(r_table
, x
, y_n
, z
) - c0_r
;
711 c3_r
= CLU(r_table
, x_n
, y_n
, z_n
) - CLU(r_table
, x_n
, y_n
, z
);
712 c1_g
= CLU(g_table
, x_n
, y_n
, z
) - CLU(g_table
, x
, y_n
, z
);
713 c2_g
= CLU(g_table
, x
, y_n
, z
) - c0_g
;
714 c3_g
= CLU(g_table
, x_n
, y_n
, z_n
) - CLU(g_table
, x_n
, y_n
, z
);
715 c1_b
= CLU(b_table
, x_n
, y_n
, z
) - CLU(b_table
, x
, y_n
, z
);
716 c2_b
= CLU(b_table
, x
, y_n
, z
) - c0_b
;
717 c3_b
= CLU(b_table
, x_n
, y_n
, z_n
) - CLU(b_table
, x_n
, y_n
, z
);
719 if (ry
>= rz
) { //ry >= rz && rz > rx
720 c1_r
= CLU(r_table
, x_n
, y_n
, z_n
) - CLU(r_table
, x
, y_n
, z_n
);
721 c2_r
= CLU(r_table
, x
, y_n
, z
) - c0_r
;
722 c3_r
= CLU(r_table
, x
, y_n
, z_n
) - CLU(r_table
, x
, y_n
, z
);
723 c1_g
= CLU(g_table
, x_n
, y_n
, z_n
) - CLU(g_table
, x
, y_n
, z_n
);
724 c2_g
= CLU(g_table
, x
, y_n
, z
) - c0_g
;
725 c3_g
= CLU(g_table
, x
, y_n
, z_n
) - CLU(g_table
, x
, y_n
, z
);
726 c1_b
= CLU(b_table
, x_n
, y_n
, z_n
) - CLU(b_table
, x
, y_n
, z_n
);
727 c2_b
= CLU(b_table
, x
, y_n
, z
) - c0_b
;
728 c3_b
= CLU(b_table
, x
, y_n
, z_n
) - CLU(b_table
, x
, y_n
, z
);
729 } else { //rz > ry && ry > rx
730 c1_r
= CLU(r_table
, x_n
, y_n
, z_n
) - CLU(r_table
, x
, y_n
, z_n
);
731 c2_r
= CLU(r_table
, x
, y_n
, z_n
) - CLU(r_table
, x
, y
, z_n
);
732 c3_r
= CLU(r_table
, x
, y
, z_n
) - c0_r
;
733 c1_g
= CLU(g_table
, x_n
, y_n
, z_n
) - CLU(g_table
, x
, y_n
, z_n
);
734 c2_g
= CLU(g_table
, x
, y_n
, z_n
) - CLU(g_table
, x
, y
, z_n
);
735 c3_g
= CLU(g_table
, x
, y
, z_n
) - c0_g
;
736 c1_b
= CLU(b_table
, x_n
, y_n
, z_n
) - CLU(b_table
, x
, y_n
, z_n
);
737 c2_b
= CLU(b_table
, x
, y_n
, z_n
) - CLU(b_table
, x
, y
, z_n
);
738 c3_b
= CLU(b_table
, x
, y
, z_n
) - c0_b
;
743 clut_r
= c0_r
+ c1_r
*rx
+ c2_r
*ry
+ c3_r
*rz
;
744 clut_g
= c0_g
+ c1_g
*rx
+ c2_g
*ry
+ c3_g
*rz
;
745 clut_b
= c0_b
+ c1_b
*rx
+ c2_b
*ry
+ c3_b
*rz
;
747 dest
[r_out
] = clamp_u8(clut_r
*255.0f
);
748 dest
[1] = clamp_u8(clut_g
*255.0f
);
749 dest
[b_out
] = clamp_u8(clut_b
*255.0f
);
754 static void qcms_transform_data_rgb_out_lut(qcms_transform
*transform
, unsigned char *src
, unsigned char *dest
, size_t length
, qcms_format_type output_format
)
756 const int r_out
= output_format
.r
;
757 const int b_out
= output_format
.b
;
760 float (*mat
)[4] = transform
->matrix
;
761 for (i
= 0; i
< length
; i
++) {
762 unsigned char device_r
= *src
++;
763 unsigned char device_g
= *src
++;
764 unsigned char device_b
= *src
++;
765 float out_device_r
, out_device_g
, out_device_b
;
767 float linear_r
= transform
->input_gamma_table_r
[device_r
];
768 float linear_g
= transform
->input_gamma_table_g
[device_g
];
769 float linear_b
= transform
->input_gamma_table_b
[device_b
];
771 float out_linear_r
= mat
[0][0]*linear_r
+ mat
[1][0]*linear_g
+ mat
[2][0]*linear_b
;
772 float out_linear_g
= mat
[0][1]*linear_r
+ mat
[1][1]*linear_g
+ mat
[2][1]*linear_b
;
773 float out_linear_b
= mat
[0][2]*linear_r
+ mat
[1][2]*linear_g
+ mat
[2][2]*linear_b
;
775 out_linear_r
= clamp_float(out_linear_r
);
776 out_linear_g
= clamp_float(out_linear_g
);
777 out_linear_b
= clamp_float(out_linear_b
);
779 out_device_r
= lut_interp_linear(out_linear_r
,
780 transform
->output_gamma_lut_r
, transform
->output_gamma_lut_r_length
);
781 out_device_g
= lut_interp_linear(out_linear_g
,
782 transform
->output_gamma_lut_g
, transform
->output_gamma_lut_g_length
);
783 out_device_b
= lut_interp_linear(out_linear_b
,
784 transform
->output_gamma_lut_b
, transform
->output_gamma_lut_b_length
);
786 dest
[r_out
] = clamp_u8(out_device_r
*255);
787 dest
[1] = clamp_u8(out_device_g
*255);
788 dest
[b_out
] = clamp_u8(out_device_b
*255);
793 static void qcms_transform_data_rgba_out_lut(qcms_transform
*transform
, unsigned char *src
, unsigned char *dest
, size_t length
, qcms_format_type output_format
)
795 const int r_out
= output_format
.r
;
796 const int b_out
= output_format
.b
;
799 float (*mat
)[4] = transform
->matrix
;
800 for (i
= 0; i
< length
; i
++) {
801 unsigned char device_r
= *src
++;
802 unsigned char device_g
= *src
++;
803 unsigned char device_b
= *src
++;
804 unsigned char alpha
= *src
++;
805 float out_device_r
, out_device_g
, out_device_b
;
807 float linear_r
= transform
->input_gamma_table_r
[device_r
];
808 float linear_g
= transform
->input_gamma_table_g
[device_g
];
809 float linear_b
= transform
->input_gamma_table_b
[device_b
];
811 float out_linear_r
= mat
[0][0]*linear_r
+ mat
[1][0]*linear_g
+ mat
[2][0]*linear_b
;
812 float out_linear_g
= mat
[0][1]*linear_r
+ mat
[1][1]*linear_g
+ mat
[2][1]*linear_b
;
813 float out_linear_b
= mat
[0][2]*linear_r
+ mat
[1][2]*linear_g
+ mat
[2][2]*linear_b
;
815 out_linear_r
= clamp_float(out_linear_r
);
816 out_linear_g
= clamp_float(out_linear_g
);
817 out_linear_b
= clamp_float(out_linear_b
);
819 out_device_r
= lut_interp_linear(out_linear_r
,
820 transform
->output_gamma_lut_r
, transform
->output_gamma_lut_r_length
);
821 out_device_g
= lut_interp_linear(out_linear_g
,
822 transform
->output_gamma_lut_g
, transform
->output_gamma_lut_g_length
);
823 out_device_b
= lut_interp_linear(out_linear_b
,
824 transform
->output_gamma_lut_b
, transform
->output_gamma_lut_b_length
);
826 dest
[r_out
] = clamp_u8(out_device_r
*255);
827 dest
[1] = clamp_u8(out_device_g
*255);
828 dest
[b_out
] = clamp_u8(out_device_b
*255);
835 static void qcms_transform_data_rgb_out_linear(qcms_transform
*transform
, unsigned char *src
, unsigned char *dest
, size_t length
, qcms_format_type output_format
)
837 const int r_out
= output_format
.r
;
838 const int b_out
= output_format
.b
;
841 float (*mat
)[4] = transform
->matrix
;
842 for (i
= 0; i
< length
; i
++) {
843 unsigned char device_r
= *src
++;
844 unsigned char device_g
= *src
++;
845 unsigned char device_b
= *src
++;
847 float linear_r
= transform
->input_gamma_table_r
[device_r
];
848 float linear_g
= transform
->input_gamma_table_g
[device_g
];
849 float linear_b
= transform
->input_gamma_table_b
[device_b
];
851 float out_linear_r
= mat
[0][0]*linear_r
+ mat
[1][0]*linear_g
+ mat
[2][0]*linear_b
;
852 float out_linear_g
= mat
[0][1]*linear_r
+ mat
[1][1]*linear_g
+ mat
[2][1]*linear_b
;
853 float out_linear_b
= mat
[0][2]*linear_r
+ mat
[1][2]*linear_g
+ mat
[2][2]*linear_b
;
855 dest
[r_out
] = clamp_u8(out_linear_r
*255);
856 dest
[1] = clamp_u8(out_linear_g
*255);
857 dest
[b_out
] = clamp_u8(out_linear_b
*255);
864 * If users create and destroy objects on different threads, even if the same
865 * objects aren't used on different threads at the same time, we can still run
866 * in to trouble with refcounts if they aren't atomic.
868 * This can lead to us prematurely deleting the precache if threads get unlucky
869 * and write the wrong value to the ref count.
871 static struct precache_output
*precache_reference(struct precache_output
*p
)
873 qcms_atomic_increment(p
->ref_count
);
877 static struct precache_output
*precache_create()
879 struct precache_output
*p
= malloc(sizeof(struct precache_output
));
885 void precache_release(struct precache_output
*p
)
887 if (qcms_atomic_decrement(p
->ref_count
) == 0) {
892 #ifdef HAVE_POSIX_MEMALIGN
893 static qcms_transform
*transform_alloc(void)
896 if (!posix_memalign(&t
, 16, sizeof(*t
))) {
902 static void transform_free(qcms_transform
*t
)
907 static qcms_transform
*transform_alloc(void)
909 /* transform needs to be aligned on a 16byte boundrary */
910 char *original_block
= calloc(sizeof(qcms_transform
) + sizeof(void*) + 16, 1);
911 /* make room for a pointer to the block returned by calloc */
912 void *transform_start
= original_block
+ sizeof(void*);
913 /* align transform_start */
914 qcms_transform
*transform_aligned
= (qcms_transform
*)(((uintptr_t)transform_start
+ 15) & ~0xf);
916 /* store a pointer to the block returned by calloc so that we can free it later */
917 void **(original_block_ptr
) = (void**)transform_aligned
;
920 original_block_ptr
--;
921 *original_block_ptr
= original_block
;
923 return transform_aligned
;
925 static void transform_free(qcms_transform
*t
)
927 /* get at the pointer to the unaligned block returned by calloc */
928 void **p
= (void**)t
;
934 void qcms_transform_release(qcms_transform
*t
)
936 /* ensure we only free the gamma tables once even if there are
937 * multiple references to the same data */
939 if (t
->output_table_r
)
940 precache_release(t
->output_table_r
);
941 if (t
->output_table_g
)
942 precache_release(t
->output_table_g
);
943 if (t
->output_table_b
)
944 precache_release(t
->output_table_b
);
946 free(t
->input_gamma_table_r
);
947 if (t
->input_gamma_table_g
!= t
->input_gamma_table_r
)
948 free(t
->input_gamma_table_g
);
949 if (t
->input_gamma_table_g
!= t
->input_gamma_table_r
&&
950 t
->input_gamma_table_g
!= t
->input_gamma_table_b
)
951 free(t
->input_gamma_table_b
);
953 free(t
->input_gamma_table_gray
);
955 free(t
->output_gamma_lut_r
);
956 free(t
->output_gamma_lut_g
);
957 free(t
->output_gamma_lut_b
);
963 // Determine if we can build with SSE2 (this was partly copied from jmorecfg.h in
965 // -------------------------------------------------------------------------
966 #if defined(_M_IX86) && defined(_MSC_VER)
968 /* Get us a CPUID function. Avoid clobbering EBX because sometimes it's the PIC
969 register - I'm not sure if that ever happens on windows, but cpuid isn't
970 on the critical path so we just preserve the register to be safe and to be
971 consistent with the non-windows version. */
972 static void cpuid(uint32_t fxn
, uint32_t *a
, uint32_t *b
, uint32_t *c
, uint32_t *d
) {
973 uint32_t a_
, b_
, c_
, d_
;
989 #elif (defined(__GNUC__) || defined(__SUNPRO_C)) && (defined(__i386__) || defined(__i386))
991 /* Get us a CPUID function. We can't use ebx because it's the PIC register on
992 some platforms, so we use ESI instead and save ebx to avoid clobbering it. */
993 static void cpuid(uint32_t fxn
, uint32_t *a
, uint32_t *b
, uint32_t *c
, uint32_t *d
) {
995 uint32_t a_
, b_
, c_
, d_
;
996 __asm__
__volatile__ ("xchgl %%ebx, %%esi; cpuid; xchgl %%ebx, %%esi;"
997 : "=a" (a_
), "=S" (b_
), "=c" (c_
), "=d" (d_
) : "a" (fxn
));
1005 // -------------------------Runtime SSEx Detection-----------------------------
1007 /* MMX is always supported per
1008 * Gecko v1.9.1 minimum CPU requirements */
1009 #define SSE1_EDX_MASK (1UL << 25)
1010 #define SSE2_EDX_MASK (1UL << 26)
1011 #define SSE3_ECX_MASK (1UL << 0)
1013 static int sse_version_available(void)
1015 #if defined(__x86_64__) || defined(__x86_64) || defined(_M_AMD64)
1016 /* we know at build time that 64-bit CPUs always have SSE2
1017 * this tells the compiler that non-SSE2 branches will never be
1018 * taken (i.e. OK to optimze away the SSE1 and non-SIMD code */
1020 #elif defined(HAS_CPUID)
1021 static int sse_version
= -1;
1022 uint32_t a
, b
, c
, d
;
1023 uint32_t function
= 0x00000001;
1025 if (sse_version
== -1) {
1027 cpuid(function
, &a
, &b
, &c
, &d
);
1028 if (c
& SSE3_ECX_MASK
)
1030 else if (d
& SSE2_EDX_MASK
)
1032 else if (d
& SSE1_EDX_MASK
)
1043 static const struct matrix bradford_matrix
= {{ { 0.8951f
, 0.2664f
,-0.1614f
},
1044 {-0.7502f
, 1.7135f
, 0.0367f
},
1045 { 0.0389f
,-0.0685f
, 1.0296f
}},
1048 static const struct matrix bradford_matrix_inv
= {{ { 0.9869929f
,-0.1470543f
, 0.1599627f
},
1049 { 0.4323053f
, 0.5183603f
, 0.0492912f
},
1050 {-0.0085287f
, 0.0400428f
, 0.9684867f
}},
1054 struct matrix
compute_whitepoint_adaption(float X
, float Y
, float Z
) {
1055 float p
= (0.96422f
*bradford_matrix
.m
[0][0] + 1.000f
*bradford_matrix
.m
[1][0] + 0.82521f
*bradford_matrix
.m
[2][0]) /
1056 (X
*bradford_matrix
.m
[0][0] + Y
*bradford_matrix
.m
[1][0] + Z
*bradford_matrix
.m
[2][0] );
1057 float y
= (0.96422f
*bradford_matrix
.m
[0][1] + 1.000f
*bradford_matrix
.m
[1][1] + 0.82521f
*bradford_matrix
.m
[2][1]) /
1058 (X
*bradford_matrix
.m
[0][1] + Y
*bradford_matrix
.m
[1][1] + Z
*bradford_matrix
.m
[2][1] );
1059 float b
= (0.96422f
*bradford_matrix
.m
[0][2] + 1.000f
*bradford_matrix
.m
[1][2] + 0.82521f
*bradford_matrix
.m
[2][2]) /
1060 (X
*bradford_matrix
.m
[0][2] + Y
*bradford_matrix
.m
[1][2] + Z
*bradford_matrix
.m
[2][2] );
1061 struct matrix white_adaption
= {{ {p
,0,0}, {0,y
,0}, {0,0,b
}}, false};
1062 return matrix_multiply( bradford_matrix_inv
, matrix_multiply(white_adaption
, bradford_matrix
) );
1065 void qcms_profile_precache_output_transform(qcms_profile
*profile
)
1067 /* we only support precaching on rgb profiles */
1068 if (profile
->color_space
!= RGB_SIGNATURE
)
1071 if (qcms_supports_iccv4
) {
1072 /* don't precache since we will use the B2A LUT */
1076 /* don't precache since we will use the mBA LUT */
1081 /* don't precache if we do not have the TRC curves */
1082 if (!profile
->redTRC
|| !profile
->greenTRC
|| !profile
->blueTRC
)
1085 if (!profile
->output_table_r
) {
1086 profile
->output_table_r
= precache_create();
1087 if (profile
->output_table_r
&&
1088 !compute_precache(profile
->redTRC
, profile
->output_table_r
->data
)) {
1089 precache_release(profile
->output_table_r
);
1090 profile
->output_table_r
= NULL
;
1093 if (!profile
->output_table_g
) {
1094 profile
->output_table_g
= precache_create();
1095 if (profile
->output_table_g
&&
1096 !compute_precache(profile
->greenTRC
, profile
->output_table_g
->data
)) {
1097 precache_release(profile
->output_table_g
);
1098 profile
->output_table_g
= NULL
;
1101 if (!profile
->output_table_b
) {
1102 profile
->output_table_b
= precache_create();
1103 if (profile
->output_table_b
&&
1104 !compute_precache(profile
->blueTRC
, profile
->output_table_b
->data
)) {
1105 precache_release(profile
->output_table_b
);
1106 profile
->output_table_b
= NULL
;
1111 /* Replace the current transformation with a LUT transformation using a given number of sample points */
1112 qcms_transform
* qcms_transform_precacheLUT_float(qcms_transform
*transform
, qcms_profile
*in
, qcms_profile
*out
,
1113 int samples
, qcms_data_type in_type
)
1115 /* The range between which 2 consecutive sample points can be used to interpolate */
1118 uint32_t lutSize
= 3 * samples
* samples
* samples
;
1124 src
= malloc(lutSize
*sizeof(float));
1125 dest
= malloc(lutSize
*sizeof(float));
1128 /* Prepare a list of points we want to sample: x, y, z order */
1130 inverse
= 1 / (float)(samples
-1);
1131 for (x
= 0; x
< samples
; x
++) {
1132 for (y
= 0; y
< samples
; y
++) {
1133 for (z
= 0; z
< samples
; z
++) {
1134 src
[l
++] = x
* inverse
; // r
1135 src
[l
++] = y
* inverse
; // g
1136 src
[l
++] = z
* inverse
; // b
1141 lut
= qcms_chain_transform(in
, out
, src
, dest
, lutSize
);
1144 transform
->r_clut
= &lut
[0]; // r
1145 transform
->g_clut
= &lut
[1]; // g
1146 transform
->b_clut
= &lut
[2]; // b
1147 transform
->grid_size
= samples
;
1149 if (in_type
== QCMS_DATA_RGBA_8
) {
1150 #if defined(SSE2_ENABLE)
1151 if (sse_version_available() >= 2) {
1152 transform
->transform_fn
= qcms_transform_data_tetra_clut_rgba_sse2
;
1154 transform
->transform_fn
= qcms_transform_data_tetra_clut_rgba
;
1157 transform
->transform_fn
= qcms_transform_data_tetra_clut_rgba
;
1160 transform
->transform_fn
= qcms_transform_data_tetra_clut
;
1165 // XXX: qcms_modular_transform_data may return the lut in either the src or the
1166 // dest buffer. If so, it must not be free-ed.
1167 if (src
&& lut
!= src
) {
1170 if (dest
&& lut
!= dest
) {
1180 /* Create a transform LUT using the given number of sample points. The transform LUT data is stored
1181 in the output (cube) in bgra format in zyx sample order. */
1182 qcms_bool
qcms_transform_create_LUT_zyx_bgra(qcms_profile
*in
, qcms_profile
*out
, qcms_intent intent
,
1183 int samples
, unsigned char* cube
)
1187 uint32_t lutSize
= 3 * samples
* samples
* samples
;
1194 src
= malloc(lutSize
*sizeof(float));
1195 dest
= malloc(lutSize
*sizeof(float));
1198 /* Prepare a list of points we want to sample: z, y, x order */
1200 inverse
= 1 / (float)(samples
-1);
1201 for (z
= 0; z
< samples
; z
++) {
1202 for (y
= 0; y
< samples
; y
++) {
1203 for (x
= 0; x
< samples
; x
++) {
1204 src
[l
++] = x
* inverse
; // r
1205 src
[l
++] = y
* inverse
; // g
1206 src
[l
++] = z
* inverse
; // b
1211 lut
= qcms_chain_transform(in
, out
, src
, dest
, lutSize
);
1215 for (z
= 0; z
< samples
; z
++) {
1216 for (y
= 0; y
< samples
; y
++) {
1217 for (x
= 0; x
< samples
; x
++) {
1218 cube
[index
++] = (int)floorf(lut
[l
+ 2] * 255.0f
+ 0.5f
); // b
1219 cube
[index
++] = (int)floorf(lut
[l
+ 1] * 255.0f
+ 0.5f
); // g
1220 cube
[index
++] = (int)floorf(lut
[l
+ 0] * 255.0f
+ 0.5f
); // r
1221 cube
[index
++] = 255; // a
1229 // XXX: qcms_modular_transform_data may return the lut data in either the src or
1230 // dest buffer so free src, dest, and lut with care.
1232 if (src
&& lut
!= src
)
1234 if (dest
&& lut
!= dest
)
1245 #define NO_MEM_TRANSFORM NULL
1247 qcms_transform
* qcms_transform_create(
1248 qcms_profile
*in
, qcms_data_type in_type
,
1249 qcms_profile
*out
, qcms_data_type out_type
,
1252 qcms_transform
*transform
= NULL
;
1253 bool precache
= false;
1256 transform
= transform_alloc();
1261 if (out_type
!= QCMS_DATA_RGB_8
&& out_type
!= QCMS_DATA_RGBA_8
) {
1262 assert(0 && "output type");
1263 qcms_transform_release(transform
);
1267 transform
->transform_flags
= 0;
1269 if (out
->output_table_r
&& out
->output_table_g
&& out
->output_table_b
) {
1273 if (qcms_supports_iccv4
&& (in
->A2B0
|| out
->B2A0
|| in
->mAB
|| out
->mAB
)) {
1274 // Precache the transformation to a CLUT 33x33x33 in size.
1275 // 33 is used by many profiles and works well in practice.
1276 // This evenly divides 256 into blocks of 8x8x8.
1277 // TODO For transforming small data sets of about 200x200 or less
1278 // precaching should be avoided.
1279 qcms_transform
*result
= qcms_transform_precacheLUT_float(transform
, in
, out
, 33, in_type
);
1281 assert(0 && "precacheLUT failed");
1282 qcms_transform_release(transform
);
1288 /* A matrix-based transform will be selected: check that the PCS
1289 of the input/output profiles are the same, crbug.com/5120682 */
1290 if (in
->pcs
!= out
->pcs
) {
1291 qcms_transform_release(transform
);
1296 transform
->output_table_r
= precache_reference(out
->output_table_r
);
1297 transform
->output_table_g
= precache_reference(out
->output_table_g
);
1298 transform
->output_table_b
= precache_reference(out
->output_table_b
);
1300 if (!out
->redTRC
|| !out
->greenTRC
|| !out
->blueTRC
) {
1301 qcms_transform_release(transform
);
1302 return NO_MEM_TRANSFORM
;
1305 build_output_lut(out
->redTRC
, &transform
->output_gamma_lut_r
, &transform
->output_gamma_lut_r_length
);
1306 build_output_lut(out
->greenTRC
, &transform
->output_gamma_lut_g
, &transform
->output_gamma_lut_g_length
);
1307 build_output_lut(out
->blueTRC
, &transform
->output_gamma_lut_b
, &transform
->output_gamma_lut_b_length
);
1309 if (!transform
->output_gamma_lut_r
|| !transform
->output_gamma_lut_g
|| !transform
->output_gamma_lut_b
) {
1310 qcms_transform_release(transform
);
1311 return NO_MEM_TRANSFORM
;
1315 if (in
->color_space
== RGB_SIGNATURE
) {
1316 struct matrix in_matrix
, out_matrix
, result
;
1318 if (in_type
!= QCMS_DATA_RGB_8
&& in_type
!= QCMS_DATA_RGBA_8
) {
1319 assert(0 && "input type");
1320 qcms_transform_release(transform
);
1325 #if defined(SSE2_ENABLE)
1326 if (sse_version_available() >= 2) {
1327 if (in_type
== QCMS_DATA_RGB_8
)
1328 transform
->transform_fn
= qcms_transform_data_rgb_out_lut_sse2
;
1330 transform
->transform_fn
= qcms_transform_data_rgba_out_lut_sse2
;
1334 if (in_type
== QCMS_DATA_RGB_8
)
1335 transform
->transform_fn
= qcms_transform_data_rgb_out_lut_precache
;
1337 transform
->transform_fn
= qcms_transform_data_rgba_out_lut_precache
;
1340 if (in_type
== QCMS_DATA_RGB_8
)
1341 transform
->transform_fn
= qcms_transform_data_rgb_out_lut
;
1343 transform
->transform_fn
= qcms_transform_data_rgba_out_lut
;
1346 //XXX: avoid duplicating tables if we can
1347 transform
->input_gamma_table_r
= build_input_gamma_table(in
->redTRC
);
1348 transform
->input_gamma_table_g
= build_input_gamma_table(in
->greenTRC
);
1349 transform
->input_gamma_table_b
= build_input_gamma_table(in
->blueTRC
);
1351 if (!transform
->input_gamma_table_r
|| !transform
->input_gamma_table_g
|| !transform
->input_gamma_table_b
) {
1352 qcms_transform_release(transform
);
1353 return NO_MEM_TRANSFORM
;
1356 /* build combined colorant matrix */
1357 in_matrix
= build_colorant_matrix(in
);
1358 out_matrix
= build_colorant_matrix(out
);
1359 out_matrix
= matrix_invert(out_matrix
);
1360 if (out_matrix
.invalid
) {
1361 qcms_transform_release(transform
);
1364 result
= matrix_multiply(out_matrix
, in_matrix
);
1366 /* check for NaN values in the matrix and bail if we find any
1367 see also https://bugzilla.mozilla.org/show_bug.cgi?id=1170316 */
1368 for (i
= 0 ; i
< 3 ; ++i
) {
1369 for (j
= 0 ; j
< 3 ; ++j
) {
1370 if (result
.m
[i
][j
] != result
.m
[i
][j
]) {
1371 qcms_transform_release(transform
);
1377 /* store the results in column major mode
1378 * this makes doing the multiplication with sse easier */
1379 transform
->matrix
[0][0] = result
.m
[0][0];
1380 transform
->matrix
[1][0] = result
.m
[0][1];
1381 transform
->matrix
[2][0] = result
.m
[0][2];
1382 transform
->matrix
[0][1] = result
.m
[1][0];
1383 transform
->matrix
[1][1] = result
.m
[1][1];
1384 transform
->matrix
[2][1] = result
.m
[1][2];
1385 transform
->matrix
[0][2] = result
.m
[2][0];
1386 transform
->matrix
[1][2] = result
.m
[2][1];
1387 transform
->matrix
[2][2] = result
.m
[2][2];
1389 /* Flag transform as matrix. */
1390 transform
->transform_flags
|= TRANSFORM_FLAG_MATRIX
;
1392 } else if (in
->color_space
== GRAY_SIGNATURE
) {
1393 if (in_type
!= QCMS_DATA_GRAY_8
&& in_type
!= QCMS_DATA_GRAYA_8
) {
1394 assert(0 && "input type");
1395 qcms_transform_release(transform
);
1399 transform
->input_gamma_table_gray
= build_input_gamma_table(in
->grayTRC
);
1401 if (!transform
->input_gamma_table_gray
) {
1402 qcms_transform_release(transform
);
1403 return NO_MEM_TRANSFORM
;
1407 if (in_type
== QCMS_DATA_GRAY_8
) {
1408 transform
->transform_fn
= qcms_transform_data_gray_out_precache
;
1410 transform
->transform_fn
= qcms_transform_data_graya_out_precache
;
1413 if (in_type
== QCMS_DATA_GRAY_8
) {
1414 transform
->transform_fn
= qcms_transform_data_gray_out_lut
;
1416 transform
->transform_fn
= qcms_transform_data_graya_out_lut
;
1420 assert(0 && "unexpected colorspace");
1421 qcms_transform_release(transform
);
1428 /* __force_align_arg_pointer__ is an x86-only attribute, and gcc/clang warns on unused
1429 * attributes. Don't use this on ARM or AMD64. __has_attribute can detect the presence
1430 * of the attribute but is currently only supported by clang */
1431 #if defined(__has_attribute)
1432 #define HAS_FORCE_ALIGN_ARG_POINTER __has_attribute(__force_align_arg_pointer__)
1433 #elif defined(__GNUC__) && defined(__i386__)
1434 #define HAS_FORCE_ALIGN_ARG_POINTER 1
1436 #define HAS_FORCE_ALIGN_ARG_POINTER 0
1439 #if HAS_FORCE_ALIGN_ARG_POINTER
1440 /* we need this to avoid crashes when gcc assumes the stack is 128bit aligned */
1441 __attribute__((__force_align_arg_pointer__
))
1443 void qcms_transform_data(qcms_transform
*transform
, void *src
, void *dest
, size_t length
)
1445 static const struct _qcms_format_type output_rgbx
= { 0, 2 };
1447 transform
->transform_fn(transform
, src
, dest
, length
, output_rgbx
);
1450 void qcms_transform_data_type(qcms_transform
*transform
, void *src
, void *dest
, size_t length
, qcms_output_type type
)
1452 static const struct _qcms_format_type output_rgbx
= { 0, 2 };
1453 static const struct _qcms_format_type output_bgrx
= { 2, 0 };
1455 transform
->transform_fn(transform
, src
, dest
, length
, type
== QCMS_OUTPUT_BGRX
? output_bgrx
: output_rgbx
);
1458 qcms_bool qcms_supports_iccv4
;
1460 void qcms_enable_iccv4()
1462 qcms_supports_iccv4
= true;
1465 static inline qcms_bool
transform_is_matrix(qcms_transform
*t
)
1467 return (t
->transform_flags
& TRANSFORM_FLAG_MATRIX
) ? true : false;
1470 qcms_bool
qcms_transform_is_matrix(qcms_transform
*t
)
1472 return transform_is_matrix(t
);
1475 float qcms_transform_get_matrix(qcms_transform
*t
, unsigned i
, unsigned j
)
1477 assert(transform_is_matrix(t
) && i
< 3 && j
< 3);
1479 // Return transform matrix element in row major order (permute i and j)
1481 return t
->matrix
[j
][i
];
1484 static inline qcms_bool
supported_trc_type(qcms_trc_type type
)
1486 return type
== QCMS_TRC_HALF_FLOAT
;
1489 const uint16_t half_float_one
= 0x3c00;
1491 size_t qcms_transform_get_input_trc_rgba(qcms_transform
*t
, qcms_profile
*in
, qcms_trc_type type
, unsigned short *data
)
1493 const size_t size
= 256; // The input gamma tables always have 256 entries.
1497 if (in
->color_space
!= RGB_SIGNATURE
|| !supported_trc_type(type
))
1500 // qcms_profile *in is assumed to be the profile on the input-side of the color transform t.
1501 // When a transform is created, the input gamma curve data is stored in the transform ...
1503 if (!t
->input_gamma_table_r
|| !t
->input_gamma_table_g
|| !t
->input_gamma_table_b
)
1506 // Report the size if no output data is requested. This allows callers to first work out the
1507 // the curve size, then provide allocated memory sufficient to store the curve rgba data.
1512 for (i
= 0; i
< size
; ++i
) {
1513 *data
++ = float_to_half_float(t
->input_gamma_table_r
[i
]); // r
1514 *data
++ = float_to_half_float(t
->input_gamma_table_g
[i
]); // g
1515 *data
++ = float_to_half_float(t
->input_gamma_table_b
[i
]); // b
1516 *data
++ = half_float_one
; // a
1522 const float inverse65535
= (float) (1.0 / 65535.0);
1524 size_t qcms_transform_get_output_trc_rgba(qcms_transform
*t
, qcms_profile
*out
, qcms_trc_type type
, unsigned short *data
)
1528 if (out
->color_space
!= RGB_SIGNATURE
|| !supported_trc_type(type
))
1531 // qcms_profile *out is assumed to be the profile on the output-side of the transform t.
1532 // If the transform output gamma curves need building, do that. They're usually built when
1533 // the transform was created, but sometimes not due to the output gamma precache ...
1535 if (!out
->redTRC
|| !out
->greenTRC
|| !out
->blueTRC
)
1537 if (!t
->output_gamma_lut_r
)
1538 build_output_lut(out
->redTRC
, &t
->output_gamma_lut_r
, &t
->output_gamma_lut_r_length
);
1539 if (!t
->output_gamma_lut_g
)
1540 build_output_lut(out
->greenTRC
, &t
->output_gamma_lut_g
, &t
->output_gamma_lut_g_length
);
1541 if (!t
->output_gamma_lut_b
)
1542 build_output_lut(out
->blueTRC
, &t
->output_gamma_lut_b
, &t
->output_gamma_lut_b_length
);
1544 if (!t
->output_gamma_lut_r
|| !t
->output_gamma_lut_g
|| !t
->output_gamma_lut_b
)
1547 // Output gamma tables should have the same size and should have 4096 entries at most (the
1548 // minimum is 256). Larger tables are rare and ignored here: fail by returning 0.
1550 size
= t
->output_gamma_lut_r_length
;
1551 if (size
!= t
->output_gamma_lut_g_length
)
1553 if (size
!= t
->output_gamma_lut_b_length
)
1555 if (size
< 256 || size
> 4096)
1558 // Report the size if no output data is requested. This allows callers to first work out the
1559 // the curve size, then provide allocated memory sufficient to store the curve rgba data.
1564 for (i
= 0; i
< size
; ++i
) {
1565 *data
++ = float_to_half_float(t
->output_gamma_lut_r
[i
] * inverse65535
); // r
1566 *data
++ = float_to_half_float(t
->output_gamma_lut_g
[i
] * inverse65535
); // g
1567 *data
++ = float_to_half_float(t
->output_gamma_lut_b
[i
] * inverse65535
); // b
1568 *data
++ = half_float_one
; // a