third_party/qcms/src/transform.c

   1 /* vim: set ts=8 sw=8 noexpandtab: */
   2 //  qcms
   3 //  Copyright (C) 2009 Mozilla Corporation
   4 //  Copyright (C) 1998-2007 Marti Maria
   5 //
   6 // Permission is hereby granted, free of charge, to any person obtaining
   7 // a copy of this software and associated documentation files (the "Software"),
   8 // to deal in the Software without restriction, including without limitation
   9 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
  10 // and/or sell copies of the Software, and to permit persons to whom the Software
  11 // is furnished to do so, subject to the following conditions:
  12 //
  13 // The above copyright notice and this permission notice shall be included in
  14 // all copies or substantial portions of the Software.
  15 //
  16 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  17 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
  18 // THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  19 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
  20 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  21 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  22 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  23
  24 #include <stdlib.h>
  25 #include <math.h>
  26 #include <assert.h>
  27 #include <string.h> //memcpy
  28 #include "qcmsint.h"
  29 #include "chain.h"
  30 #include "halffloat.h"
  31 #include "matrix.h"
  32 #include "transform_util.h"
  33
  34 /* for MSVC, GCC, Intel, and Sun compilers */
  35 #if defined(_M_IX86) || defined(__i386__) || defined(__i386) || defined(_M_AMD64) || defined(__x86_64__) || defined(__x86_64)
  36 #define X86
  37 #endif /* _M_IX86 || __i386__ || __i386 || _M_AMD64 || __x86_64__ || __x86_64 */
  38
  39 // Build a White point, primary chromas transfer matrix from RGB to CIE XYZ
  40 // This is just an approximation, I am not handling all the non-linear
  41 // aspects of the RGB to XYZ process, and assumming that the gamma correction
  42 // has transitive property in the tranformation chain.
  43 //
  44 // the alghoritm:
  45 //
  46 //            - First I build the absolute conversion matrix using
  47 //              primaries in XYZ. This matrix is next inverted
  48 //            - Then I eval the source white point across this matrix
  49 //              obtaining the coeficients of the transformation
  50 //            - Then, I apply these coeficients to the original matrix
  51 static struct matrix build_RGB_to_XYZ_transfer_matrix(qcms_CIE_xyY white, qcms_CIE_xyYTRIPLE primrs)
  52 {
  53         struct matrix primaries;
  54         struct matrix primaries_invert;
  55         struct matrix result;
  56         struct vector white_point;
  57         struct vector coefs;
  58
  59         double xn, yn;
  60         double xr, yr;
  61         double xg, yg;
  62         double xb, yb;
  63
  64         xn = white.x;
  65         yn = white.y;
  66
  67         if (yn == 0.0)
  68                 return matrix_invalid();
  69
  70         xr = primrs.red.x;
  71         yr = primrs.red.y;
  72         xg = primrs.green.x;
  73         yg = primrs.green.y;
  74         xb = primrs.blue.x;
  75         yb = primrs.blue.y;
  76
  77         primaries.m[0][0] = xr;
  78         primaries.m[0][1] = xg;
  79         primaries.m[0][2] = xb;
  80
  81         primaries.m[1][0] = yr;
  82         primaries.m[1][1] = yg;
  83         primaries.m[1][2] = yb;
  84
  85         primaries.m[2][0] = 1 - xr - yr;
  86         primaries.m[2][1] = 1 - xg - yg;
  87         primaries.m[2][2] = 1 - xb - yb;
  88         primaries.invalid = false;
  89
  90         white_point.v[0] = xn/yn;
  91         white_point.v[1] = 1.;
  92         white_point.v[2] = (1.0-xn-yn)/yn;
  93
  94         primaries_invert = matrix_invert(primaries);
  95
  96         coefs = matrix_eval(primaries_invert, white_point);
  97
  98         result.m[0][0] = coefs.v[0]*xr;
  99         result.m[0][1] = coefs.v[1]*xg;
 100         result.m[0][2] = coefs.v[2]*xb;
 101
 102         result.m[1][0] = coefs.v[0]*yr;
 103         result.m[1][1] = coefs.v[1]*yg;
 104         result.m[1][2] = coefs.v[2]*yb;
 105
 106         result.m[2][0] = coefs.v[0]*(1.-xr-yr);
 107         result.m[2][1] = coefs.v[1]*(1.-xg-yg);
 108         result.m[2][2] = coefs.v[2]*(1.-xb-yb);
 109         result.invalid = primaries_invert.invalid;
 110
 111         return result;
 112 }
 113
 114 struct CIE_XYZ {
 115         double X;
 116         double Y;
 117         double Z;
 118 };
 119
 120 /* CIE Illuminant D50 */
 121 static const struct CIE_XYZ D50_XYZ = {
 122         0.9642,
 123         1.0000,
 124         0.8249
 125 };
 126
 127 /* from lcms: xyY2XYZ()
 128  * corresponds to argyll: icmYxy2XYZ() */
 129 static struct CIE_XYZ xyY2XYZ(qcms_CIE_xyY source)
 130 {
 131         struct CIE_XYZ dest;
 132         dest.X = (source.x / source.y) * source.Y;
 133         dest.Y = source.Y;
 134         dest.Z = ((1 - source.x - source.y) / source.y) * source.Y;
 135         return dest;
 136 }
 137
 138 /* from lcms: ComputeChromaticAdaption */
 139 // Compute chromatic adaption matrix using chad as cone matrix
 140 static struct matrix
 141 compute_chromatic_adaption(struct CIE_XYZ source_white_point,
 142                            struct CIE_XYZ dest_white_point,
 143                            struct matrix chad)
 144 {
 145         struct matrix chad_inv;
 146         struct vector cone_source_XYZ, cone_source_rgb;
 147         struct vector cone_dest_XYZ, cone_dest_rgb;
 148         struct matrix cone, tmp;
 149
 150         tmp = chad;
 151         chad_inv = matrix_invert(tmp);
 152
 153         cone_source_XYZ.v[0] = source_white_point.X;
 154         cone_source_XYZ.v[1] = source_white_point.Y;
 155         cone_source_XYZ.v[2] = source_white_point.Z;
 156
 157         cone_dest_XYZ.v[0] = dest_white_point.X;
 158         cone_dest_XYZ.v[1] = dest_white_point.Y;
 159         cone_dest_XYZ.v[2] = dest_white_point.Z;
 160
 161         cone_source_rgb = matrix_eval(chad, cone_source_XYZ);
 162         cone_dest_rgb   = matrix_eval(chad, cone_dest_XYZ);
 163
 164         cone.m[0][0] = cone_dest_rgb.v[0]/cone_source_rgb.v[0];
 165         cone.m[0][1] = 0;
 166         cone.m[0][2] = 0;
 167         cone.m[1][0] = 0;
 168         cone.m[1][1] = cone_dest_rgb.v[1]/cone_source_rgb.v[1];
 169         cone.m[1][2] = 0;
 170         cone.m[2][0] = 0;
 171         cone.m[2][1] = 0;
 172         cone.m[2][2] = cone_dest_rgb.v[2]/cone_source_rgb.v[2];
 173         cone.invalid = false;
 174
 175         // Normalize
 176         return matrix_multiply(chad_inv, matrix_multiply(cone, chad));
 177 }
 178
 179 /* from lcms: cmsAdaptionMatrix */
 180 // Returns the final chrmatic adaptation from illuminant FromIll to Illuminant ToIll
 181 // Bradford is assumed
 182 static struct matrix
 183 adaption_matrix(struct CIE_XYZ source_illumination, struct CIE_XYZ target_illumination)
 184 {
 185 #if defined (_MSC_VER)
 186 #pragma warning(push)
 187 /* Disable double to float truncation warning 4305 */
 188 #pragma warning(disable:4305)
 189 #endif
 190         struct matrix lam_rigg = {{ // Bradford matrix
 191                                  {  0.8951,  0.2664, -0.1614 },
 192                                  { -0.7502,  1.7135,  0.0367 },
 193                                  {  0.0389, -0.0685,  1.0296 }
 194                                  }};
 195 #if defined (_MSC_VER)
 196 /* Restore warnings */
 197 #pragma warning(pop)
 198 #endif
 199         return compute_chromatic_adaption(source_illumination, target_illumination, lam_rigg);
 200 }
 201
 202 /* from lcms: cmsAdaptMatrixToD50 */
 203 static struct matrix adapt_matrix_to_D50(struct matrix r, qcms_CIE_xyY source_white_pt)
 204 {
 205         struct CIE_XYZ Dn;
 206         struct matrix Bradford;
 207
 208         if (source_white_pt.y == 0.0)
 209                 return matrix_invalid();
 210
 211         Dn = xyY2XYZ(source_white_pt);
 212
 213         Bradford = adaption_matrix(Dn, D50_XYZ);
 214         return matrix_multiply(Bradford, r);
 215 }
 216
 217 qcms_bool set_rgb_colorants(qcms_profile *profile, qcms_CIE_xyY white_point, qcms_CIE_xyYTRIPLE primaries)
 218 {
 219         struct matrix colorants;
 220         colorants = build_RGB_to_XYZ_transfer_matrix(white_point, primaries);
 221         colorants = adapt_matrix_to_D50(colorants, white_point);
 222
 223         if (colorants.invalid)
 224                 return false;
 225
 226         /* note: there's a transpose type of operation going on here */
 227         profile->redColorant.X = double_to_s15Fixed16Number(colorants.m[0][0]);
 228         profile->redColorant.Y = double_to_s15Fixed16Number(colorants.m[1][0]);
 229         profile->redColorant.Z = double_to_s15Fixed16Number(colorants.m[2][0]);
 230
 231         profile->greenColorant.X = double_to_s15Fixed16Number(colorants.m[0][1]);
 232         profile->greenColorant.Y = double_to_s15Fixed16Number(colorants.m[1][1]);
 233         profile->greenColorant.Z = double_to_s15Fixed16Number(colorants.m[2][1]);
 234
 235         profile->blueColorant.X = double_to_s15Fixed16Number(colorants.m[0][2]);
 236         profile->blueColorant.Y = double_to_s15Fixed16Number(colorants.m[1][2]);
 237         profile->blueColorant.Z = double_to_s15Fixed16Number(colorants.m[2][2]);
 238
 239         return true;
 240 }
 241
 242 #if 0
 243 static void qcms_transform_data_rgb_out_pow(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
 244 {
 245         const int r_out = output_format.r;
 246         const int b_out = output_format.b;
 247
 248         int i;
 249         float (*mat)[4] = transform->matrix;
 250         for (i=0; i<length; i++) {
 251                 unsigned char device_r = *src++;
 252                 unsigned char device_g = *src++;
 253                 unsigned char device_b = *src++;
 254
 255                 float linear_r = transform->input_gamma_table_r[device_r];
 256                 float linear_g = transform->input_gamma_table_g[device_g];
 257                 float linear_b = transform->input_gamma_table_b[device_b];
 258
 259                 float out_linear_r = mat[0][0]*linear_r + mat[1][0]*linear_g + mat[2][0]*linear_b;
 260                 float out_linear_g = mat[0][1]*linear_r + mat[1][1]*linear_g + mat[2][1]*linear_b;
 261                 float out_linear_b = mat[0][2]*linear_r + mat[1][2]*linear_g + mat[2][2]*linear_b;
 262
 263                 float out_device_r = pow(out_linear_r, transform->out_gamma_r);
 264                 float out_device_g = pow(out_linear_g, transform->out_gamma_g);
 265                 float out_device_b = pow(out_linear_b, transform->out_gamma_b);
 266
 267                 dest[r_out] = clamp_u8(out_device_r*255);
 268                 dest[1]     = clamp_u8(out_device_g*255);
 269                 dest[b_out] = clamp_u8(out_device_b*255);
 270                 dest += 3;
 271         }
 272 }
 273 #endif
 274
 275 static void qcms_transform_data_gray_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
 276 {
 277         const int r_out = output_format.r;
 278         const int b_out = output_format.b;
 279
 280         unsigned int i;
 281         for (i = 0; i < length; i++) {
 282                 float out_device_r, out_device_g, out_device_b;
 283                 unsigned char device = *src++;
 284
 285                 float linear = transform->input_gamma_table_gray[device];
 286
 287                 out_device_r = lut_interp_linear(linear, transform->output_gamma_lut_r, transform->output_gamma_lut_r_length);
 288                 out_device_g = lut_interp_linear(linear, transform->output_gamma_lut_g, transform->output_gamma_lut_g_length);
 289                 out_device_b = lut_interp_linear(linear, transform->output_gamma_lut_b, transform->output_gamma_lut_b_length);
 290
 291                 dest[r_out] = clamp_u8(out_device_r*255);
 292                 dest[1]     = clamp_u8(out_device_g*255);
 293                 dest[b_out] = clamp_u8(out_device_b*255);
 294                 dest += 3;
 295         }
 296 }
 297
 298 /* Alpha is not corrected.
 299    A rationale for this is found in Alvy Ray's "Should Alpha Be Nonlinear If
 300    RGB Is?" Tech Memo 17 (December 14, 1998).
 301         See: ftp://ftp.alvyray.com/Acrobat/17_Nonln.pdf
 302 */
 303
 304 static void qcms_transform_data_graya_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
 305 {
 306         const int r_out = output_format.r;
 307         const int b_out = output_format.b;
 308
 309         unsigned int i;
 310         for (i = 0; i < length; i++) {
 311                 float out_device_r, out_device_g, out_device_b;
 312                 unsigned char device = *src++;
 313                 unsigned char alpha = *src++;
 314
 315                 float linear = transform->input_gamma_table_gray[device];
 316
 317                 out_device_r = lut_interp_linear(linear, transform->output_gamma_lut_r, transform->output_gamma_lut_r_length);
 318                 out_device_g = lut_interp_linear(linear, transform->output_gamma_lut_g, transform->output_gamma_lut_g_length);
 319                 out_device_b = lut_interp_linear(linear, transform->output_gamma_lut_b, transform->output_gamma_lut_b_length);
 320
 321                 dest[r_out] = clamp_u8(out_device_r*255);
 322                 dest[1]     = clamp_u8(out_device_g*255);
 323                 dest[b_out] = clamp_u8(out_device_b*255);
 324                 dest[3]     = alpha;
 325                 dest += 4;
 326         }
 327 }
 328
 329
 330 static void qcms_transform_data_gray_out_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
 331 {
 332         const int r_out = output_format.r;
 333         const int b_out = output_format.b;
 334
 335         unsigned int i;
 336         for (i = 0; i < length; i++) {
 337                 unsigned char device = *src++;
 338                 uint16_t gray;
 339
 340                 float linear = transform->input_gamma_table_gray[device];
 341
 342                 /* we could round here... */
 343                 gray = linear * PRECACHE_OUTPUT_MAX;
 344
 345                 dest[r_out] = transform->output_table_r->data[gray];
 346                 dest[1]     = transform->output_table_g->data[gray];
 347                 dest[b_out] = transform->output_table_b->data[gray];
 348                 dest += 3;
 349         }
 350 }
 351
 352
 353 static void qcms_transform_data_graya_out_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
 354 {
 355         const int r_out = output_format.r;
 356         const int b_out = output_format.b;
 357
 358         unsigned int i;
 359         for (i = 0; i < length; i++) {
 360                 unsigned char device = *src++;
 361                 unsigned char alpha = *src++;
 362                 uint16_t gray;
 363
 364                 float linear = transform->input_gamma_table_gray[device];
 365
 366                 /* we could round here... */
 367                 gray = linear * PRECACHE_OUTPUT_MAX;
 368
 369                 dest[r_out] = transform->output_table_r->data[gray];
 370                 dest[1]     = transform->output_table_g->data[gray];
 371                 dest[b_out] = transform->output_table_b->data[gray];
 372                 dest[3]     = alpha;
 373                 dest += 4;
 374         }
 375 }
 376
 377 static void qcms_transform_data_rgb_out_lut_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
 378 {
 379         const int r_out = output_format.r;
 380         const int b_out = output_format.b;
 381
 382         unsigned int i;
 383         float (*mat)[4] = transform->matrix;
 384         for (i = 0; i < length; i++) {
 385                 unsigned char device_r = *src++;
 386                 unsigned char device_g = *src++;
 387                 unsigned char device_b = *src++;
 388                 uint16_t r, g, b;
 389
 390                 float linear_r = transform->input_gamma_table_r[device_r];
 391                 float linear_g = transform->input_gamma_table_g[device_g];
 392                 float linear_b = transform->input_gamma_table_b[device_b];
 393
 394                 float out_linear_r = mat[0][0]*linear_r + mat[1][0]*linear_g + mat[2][0]*linear_b;
 395                 float out_linear_g = mat[0][1]*linear_r + mat[1][1]*linear_g + mat[2][1]*linear_b;
 396                 float out_linear_b = mat[0][2]*linear_r + mat[1][2]*linear_g + mat[2][2]*linear_b;
 397
 398                 out_linear_r = clamp_float(out_linear_r);
 399                 out_linear_g = clamp_float(out_linear_g);
 400                 out_linear_b = clamp_float(out_linear_b);
 401
 402                 /* we could round here... */
 403                 r = out_linear_r * PRECACHE_OUTPUT_MAX;
 404                 g = out_linear_g * PRECACHE_OUTPUT_MAX;
 405                 b = out_linear_b * PRECACHE_OUTPUT_MAX;
 406
 407                 dest[r_out] = transform->output_table_r->data[r];
 408                 dest[1]     = transform->output_table_g->data[g];
 409                 dest[b_out] = transform->output_table_b->data[b];
 410                 dest += 3;
 411         }
 412 }
 413
 414 static void qcms_transform_data_rgba_out_lut_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
 415 {
 416         const int r_out = output_format.r;
 417         const int b_out = output_format.b;
 418
 419         unsigned int i;
 420         float (*mat)[4] = transform->matrix;
 421         for (i = 0; i < length; i++) {
 422                 unsigned char device_r = *src++;
 423                 unsigned char device_g = *src++;
 424                 unsigned char device_b = *src++;
 425                 unsigned char alpha = *src++;
 426                 uint16_t r, g, b;
 427
 428                 float linear_r = transform->input_gamma_table_r[device_r];
 429                 float linear_g = transform->input_gamma_table_g[device_g];
 430                 float linear_b = transform->input_gamma_table_b[device_b];
 431
 432                 float out_linear_r = mat[0][0]*linear_r + mat[1][0]*linear_g + mat[2][0]*linear_b;
 433                 float out_linear_g = mat[0][1]*linear_r + mat[1][1]*linear_g + mat[2][1]*linear_b;
 434                 float out_linear_b = mat[0][2]*linear_r + mat[1][2]*linear_g + mat[2][2]*linear_b;
 435
 436                 out_linear_r = clamp_float(out_linear_r);
 437                 out_linear_g = clamp_float(out_linear_g);
 438                 out_linear_b = clamp_float(out_linear_b);
 439
 440                 /* we could round here... */
 441                 r = out_linear_r * PRECACHE_OUTPUT_MAX;
 442                 g = out_linear_g * PRECACHE_OUTPUT_MAX;
 443                 b = out_linear_b * PRECACHE_OUTPUT_MAX;
 444
 445                 dest[r_out] = transform->output_table_r->data[r];
 446                 dest[1]     = transform->output_table_g->data[g];
 447                 dest[b_out] = transform->output_table_b->data[b];
 448                 dest[3]     = alpha;
 449                 dest += 4;
 450         }
 451 }
 452
 453 // Not used
 454 /*
 455 static void qcms_transform_data_clut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
 456 {
 457         const int r_out = output_format.r;
 458         const int b_out = output_format.b;
 459
 460         unsigned int i;
 461         int xy_len = 1;
 462         int x_len = transform->grid_size;
 463         int len = x_len * x_len;
 464         float* r_table = transform->r_clut;
 465         float* g_table = transform->g_clut;
 466         float* b_table = transform->b_clut;
 467
 468         for (i = 0; i < length; i++) {
 469                 unsigned char in_r = *src++;
 470                 unsigned char in_g = *src++;
 471                 unsigned char in_b = *src++;
 472                 float linear_r = in_r/255.0f, linear_g=in_g/255.0f, linear_b = in_b/255.0f;
 473
 474                 int x = floor(linear_r * (transform->grid_size-1));
 475                 int y = floor(linear_g * (transform->grid_size-1));
 476                 int z = floor(linear_b * (transform->grid_size-1));
 477                 int x_n = ceil(linear_r * (transform->grid_size-1));
 478                 int y_n = ceil(linear_g * (transform->grid_size-1));
 479                 int z_n = ceil(linear_b * (transform->grid_size-1));
 480                 float x_d = linear_r * (transform->grid_size-1) - x;
 481                 float y_d = linear_g * (transform->grid_size-1) - y;
 482                 float z_d = linear_b * (transform->grid_size-1) - z;
 483
 484                 float r_x1 = lerp(CLU(r_table,x,y,z), CLU(r_table,x_n,y,z), x_d);
 485                 float r_x2 = lerp(CLU(r_table,x,y_n,z), CLU(r_table,x_n,y_n,z), x_d);
 486                 float r_y1 = lerp(r_x1, r_x2, y_d);
 487                 float r_x3 = lerp(CLU(r_table,x,y,z_n), CLU(r_table,x_n,y,z_n), x_d);
 488                 float r_x4 = lerp(CLU(r_table,x,y_n,z_n), CLU(r_table,x_n,y_n,z_n), x_d);
 489                 float r_y2 = lerp(r_x3, r_x4, y_d);
 490                 float clut_r = lerp(r_y1, r_y2, z_d);
 491
 492                 float g_x1 = lerp(CLU(g_table,x,y,z), CLU(g_table,x_n,y,z), x_d);
 493                 float g_x2 = lerp(CLU(g_table,x,y_n,z), CLU(g_table,x_n,y_n,z), x_d);
 494                 float g_y1 = lerp(g_x1, g_x2, y_d);
 495                 float g_x3 = lerp(CLU(g_table,x,y,z_n), CLU(g_table,x_n,y,z_n), x_d);
 496                 float g_x4 = lerp(CLU(g_table,x,y_n,z_n), CLU(g_table,x_n,y_n,z_n), x_d);
 497                 float g_y2 = lerp(g_x3, g_x4, y_d);
 498                 float clut_g = lerp(g_y1, g_y2, z_d);
 499
 500                 float b_x1 = lerp(CLU(b_table,x,y,z), CLU(b_table,x_n,y,z), x_d);
 501                 float b_x2 = lerp(CLU(b_table,x,y_n,z), CLU(b_table,x_n,y_n,z), x_d);
 502                 float b_y1 = lerp(b_x1, b_x2, y_d);
 503                 float b_x3 = lerp(CLU(b_table,x,y,z_n), CLU(b_table,x_n,y,z_n), x_d);
 504                 float b_x4 = lerp(CLU(b_table,x,y_n,z_n), CLU(b_table,x_n,y_n,z_n), x_d);
 505                 float b_y2 = lerp(b_x3, b_x4, y_d);
 506                 float clut_b = lerp(b_y1, b_y2, z_d);
 507
 508                 dest[r_out] = clamp_u8(clut_r*255.0f);
 509                 dest[1]     = clamp_u8(clut_g*255.0f);
 510                 dest[b_out] = clamp_u8(clut_b*255.0f);
 511                 dest += 3;
 512         }
 513 }
 514 */
 515
 516 // Using lcms' tetra interpolation algorithm.
 517 void qcms_transform_data_tetra_clut_rgba(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
 518 {
 519         const int r_out = output_format.r;
 520         const int b_out = output_format.b;
 521
 522         unsigned int i;
 523         int xy_len = 1;
 524         int x_len = transform->grid_size;
 525         int len = x_len * x_len;
 526         float* r_table = transform->r_clut;
 527         float* g_table = transform->g_clut;
 528         float* b_table = transform->b_clut;
 529         float c0_r, c1_r, c2_r, c3_r;
 530         float c0_g, c1_g, c2_g, c3_g;
 531         float c0_b, c1_b, c2_b, c3_b;
 532         float clut_r, clut_g, clut_b;
 533         for (i = 0; i < length; i++) {
 534                 unsigned char in_r = *src++;
 535                 unsigned char in_g = *src++;
 536                 unsigned char in_b = *src++;
 537                 unsigned char in_a = *src++;
 538                 float linear_r = in_r/255.0f, linear_g=in_g/255.0f, linear_b = in_b/255.0f;
 539
 540                 int x = floor(linear_r * (transform->grid_size-1));
 541                 int y = floor(linear_g * (transform->grid_size-1));
 542                 int z = floor(linear_b * (transform->grid_size-1));
 543                 int x_n = ceil(linear_r * (transform->grid_size-1));
 544                 int y_n = ceil(linear_g * (transform->grid_size-1));
 545                 int z_n = ceil(linear_b * (transform->grid_size-1));
 546                 float rx = linear_r * (transform->grid_size-1) - x;
 547                 float ry = linear_g * (transform->grid_size-1) - y;
 548                 float rz = linear_b * (transform->grid_size-1) - z;
 549
 550                 c0_r = CLU(r_table, x, y, z);
 551                 c0_g = CLU(g_table, x, y, z);
 552                 c0_b = CLU(b_table, x, y, z);
 553
 554                 if( rx >= ry ) {
 555                         if (ry >= rz) { //rx >= ry && ry >= rz
 556                                 c1_r = CLU(r_table, x_n, y, z) - c0_r;
 557                                 c2_r = CLU(r_table, x_n, y_n, z) - CLU(r_table, x_n, y, z);
 558                                 c3_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y_n, z);
 559                                 c1_g = CLU(g_table, x_n, y, z) - c0_g;
 560                                 c2_g = CLU(g_table, x_n, y_n, z) - CLU(g_table, x_n, y, z);
 561                                 c3_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y_n, z);
 562                                 c1_b = CLU(b_table, x_n, y, z) - c0_b;
 563                                 c2_b = CLU(b_table, x_n, y_n, z) - CLU(b_table, x_n, y, z);
 564                                 c3_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y_n, z);
 565                         } else {
 566                                 if (rx >= rz) { //rx >= rz && rz >= ry
 567                                         c1_r = CLU(r_table, x_n, y, z) - c0_r;
 568                                         c2_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y, z_n);
 569                                         c3_r = CLU(r_table, x_n, y, z_n) - CLU(r_table, x_n, y, z);
 570                                         c1_g = CLU(g_table, x_n, y, z) - c0_g;
 571                                         c2_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y, z_n);
 572                                         c3_g = CLU(g_table, x_n, y, z_n) - CLU(g_table, x_n, y, z);
 573                                         c1_b = CLU(b_table, x_n, y, z) - c0_b;
 574                                         c2_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y, z_n);
 575                                         c3_b = CLU(b_table, x_n, y, z_n) - CLU(b_table, x_n, y, z);
 576                                 } else { //rz > rx && rx >= ry
 577                                         c1_r = CLU(r_table, x_n, y, z_n) - CLU(r_table, x, y, z_n);
 578                                         c2_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y, z_n);
 579                                         c3_r = CLU(r_table, x, y, z_n) - c0_r;
 580                                         c1_g = CLU(g_table, x_n, y, z_n) - CLU(g_table, x, y, z_n);
 581                                         c2_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y, z_n);
 582                                         c3_g = CLU(g_table, x, y, z_n) - c0_g;
 583                                         c1_b = CLU(b_table, x_n, y, z_n) - CLU(b_table, x, y, z_n);
 584                                         c2_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y, z_n);
 585                                         c3_b = CLU(b_table, x, y, z_n) - c0_b;
 586                                 }
 587                         }
 588                 } else {
 589                         if (rx >= rz) { //ry > rx && rx >= rz
 590                                 c1_r = CLU(r_table, x_n, y_n, z) - CLU(r_table, x, y_n, z);
 591                                 c2_r = CLU(r_table, x, y_n, z) - c0_r;
 592                                 c3_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y_n, z);
 593                                 c1_g = CLU(g_table, x_n, y_n, z) - CLU(g_table, x, y_n, z);
 594                                 c2_g = CLU(g_table, x, y_n, z) - c0_g;
 595                                 c3_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y_n, z);
 596                                 c1_b = CLU(b_table, x_n, y_n, z) - CLU(b_table, x, y_n, z);
 597                                 c2_b = CLU(b_table, x, y_n, z) - c0_b;
 598                                 c3_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y_n, z);
 599                         } else {
 600                                 if (ry >= rz) { //ry >= rz && rz > rx
 601                                         c1_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x, y_n, z_n);
 602                                         c2_r = CLU(r_table, x, y_n, z) - c0_r;
 603                                         c3_r = CLU(r_table, x, y_n, z_n) - CLU(r_table, x, y_n, z);
 604                                         c1_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x, y_n, z_n);
 605                                         c2_g = CLU(g_table, x, y_n, z) - c0_g;
 606                                         c3_g = CLU(g_table, x, y_n, z_n) - CLU(g_table, x, y_n, z);
 607                                         c1_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x, y_n, z_n);
 608                                         c2_b = CLU(b_table, x, y_n, z) - c0_b;
 609                                         c3_b = CLU(b_table, x, y_n, z_n) - CLU(b_table, x, y_n, z);
 610                                 } else { //rz > ry && ry > rx
 611                                         c1_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x, y_n, z_n);
 612                                         c2_r = CLU(r_table, x, y_n, z_n) - CLU(r_table, x, y, z_n);
 613                                         c3_r = CLU(r_table, x, y, z_n) - c0_r;
 614                                         c1_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x, y_n, z_n);
 615                                         c2_g = CLU(g_table, x, y_n, z_n) - CLU(g_table, x, y, z_n);
 616                                         c3_g = CLU(g_table, x, y, z_n) - c0_g;
 617                                         c1_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x, y_n, z_n);
 618                                         c2_b = CLU(b_table, x, y_n, z_n) - CLU(b_table, x, y, z_n);
 619                                         c3_b = CLU(b_table, x, y, z_n) - c0_b;
 620                                 }
 621                         }
 622                 }
 623
 624                 clut_r = c0_r + c1_r*rx + c2_r*ry + c3_r*rz;
 625                 clut_g = c0_g + c1_g*rx + c2_g*ry + c3_g*rz;
 626                 clut_b = c0_b + c1_b*rx + c2_b*ry + c3_b*rz;
 627
 628                 dest[r_out] = clamp_u8(clut_r*255.0f);
 629                 dest[1]     = clamp_u8(clut_g*255.0f);
 630                 dest[b_out] = clamp_u8(clut_b*255.0f);
 631                 dest[3]     = in_a;
 632                 dest += 4;
 633         }
 634 }
 635
 636 // Using lcms' tetra interpolation code.
 637 static void qcms_transform_data_tetra_clut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
 638 {
 639         const int r_out = output_format.r;
 640         const int b_out = output_format.b;
 641
 642         unsigned int i;
 643         int xy_len = 1;
 644         int x_len = transform->grid_size;
 645         int len = x_len * x_len;
 646         float* r_table = transform->r_clut;
 647         float* g_table = transform->g_clut;
 648         float* b_table = transform->b_clut;
 649         float c0_r, c1_r, c2_r, c3_r;
 650         float c0_g, c1_g, c2_g, c3_g;
 651         float c0_b, c1_b, c2_b, c3_b;
 652         float clut_r, clut_g, clut_b;
 653         for (i = 0; i < length; i++) {
 654                 unsigned char in_r = *src++;
 655                 unsigned char in_g = *src++;
 656                 unsigned char in_b = *src++;
 657                 float linear_r = in_r/255.0f, linear_g=in_g/255.0f, linear_b = in_b/255.0f;
 658
 659                 int x = floor(linear_r * (transform->grid_size-1));
 660                 int y = floor(linear_g * (transform->grid_size-1));
 661                 int z = floor(linear_b * (transform->grid_size-1));
 662                 int x_n = ceil(linear_r * (transform->grid_size-1));
 663                 int y_n = ceil(linear_g * (transform->grid_size-1));
 664                 int z_n = ceil(linear_b * (transform->grid_size-1));
 665                 float rx = linear_r * (transform->grid_size-1) - x;
 666                 float ry = linear_g * (transform->grid_size-1) - y;
 667                 float rz = linear_b * (transform->grid_size-1) - z;
 668
 669                 c0_r = CLU(r_table, x, y, z);
 670                 c0_g = CLU(g_table, x, y, z);
 671                 c0_b = CLU(b_table, x, y, z);
 672
 673                 if( rx >= ry ) {
 674                         if (ry >= rz) { //rx >= ry && ry >= rz
 675                                 c1_r = CLU(r_table, x_n, y, z) - c0_r;
 676                                 c2_r = CLU(r_table, x_n, y_n, z) - CLU(r_table, x_n, y, z);
 677                                 c3_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y_n, z);
 678                                 c1_g = CLU(g_table, x_n, y, z) - c0_g;
 679                                 c2_g = CLU(g_table, x_n, y_n, z) - CLU(g_table, x_n, y, z);
 680                                 c3_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y_n, z);
 681                                 c1_b = CLU(b_table, x_n, y, z) - c0_b;
 682                                 c2_b = CLU(b_table, x_n, y_n, z) - CLU(b_table, x_n, y, z);
 683                                 c3_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y_n, z);
 684                         } else {
 685                                 if (rx >= rz) { //rx >= rz && rz >= ry
 686                                         c1_r = CLU(r_table, x_n, y, z) - c0_r;
 687                                         c2_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y, z_n);
 688                                         c3_r = CLU(r_table, x_n, y, z_n) - CLU(r_table, x_n, y, z);
 689                                         c1_g = CLU(g_table, x_n, y, z) - c0_g;
 690                                         c2_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y, z_n);
 691                                         c3_g = CLU(g_table, x_n, y, z_n) - CLU(g_table, x_n, y, z);
 692                                         c1_b = CLU(b_table, x_n, y, z) - c0_b;
 693                                         c2_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y, z_n);
 694                                         c3_b = CLU(b_table, x_n, y, z_n) - CLU(b_table, x_n, y, z);
 695                                 } else { //rz > rx && rx >= ry
 696                                         c1_r = CLU(r_table, x_n, y, z_n) - CLU(r_table, x, y, z_n);
 697                                         c2_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y, z_n);
 698                                         c3_r = CLU(r_table, x, y, z_n) - c0_r;
 699                                         c1_g = CLU(g_table, x_n, y, z_n) - CLU(g_table, x, y, z_n);
 700                                         c2_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y, z_n);
 701                                         c3_g = CLU(g_table, x, y, z_n) - c0_g;
 702                                         c1_b = CLU(b_table, x_n, y, z_n) - CLU(b_table, x, y, z_n);
 703                                         c2_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y, z_n);
 704                                         c3_b = CLU(b_table, x, y, z_n) - c0_b;
 705                                 }
 706                         }
 707                 } else {
 708                         if (rx >= rz) { //ry > rx && rx >= rz
 709                                 c1_r = CLU(r_table, x_n, y_n, z) - CLU(r_table, x, y_n, z);
 710                                 c2_r = CLU(r_table, x, y_n, z) - c0_r;
 711                                 c3_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y_n, z);
 712                                 c1_g = CLU(g_table, x_n, y_n, z) - CLU(g_table, x, y_n, z);
 713                                 c2_g = CLU(g_table, x, y_n, z) - c0_g;
 714                                 c3_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y_n, z);
 715                                 c1_b = CLU(b_table, x_n, y_n, z) - CLU(b_table, x, y_n, z);
 716                                 c2_b = CLU(b_table, x, y_n, z) - c0_b;
 717                                 c3_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y_n, z);
 718                         } else {
 719                                 if (ry >= rz) { //ry >= rz && rz > rx
 720                                         c1_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x, y_n, z_n);
 721                                         c2_r = CLU(r_table, x, y_n, z) - c0_r;
 722                                         c3_r = CLU(r_table, x, y_n, z_n) - CLU(r_table, x, y_n, z);
 723                                         c1_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x, y_n, z_n);
 724                                         c2_g = CLU(g_table, x, y_n, z) - c0_g;
 725                                         c3_g = CLU(g_table, x, y_n, z_n) - CLU(g_table, x, y_n, z);
 726                                         c1_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x, y_n, z_n);
 727                                         c2_b = CLU(b_table, x, y_n, z) - c0_b;
 728                                         c3_b = CLU(b_table, x, y_n, z_n) - CLU(b_table, x, y_n, z);
 729                                 } else { //rz > ry && ry > rx
 730                                         c1_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x, y_n, z_n);
 731                                         c2_r = CLU(r_table, x, y_n, z_n) - CLU(r_table, x, y, z_n);
 732                                         c3_r = CLU(r_table, x, y, z_n) - c0_r;
 733                                         c1_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x, y_n, z_n);
 734                                         c2_g = CLU(g_table, x, y_n, z_n) - CLU(g_table, x, y, z_n);
 735                                         c3_g = CLU(g_table, x, y, z_n) - c0_g;
 736                                         c1_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x, y_n, z_n);
 737                                         c2_b = CLU(b_table, x, y_n, z_n) - CLU(b_table, x, y, z_n);
 738                                         c3_b = CLU(b_table, x, y, z_n) - c0_b;
 739                                 }
 740                         }
 741                 }
 742
 743                 clut_r = c0_r + c1_r*rx + c2_r*ry + c3_r*rz;
 744                 clut_g = c0_g + c1_g*rx + c2_g*ry + c3_g*rz;
 745                 clut_b = c0_b + c1_b*rx + c2_b*ry + c3_b*rz;
 746
 747                 dest[r_out] = clamp_u8(clut_r*255.0f);
 748                 dest[1]     = clamp_u8(clut_g*255.0f);
 749                 dest[b_out] = clamp_u8(clut_b*255.0f);
 750                 dest += 3;
 751         }
 752 }
 753
 754 static void qcms_transform_data_rgb_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
 755 {
 756         const int r_out = output_format.r;
 757         const int b_out = output_format.b;
 758
 759         unsigned int i;
 760         float (*mat)[4] = transform->matrix;
 761         for (i = 0; i < length; i++) {
 762                 unsigned char device_r = *src++;
 763                 unsigned char device_g = *src++;
 764                 unsigned char device_b = *src++;
 765                 float out_device_r, out_device_g, out_device_b;
 766
 767                 float linear_r = transform->input_gamma_table_r[device_r];
 768                 float linear_g = transform->input_gamma_table_g[device_g];
 769                 float linear_b = transform->input_gamma_table_b[device_b];
 770
 771                 float out_linear_r = mat[0][0]*linear_r + mat[1][0]*linear_g + mat[2][0]*linear_b;
 772                 float out_linear_g = mat[0][1]*linear_r + mat[1][1]*linear_g + mat[2][1]*linear_b;
 773                 float out_linear_b = mat[0][2]*linear_r + mat[1][2]*linear_g + mat[2][2]*linear_b;
 774
 775                 out_linear_r = clamp_float(out_linear_r);
 776                 out_linear_g = clamp_float(out_linear_g);
 777                 out_linear_b = clamp_float(out_linear_b);
 778
 779                 out_device_r = lut_interp_linear(out_linear_r,
 780                                 transform->output_gamma_lut_r, transform->output_gamma_lut_r_length);
 781                 out_device_g = lut_interp_linear(out_linear_g,
 782                                 transform->output_gamma_lut_g, transform->output_gamma_lut_g_length);
 783                 out_device_b = lut_interp_linear(out_linear_b,
 784                                 transform->output_gamma_lut_b, transform->output_gamma_lut_b_length);
 785
 786                 dest[r_out] = clamp_u8(out_device_r*255);
 787                 dest[1]     = clamp_u8(out_device_g*255);
 788                 dest[b_out] = clamp_u8(out_device_b*255);
 789                 dest += 3;
 790         }
 791 }
 792
 793 static void qcms_transform_data_rgba_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
 794 {
 795         const int r_out = output_format.r;
 796         const int b_out = output_format.b;
 797
 798         unsigned int i;
 799         float (*mat)[4] = transform->matrix;
 800         for (i = 0; i < length; i++) {
 801                 unsigned char device_r = *src++;
 802                 unsigned char device_g = *src++;
 803                 unsigned char device_b = *src++;
 804                 unsigned char alpha = *src++;
 805                 float out_device_r, out_device_g, out_device_b;
 806
 807                 float linear_r = transform->input_gamma_table_r[device_r];
 808                 float linear_g = transform->input_gamma_table_g[device_g];
 809                 float linear_b = transform->input_gamma_table_b[device_b];
 810
 811                 float out_linear_r = mat[0][0]*linear_r + mat[1][0]*linear_g + mat[2][0]*linear_b;
 812                 float out_linear_g = mat[0][1]*linear_r + mat[1][1]*linear_g + mat[2][1]*linear_b;
 813                 float out_linear_b = mat[0][2]*linear_r + mat[1][2]*linear_g + mat[2][2]*linear_b;
 814
 815                 out_linear_r = clamp_float(out_linear_r);
 816                 out_linear_g = clamp_float(out_linear_g);
 817                 out_linear_b = clamp_float(out_linear_b);
 818
 819                 out_device_r = lut_interp_linear(out_linear_r,
 820                                 transform->output_gamma_lut_r, transform->output_gamma_lut_r_length);
 821                 out_device_g = lut_interp_linear(out_linear_g,
 822                                 transform->output_gamma_lut_g, transform->output_gamma_lut_g_length);
 823                 out_device_b = lut_interp_linear(out_linear_b,
 824                                 transform->output_gamma_lut_b, transform->output_gamma_lut_b_length);
 825
 826                 dest[r_out] = clamp_u8(out_device_r*255);
 827                 dest[1]     = clamp_u8(out_device_g*255);
 828                 dest[b_out] = clamp_u8(out_device_b*255);
 829                 dest[3]     = alpha;
 830                 dest += 4;
 831         }
 832 }
 833
#if 0
/*
 * Compiled out. Variant of the RGB matrix pipeline that stops after the
 * matrix multiply: the result is written with clamp_u8() directly, without
 * the clamp_float() and output gamma LUT steps used by
 * qcms_transform_data_rgb_out_lut().
 */
static void qcms_transform_data_rgb_out_linear(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
{
        const int r_out = output_format.r;
        const int b_out = output_format.b;

        int i;
        float (*mat)[4] = transform->matrix;
        for (i = 0; i < length; i++) {
                unsigned char device_r = *src++;
                unsigned char device_g = *src++;
                unsigned char device_b = *src++;

                float linear_r = transform->input_gamma_table_r[device_r];
                float linear_g = transform->input_gamma_table_g[device_g];
                float linear_b = transform->input_gamma_table_b[device_b];

                float out_linear_r = mat[0][0]*linear_r + mat[1][0]*linear_g + mat[2][0]*linear_b;
                float out_linear_g = mat[0][1]*linear_r + mat[1][1]*linear_g + mat[2][1]*linear_b;
                float out_linear_b = mat[0][2]*linear_r + mat[1][2]*linear_g + mat[2][2]*linear_b;

                dest[r_out] = clamp_u8(out_linear_r*255);
                dest[1]     = clamp_u8(out_linear_g*255);
                dest[b_out] = clamp_u8(out_linear_b*255);
                dest += 3;
        }
}
#endif
 862
/*
 * If users create and destroy objects on different threads, even if the same
 * objects aren't used on different threads at the same time, we can still run
 * into trouble with refcounts if they aren't atomic.
 *
 * This can lead to us prematurely deleting the precache if threads get unlucky
 * and write the wrong value to the ref count.
 */
/* Take one more reference on `p` (via qcms_atomic_increment) and return the
 * same pointer so the call can be used directly in an assignment.
 * `p` must not be NULL: it is dereferenced unconditionally. */
static struct precache_output *precache_reference(struct precache_output *p)
{
        qcms_atomic_increment(p->ref_count);
        return p;
}
 876
 877 static struct precache_output *precache_create()
 878 {
 879         struct precache_output *p = malloc(sizeof(struct precache_output));
 880         if (p)
 881                 p->ref_count = 1;
 882         return p;
 883 }
 884
 885 void precache_release(struct precache_output *p)
 886 {
 887         if (qcms_atomic_decrement(p->ref_count) == 0) {
 888                 free(p);
 889         }
 890 }
 891
 892 #ifdef HAVE_POSIX_MEMALIGN
 893 static qcms_transform *transform_alloc(void)
 894 {
 895         qcms_transform *t;
 896         if (!posix_memalign(&t, 16, sizeof(*t))) {
 897                 return t;
 898         } else {
 899                 return NULL;
 900         }
 901 }
/* Release a transform obtained from the posix_memalign() based
 * transform_alloc(); such memory is returned with plain free(). */
static void transform_free(qcms_transform *t)
{
        free(t);
}
 906 #else
 907 static qcms_transform *transform_alloc(void)
 908 {
 909         /* transform needs to be aligned on a 16byte boundrary */
 910         char *original_block = calloc(sizeof(qcms_transform) + sizeof(void*) + 16, 1);
 911         /* make room for a pointer to the block returned by calloc */
 912         void *transform_start = original_block + sizeof(void*);
 913         /* align transform_start */
 914         qcms_transform *transform_aligned = (qcms_transform*)(((uintptr_t)transform_start + 15) & ~0xf);
 915
 916         /* store a pointer to the block returned by calloc so that we can free it later */
 917         void **(original_block_ptr) = (void**)transform_aligned;
 918         if (!original_block)
 919                 return NULL;
 920         original_block_ptr--;
 921         *original_block_ptr = original_block;
 922
 923         return transform_aligned;
 924 }
 925 static void transform_free(qcms_transform *t)
 926 {
 927         /* get at the pointer to the unaligned block returned by calloc */
 928         void **p = (void**)t;
 929         p--;
 930         free(*p);
 931 }
 932 #endif
 933
 934 void qcms_transform_release(qcms_transform *t)
 935 {
 936         /* ensure we only free the gamma tables once even if there are
 937          * multiple references to the same data */
 938
 939         if (t->output_table_r)
 940                 precache_release(t->output_table_r);
 941         if (t->output_table_g)
 942                 precache_release(t->output_table_g);
 943         if (t->output_table_b)
 944                 precache_release(t->output_table_b);
 945
 946         free(t->input_gamma_table_r);
 947         if (t->input_gamma_table_g != t->input_gamma_table_r)
 948                 free(t->input_gamma_table_g);
 949         if (t->input_gamma_table_g != t->input_gamma_table_r &&
 950             t->input_gamma_table_g != t->input_gamma_table_b)
 951                 free(t->input_gamma_table_b);
 952
 953         free(t->input_gamma_table_gray);
 954
 955         free(t->output_gamma_lut_r);
 956         free(t->output_gamma_lut_g);
 957         free(t->output_gamma_lut_b);
 958
 959         transform_free(t);
 960 }
 961
 962 #ifdef X86
 963 // Determine if we can build with SSE2 (this was partly copied from jmorecfg.h in
 964 // mozilla/jpeg)
 965  // -------------------------------------------------------------------------
 966 #if defined(_M_IX86) && defined(_MSC_VER)
 967 #define HAS_CPUID
/* Get us a CPUID function. Avoid clobbering EBX because sometimes it's the PIC
   register - I'm not sure if that ever happens on windows, but cpuid isn't
   on the critical path so we just preserve the register to be safe and to be
   consistent with the non-windows version.

   Executes CPUID with EAX = fxn and stores the resulting EAX, EBX, ECX and
   EDX values through a, b, c and d. */
static void cpuid(uint32_t fxn, uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d) {
       uint32_t a_, b_, c_, d_;
       __asm {
              /* park the caller's EBX in ESI while CPUID overwrites EBX */
              xchg   ebx, esi
              mov    eax, fxn
              cpuid
              mov    a_, eax
              mov    b_, ebx
              mov    c_, ecx
              mov    d_, edx
              /* swap back so EBX holds the caller's value again */
              xchg   ebx, esi
       }
       *a = a_;
       *b = b_;
       *c = c_;
       *d = d_;
}
 989 #elif (defined(__GNUC__) || defined(__SUNPRO_C)) && (defined(__i386__) || defined(__i386))
 990 #define HAS_CPUID
/* Get us a CPUID function. We can't use ebx because it's the PIC register on
   some platforms, so we use ESI instead and save ebx to avoid clobbering it.

   Executes CPUID with EAX = fxn and stores the resulting EAX, EBX, ECX and
   EDX values through a, b, c and d. */
static void cpuid(uint32_t fxn, uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d) {

        uint32_t a_, b_, c_, d_;
       /* The first xchgl parks the caller's EBX in ESI; the second swaps it
          back, which leaves CPUID's EBX result in ESI -- hence the "=S"
          output constraint for b_. */
       __asm__ __volatile__ ("xchgl %%ebx, %%esi; cpuid; xchgl %%ebx, %%esi;"
                             : "=a" (a_), "=S" (b_), "=c" (c_), "=d" (d_) : "a" (fxn));
           *a = a_;
           *b = b_;
           *c = c_;
           *d = d_;
}
1003 #endif
1004
1005 // -------------------------Runtime SSEx Detection-----------------------------
1006
1007 /* MMX is always supported per
1008  *  Gecko v1.9.1 minimum CPU requirements */
1009 #define SSE1_EDX_MASK (1UL << 25)
1010 #define SSE2_EDX_MASK (1UL << 26)
1011 #define SSE3_ECX_MASK (1UL <<  0)
1012
/*
 * Report the highest SSE level this code may use:
 *   3 = SSE3, 2 = SSE2, 1 = SSE, 0 = none / unknown.
 *
 * 64-bit x86 builds always answer 2 without querying the CPU. 32-bit builds
 * that have a cpuid() helper query CPUID function 1 once and cache the
 * answer; every other build answers 0.
 */
static int sse_version_available(void)
{
#if defined(__x86_64__) || defined(__x86_64) || defined(_M_AMD64)
        /* we know at build time that 64-bit CPUs always have SSE2
         * this tells the compiler that non-SSE2 branches will never be
         * taken (i.e. OK to optimize away the SSE1 and non-SIMD code) */
        return 2;
#elif defined(HAS_CPUID)
        static int sse_version = -1;

        if (sse_version == -1) {
                uint32_t a, b, c, d;
                uint32_t function = 0x00000001;
                int detected = 0;

                cpuid(function, &a, &b, &c, &d);
                if (c & SSE3_ECX_MASK)
                        detected = 3;
                else if (d & SSE2_EDX_MASK)
                        detected = 2;
                else if (d & SSE1_EDX_MASK)
                        detected = 1;

                /* Publish only the final answer: storing a provisional 0
                 * first would let a concurrent caller conclude that no SSE
                 * is available while detection is still in progress. */
                sse_version = detected;
        }

        return sse_version;
#else
        return 0;
#endif
}
1041 #endif
1042
/* Coefficients of the Bradford chromatic adaptation transform, used by
 * compute_whitepoint_adaption(). The trailing `false` initialises the member
 * that follows m[][] in struct matrix (presumably an "invalid" flag --
 * confirm against matrix.h). */
static const struct matrix bradford_matrix = {{ { 0.8951f, 0.2664f,-0.1614f},
                                                {-0.7502f, 1.7135f, 0.0367f},
                                                { 0.0389f,-0.0685f, 1.0296f}},
                                                false};

/* Inverse of bradford_matrix, used to map the scaled values back. */
static const struct matrix bradford_matrix_inv = {{ { 0.9869929f,-0.1470543f, 0.1599627f},
                                                    { 0.4323053f, 0.5183603f, 0.0492912f},
                                                    {-0.0085287f, 0.0400428f, 0.9684867f}},
                                                    false};
1052
1053 // See ICCv4 E.3
1054 struct matrix compute_whitepoint_adaption(float X, float Y, float Z) {
1055         float p = (0.96422f*bradford_matrix.m[0][0] + 1.000f*bradford_matrix.m[1][0] + 0.82521f*bradford_matrix.m[2][0]) /
1056                   (X*bradford_matrix.m[0][0]      + Y*bradford_matrix.m[1][0]      + Z*bradford_matrix.m[2][0]     );
1057         float y = (0.96422f*bradford_matrix.m[0][1] + 1.000f*bradford_matrix.m[1][1] + 0.82521f*bradford_matrix.m[2][1]) /
1058                   (X*bradford_matrix.m[0][1]      + Y*bradford_matrix.m[1][1]      + Z*bradford_matrix.m[2][1]     );
1059         float b = (0.96422f*bradford_matrix.m[0][2] + 1.000f*bradford_matrix.m[1][2] + 0.82521f*bradford_matrix.m[2][2]) /
1060                   (X*bradford_matrix.m[0][2]      + Y*bradford_matrix.m[1][2]      + Z*bradford_matrix.m[2][2]     );
1061         struct matrix white_adaption = {{ {p,0,0}, {0,y,0}, {0,0,b}}, false};
1062         return matrix_multiply( bradford_matrix_inv, matrix_multiply(white_adaption, bradford_matrix) );
1063 }
1064
1065 void qcms_profile_precache_output_transform(qcms_profile *profile)
1066 {
1067         /* we only support precaching on rgb profiles */
1068         if (profile->color_space != RGB_SIGNATURE)
1069                 return;
1070
1071         if (qcms_supports_iccv4) {
1072                 /* don't precache since we will use the B2A LUT */
1073                 if (profile->B2A0)
1074                         return;
1075
1076                 /* don't precache since we will use the mBA LUT */
1077                 if (profile->mBA)
1078                         return;
1079         }
1080
1081         /* don't precache if we do not have the TRC curves */
1082         if (!profile->redTRC || !profile->greenTRC || !profile->blueTRC)
1083                 return;
1084
1085         if (!profile->output_table_r) {
1086                 profile->output_table_r = precache_create();
1087                 if (profile->output_table_r &&
1088                                 !compute_precache(profile->redTRC, profile->output_table_r->data)) {
1089                         precache_release(profile->output_table_r);
1090                         profile->output_table_r = NULL;
1091                 }
1092         }
1093         if (!profile->output_table_g) {
1094                 profile->output_table_g = precache_create();
1095                 if (profile->output_table_g &&
1096                                 !compute_precache(profile->greenTRC, profile->output_table_g->data)) {
1097                         precache_release(profile->output_table_g);
1098                         profile->output_table_g = NULL;
1099                 }
1100         }
1101         if (!profile->output_table_b) {
1102                 profile->output_table_b = precache_create();
1103                 if (profile->output_table_b &&
1104                                 !compute_precache(profile->blueTRC, profile->output_table_b->data)) {
1105                         precache_release(profile->output_table_b);
1106                         profile->output_table_b = NULL;
1107                 }
1108         }
1109 }
1110
1111 /* Replace the current transformation with a LUT transformation using a given number of sample points */
1112 qcms_transform* qcms_transform_precacheLUT_float(qcms_transform *transform, qcms_profile *in, qcms_profile *out,
1113                                                  int samples, qcms_data_type in_type)
1114 {
1115         /* The range between which 2 consecutive sample points can be used to interpolate */
1116         uint16_t x,y,z;
1117         uint32_t l;
1118         uint32_t lutSize = 3 * samples * samples * samples;
1119         float* src = NULL;
1120         float* dest = NULL;
1121         float* lut = NULL;
1122         float inverse;
1123
1124         src = malloc(lutSize*sizeof(float));
1125         dest = malloc(lutSize*sizeof(float));
1126
1127         if (src && dest) {
1128                 /* Prepare a list of points we want to sample: x, y, z order */
1129                 l = 0;
1130                 inverse = 1 / (float)(samples-1);
1131                 for (x = 0; x < samples; x++) {
1132                         for (y = 0; y < samples; y++) {
1133                                 for (z = 0; z < samples; z++) {
1134                                         src[l++] = x * inverse; // r
1135                                         src[l++] = y * inverse; // g
1136                                         src[l++] = z * inverse; // b
1137                                 }
1138                         }
1139                 }
1140
1141                 lut = qcms_chain_transform(in, out, src, dest, lutSize);
1142
1143                 if (lut) {
1144                         transform->r_clut = &lut[0]; // r
1145                         transform->g_clut = &lut[1]; // g
1146                         transform->b_clut = &lut[2]; // b
1147                         transform->grid_size = samples;
1148
1149                         if (in_type == QCMS_DATA_RGBA_8) {
1150 #if defined(SSE2_ENABLE)
1151                                 if (sse_version_available() >= 2) {
1152                                         transform->transform_fn = qcms_transform_data_tetra_clut_rgba_sse2;
1153                                 } else {
1154                                         transform->transform_fn = qcms_transform_data_tetra_clut_rgba;
1155                                 }
1156 #else
1157                                 transform->transform_fn = qcms_transform_data_tetra_clut_rgba;
1158 #endif
1159                         } else {
1160                                 transform->transform_fn = qcms_transform_data_tetra_clut;
1161                         }
1162                 }
1163         }
1164
1165         // XXX: qcms_modular_transform_data may return the lut in either the src or the
1166         // dest buffer. If so, it must not be free-ed.
1167         if (src && lut != src) {
1168                 free(src);
1169         }
1170         if (dest && lut != dest) {
1171                 free(dest);
1172         }
1173
1174         if (lut == NULL) {
1175                 return NULL;
1176         }
1177         return transform;
1178 }
1179
1180 /* Create a transform LUT using the given number of sample points. The transform LUT data is stored
1181    in the output (cube) in bgra format in zyx sample order. */
1182 qcms_bool qcms_transform_create_LUT_zyx_bgra(qcms_profile *in, qcms_profile *out, qcms_intent intent,
1183                                              int samples, unsigned char* cube)
1184 {
1185         uint16_t z,y,x;
1186         uint32_t l,index;
1187         uint32_t lutSize = 3 * samples * samples * samples;
1188
1189         float* src = NULL;
1190         float* dest = NULL;
1191         float* lut = NULL;
1192         float inverse;
1193
1194         src = malloc(lutSize*sizeof(float));
1195         dest = malloc(lutSize*sizeof(float));
1196
1197         if (src && dest) {
1198                 /* Prepare a list of points we want to sample: z, y, x order */
1199                 l = 0;
1200                 inverse = 1 / (float)(samples-1);
1201                 for (z = 0; z < samples; z++) {
1202                         for (y = 0; y < samples; y++) {
1203                                 for (x = 0; x < samples; x++) {
1204                                         src[l++] = x * inverse; // r
1205                                         src[l++] = y * inverse; // g
1206                                         src[l++] = z * inverse; // b
1207                                 }
1208                         }
1209                 }
1210
1211                 lut = qcms_chain_transform(in, out, src, dest, lutSize);
1212
1213                 if (lut) {
1214                         index = l = 0;
1215                         for (z = 0; z < samples; z++) {
1216                                 for (y = 0; y < samples; y++) {
1217                                         for (x = 0; x < samples; x++) {
1218                                                 cube[index++] = (int)floorf(lut[l + 2] * 255.0f + 0.5f); // b
1219                                                 cube[index++] = (int)floorf(lut[l + 1] * 255.0f + 0.5f); // g
1220                                                 cube[index++] = (int)floorf(lut[l + 0] * 255.0f + 0.5f); // r
1221                                                 cube[index++] = 255;                                     // a
1222                                                 l += 3;
1223                                         }
1224                                 }
1225                         }
1226                 }
1227         }
1228
1229         // XXX: qcms_modular_transform_data may return the lut data in either the src or
1230         // dest buffer so free src, dest, and lut with care.
1231
1232         if (src && lut != src)
1233                 free(src);
1234         if (dest && lut != dest)
1235                 free(dest);
1236
1237         if (lut) {
1238                 free(lut);
1239                 return true;
1240         }
1241
1242         return false;
1243 }
1244
1245 #define NO_MEM_TRANSFORM NULL
1246
1247 qcms_transform* qcms_transform_create(
1248                 qcms_profile *in, qcms_data_type in_type,
1249                 qcms_profile *out, qcms_data_type out_type,
1250                 qcms_intent intent)
1251 {
1252         qcms_transform *transform = NULL;
1253         bool precache = false;
1254         int i, j;
1255
1256         transform = transform_alloc();
1257         if (!transform) {
1258                 return NULL;
1259         }
1260
1261         if (out_type != QCMS_DATA_RGB_8 && out_type != QCMS_DATA_RGBA_8) {
1262                 assert(0 && "output type");
1263                 qcms_transform_release(transform);
1264                 return NULL;
1265         }
1266
1267         transform->transform_flags = 0;
1268
1269         if (out->output_table_r && out->output_table_g && out->output_table_b) {
1270                 precache = true;
1271         }
1272
1273         if (qcms_supports_iccv4 && (in->A2B0 || out->B2A0 || in->mAB || out->mAB)) {
1274                 // Precache the transformation to a CLUT 33x33x33 in size.
1275                 // 33 is used by many profiles and works well in practice.
1276                 // This evenly divides 256 into blocks of 8x8x8.
1277                 // TODO For transforming small data sets of about 200x200 or less
1278                 // precaching should be avoided.
1279                 qcms_transform *result = qcms_transform_precacheLUT_float(transform, in, out, 33, in_type);
1280                 if (!result) {
1281                         assert(0 && "precacheLUT failed");
1282                         qcms_transform_release(transform);
1283                         return NULL;
1284                 }
1285                 return result;
1286         }
1287
1288         /* A matrix-based transform will be selected: check that the PCS
1289            of the input/output profiles are the same, crbug.com/5120682 */
1290         if (in->pcs != out->pcs) {
1291                 qcms_transform_release(transform);
1292                 return NULL;
1293         }
1294
1295         if (precache) {
1296                 transform->output_table_r = precache_reference(out->output_table_r);
1297                 transform->output_table_g = precache_reference(out->output_table_g);
1298                 transform->output_table_b = precache_reference(out->output_table_b);
1299         } else {
1300                 if (!out->redTRC || !out->greenTRC || !out->blueTRC) {
1301                         qcms_transform_release(transform);
1302                         return NO_MEM_TRANSFORM;
1303                 }
1304
1305                 build_output_lut(out->redTRC, &transform->output_gamma_lut_r, &transform->output_gamma_lut_r_length);
1306                 build_output_lut(out->greenTRC, &transform->output_gamma_lut_g, &transform->output_gamma_lut_g_length);
1307                 build_output_lut(out->blueTRC, &transform->output_gamma_lut_b, &transform->output_gamma_lut_b_length);
1308
1309                 if (!transform->output_gamma_lut_r || !transform->output_gamma_lut_g || !transform->output_gamma_lut_b) {
1310                         qcms_transform_release(transform);
1311                         return NO_MEM_TRANSFORM;
1312                 }
1313         }
1314
1315         if (in->color_space == RGB_SIGNATURE) {
1316                 struct matrix in_matrix, out_matrix, result;
1317
1318                 if (in_type != QCMS_DATA_RGB_8 && in_type != QCMS_DATA_RGBA_8) {
1319                         assert(0 && "input type");
1320                         qcms_transform_release(transform);
1321                         return NULL;
1322                 }
1323
1324                 if (precache) {
1325 #if defined(SSE2_ENABLE)
1326                         if (sse_version_available() >= 2) {
1327                                 if (in_type == QCMS_DATA_RGB_8)
1328                                         transform->transform_fn = qcms_transform_data_rgb_out_lut_sse2;
1329                                 else
1330                                         transform->transform_fn = qcms_transform_data_rgba_out_lut_sse2;
1331                         } else
1332 #endif
1333                         {
1334                                 if (in_type == QCMS_DATA_RGB_8)
1335                                         transform->transform_fn = qcms_transform_data_rgb_out_lut_precache;
1336                                 else
1337                                         transform->transform_fn = qcms_transform_data_rgba_out_lut_precache;
1338                         }
1339                 } else {
1340                         if (in_type == QCMS_DATA_RGB_8)
1341                                 transform->transform_fn = qcms_transform_data_rgb_out_lut;
1342                         else
1343                                 transform->transform_fn = qcms_transform_data_rgba_out_lut;
1344                 }
1345
1346                 //XXX: avoid duplicating tables if we can
1347                 transform->input_gamma_table_r = build_input_gamma_table(in->redTRC);
1348                 transform->input_gamma_table_g = build_input_gamma_table(in->greenTRC);
1349                 transform->input_gamma_table_b = build_input_gamma_table(in->blueTRC);
1350
1351                 if (!transform->input_gamma_table_r || !transform->input_gamma_table_g || !transform->input_gamma_table_b) {
1352                         qcms_transform_release(transform);
1353                         return NO_MEM_TRANSFORM;
1354                 }
1355
1356                 /* build combined colorant matrix */
1357                 in_matrix = build_colorant_matrix(in);
1358                 out_matrix = build_colorant_matrix(out);
1359                 out_matrix = matrix_invert(out_matrix);
1360                 if (out_matrix.invalid) {
1361                         qcms_transform_release(transform);
1362                         return NULL;
1363                 }
1364                 result = matrix_multiply(out_matrix, in_matrix);
1365
1366                 /* check for NaN values in the matrix and bail if we find any
1367                    see also https://bugzilla.mozilla.org/show_bug.cgi?id=1170316 */
1368                 for (i = 0 ; i < 3 ; ++i) {
1369                         for (j = 0 ; j < 3 ; ++j) {
1370                                 if (result.m[i][j] != result.m[i][j]) {
1371                                         qcms_transform_release(transform);
1372                                         return NULL;
1373                                 }
1374                         }
1375                 }
1376
1377                 /* store the results in column major mode
1378                  * this makes doing the multiplication with sse easier */
1379                 transform->matrix[0][0] = result.m[0][0];
1380                 transform->matrix[1][0] = result.m[0][1];
1381                 transform->matrix[2][0] = result.m[0][2];
1382                 transform->matrix[0][1] = result.m[1][0];
1383                 transform->matrix[1][1] = result.m[1][1];
1384                 transform->matrix[2][1] = result.m[1][2];
1385                 transform->matrix[0][2] = result.m[2][0];
1386                 transform->matrix[1][2] = result.m[2][1];
1387                 transform->matrix[2][2] = result.m[2][2];
1388
1389                 /* Flag transform as matrix. */
1390                 transform->transform_flags |= TRANSFORM_FLAG_MATRIX;
1391
1392         } else if (in->color_space == GRAY_SIGNATURE) {
1393                 if (in_type != QCMS_DATA_GRAY_8 && in_type != QCMS_DATA_GRAYA_8) {
1394                         assert(0 && "input type");
1395                         qcms_transform_release(transform);
1396                         return NULL;
1397                 }
1398
1399                 transform->input_gamma_table_gray = build_input_gamma_table(in->grayTRC);
1400
1401                 if (!transform->input_gamma_table_gray) {
1402                         qcms_transform_release(transform);
1403                         return NO_MEM_TRANSFORM;
1404                 }
1405
1406                 if (precache) {
1407                         if (in_type == QCMS_DATA_GRAY_8) {
1408                                 transform->transform_fn = qcms_transform_data_gray_out_precache;
1409                         } else {
1410                                 transform->transform_fn = qcms_transform_data_graya_out_precache;
1411                         }
1412                 } else {
1413                         if (in_type == QCMS_DATA_GRAY_8) {
1414                                 transform->transform_fn = qcms_transform_data_gray_out_lut;
1415                         } else {
1416                                 transform->transform_fn = qcms_transform_data_graya_out_lut;
1417                         }
1418                 }
1419         } else {
1420                 assert(0 && "unexpected colorspace");
1421                 qcms_transform_release(transform);
1422                 return NULL;
1423         }
1424
1425         return transform;
1426 }
1427
1428 /* __force_align_arg_pointer__ is an x86-only attribute, and gcc/clang warns on unused
1429  * attributes. Don't use this on ARM or AMD64. __has_attribute can detect the presence
1430  * of the attribute but is currently only supported by clang */
1431 #if defined(__has_attribute)
1432 #define HAS_FORCE_ALIGN_ARG_POINTER __has_attribute(__force_align_arg_pointer__)
1433 #elif defined(__GNUC__) && defined(__i386__)
1434 #define HAS_FORCE_ALIGN_ARG_POINTER 1
1435 #else
1436 #define HAS_FORCE_ALIGN_ARG_POINTER 0
1437 #endif
1438
1439 #if HAS_FORCE_ALIGN_ARG_POINTER
1440 /* we need this to avoid crashes when gcc assumes the stack is 128bit aligned */
1441 __attribute__((__force_align_arg_pointer__))
1442 #endif
1443 void qcms_transform_data(qcms_transform *transform, void *src, void *dest, size_t length)
1444 {
1445         static const struct _qcms_format_type output_rgbx = { 0, 2 };
1446
1447         transform->transform_fn(transform, src, dest, length, output_rgbx);
1448 }
1449
1450 void qcms_transform_data_type(qcms_transform *transform, void *src, void *dest, size_t length, qcms_output_type type)
1451 {
1452         static const struct _qcms_format_type output_rgbx = { 0, 2 };
1453         static const struct _qcms_format_type output_bgrx = { 2, 0 };
1454
1455         transform->transform_fn(transform, src, dest, length, type == QCMS_OUTPUT_BGRX ? output_bgrx : output_rgbx);
1456 }
1457
1458 qcms_bool qcms_supports_iccv4;
1459
1460 void qcms_enable_iccv4()
1461 {
1462         qcms_supports_iccv4 = true;
1463 }
1464
1465 static inline qcms_bool transform_is_matrix(qcms_transform *t)
1466 {
1467         return (t->transform_flags & TRANSFORM_FLAG_MATRIX) ? true : false;
1468 }
1469
1470 qcms_bool qcms_transform_is_matrix(qcms_transform *t)
1471 {
1472         return transform_is_matrix(t);
1473 }
1474
1475 float qcms_transform_get_matrix(qcms_transform *t, unsigned i, unsigned j)
1476 {
1477         assert(transform_is_matrix(t) && i < 3 && j < 3);
1478
1479         // Return transform matrix element in row major order (permute i and j)
1480
1481         return t->matrix[j][i];
1482 }
1483
1484 static inline qcms_bool supported_trc_type(qcms_trc_type type)
1485 {
1486         return type == QCMS_TRC_HALF_FLOAT;
1487 }
1488
1489 const uint16_t half_float_one = 0x3c00;
1490
1491 size_t qcms_transform_get_input_trc_rgba(qcms_transform *t, qcms_profile *in, qcms_trc_type type, unsigned short *data)
1492 {
1493         const size_t size = 256; // The input gamma tables always have 256 entries.
1494
1495         size_t i;
1496
1497         if (in->color_space != RGB_SIGNATURE || !supported_trc_type(type))
1498                 return 0;
1499
1500         // qcms_profile *in is assumed to be the profile on the input-side of the color transform t.
1501         // When a transform is created, the input gamma curve data is stored in the transform ...
1502
1503         if (!t->input_gamma_table_r || !t->input_gamma_table_g || !t->input_gamma_table_b)
1504                 return 0;
1505
1506         // Report the size if no output data is requested. This allows callers to first work out the
1507         // the curve size, then provide allocated memory sufficient to store the curve rgba data.
1508
1509         if (!data)
1510                 return size;
1511
1512         for (i = 0; i < size; ++i) {
1513                 *data++ = float_to_half_float(t->input_gamma_table_r[i]); // r
1514                 *data++ = float_to_half_float(t->input_gamma_table_g[i]); // g
1515                 *data++ = float_to_half_float(t->input_gamma_table_b[i]); // b
1516                 *data++ = half_float_one;                                 // a
1517         }
1518
1519         return size;
1520 }
1521
1522 const float inverse65535 = (float) (1.0 / 65535.0);
1523
1524 size_t qcms_transform_get_output_trc_rgba(qcms_transform *t, qcms_profile *out, qcms_trc_type type, unsigned short *data)
1525 {
1526         size_t size, i;
1527
1528         if (out->color_space != RGB_SIGNATURE || !supported_trc_type(type))
1529                 return 0;
1530
1531         // qcms_profile *out is assumed to be the profile on the output-side of the transform t.
1532         // If the transform output gamma curves need building, do that. They're usually built when
1533         // the transform was created, but sometimes not due to the output gamma precache ...
1534
1535         if (!out->redTRC || !out->greenTRC || !out->blueTRC)
1536                 return 0;
1537         if (!t->output_gamma_lut_r)
1538                 build_output_lut(out->redTRC, &t->output_gamma_lut_r, &t->output_gamma_lut_r_length);
1539         if (!t->output_gamma_lut_g)
1540                 build_output_lut(out->greenTRC, &t->output_gamma_lut_g, &t->output_gamma_lut_g_length);
1541         if (!t->output_gamma_lut_b)
1542                 build_output_lut(out->blueTRC, &t->output_gamma_lut_b, &t->output_gamma_lut_b_length);
1543
1544         if (!t->output_gamma_lut_r || !t->output_gamma_lut_g || !t->output_gamma_lut_b)
1545                 return 0;
1546
1547         // Output gamma tables should have the same size and should have 4096 entries at most (the
1548         // minimum is 256). Larger tables are rare and ignored here: fail by returning 0.
1549
1550         size = t->output_gamma_lut_r_length;
1551         if (size != t->output_gamma_lut_g_length)
1552                 return 0;
1553         if (size != t->output_gamma_lut_b_length)
1554                 return 0;
1555         if (size < 256 || size > 4096)
1556                 return 0;
1557
1558         // Report the size if no output data is requested. This allows callers to first work out the
1559         // the curve size, then provide allocated memory sufficient to store the curve rgba data.
1560
1561         if (!data)
1562                 return size;
1563
1564         for (i = 0; i < size; ++i) {
1565                 *data++ = float_to_half_float(t->output_gamma_lut_r[i] * inverse65535); // r
1566                 *data++ = float_to_half_float(t->output_gamma_lut_g[i] * inverse65535); // g
1567                 *data++ = float_to_half_float(t->output_gamma_lut_b[i] * inverse65535); // b
1568                 *data++ = half_float_one;                                               // a
1569         }
1570
1571         return size;
1572 }