third_party/qcms/src/transform.c

   1 /* vim: set ts=8 sw=8 noexpandtab: */
   2 //  qcms
   3 //  Copyright (C) 2009 Mozilla Corporation
   4 //  Copyright (C) 1998-2007 Marti Maria
   5 //
   6 // Permission is hereby granted, free of charge, to any person obtaining
   7 // a copy of this software and associated documentation files (the "Software"),
   8 // to deal in the Software without restriction, including without limitation
   9 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
  10 // and/or sell copies of the Software, and to permit persons to whom the Software
  11 // is furnished to do so, subject to the following conditions:
  12 //
  13 // The above copyright notice and this permission notice shall be included in
  14 // all copies or substantial portions of the Software.
  15 //
  16 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  17 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
  18 // THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  19 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
  20 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  21 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  22 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  23
  24 #include <stdlib.h>
  25 #include <math.h>
  26 #include <assert.h>
  27 #include <string.h> //memcpy
  28 #include "qcmsint.h"
  29 #include "chain.h"
  30 #include "halffloat.h"
  31 #include "matrix.h"
  32 #include "transform_util.h"
  33
  34 /* for MSVC, GCC, Intel, and Sun compilers */
  35 #if defined(_M_IX86) || defined(__i386__) || defined(__i386) || defined(_M_AMD64) || defined(__x86_64__) || defined(__x86_64)
  36 #define X86
  37 #endif /* _M_IX86 || __i386__ || __i386 || _M_AMD64 || __x86_64__ || __x86_64 */
  38
  39 // Build a White point, primary chromas transfer matrix from RGB to CIE XYZ
  40 // This is just an approximation, I am not handling all the non-linear
  41 // aspects of the RGB to XYZ process, and assumming that the gamma correction
  42 // has transitive property in the tranformation chain.
  43 //
  44 // the alghoritm:
  45 //
  46 //            - First I build the absolute conversion matrix using
  47 //              primaries in XYZ. This matrix is next inverted
  48 //            - Then I eval the source white point across this matrix
  49 //              obtaining the coeficients of the transformation
  50 //            - Then, I apply these coeficients to the original matrix
  51 static struct matrix build_RGB_to_XYZ_transfer_matrix(qcms_CIE_xyY white, qcms_CIE_xyYTRIPLE primrs)
  52 {
  53         struct matrix primaries;
  54         struct matrix primaries_invert;
  55         struct matrix result;
  56         struct vector white_point;
  57         struct vector coefs;
  58
  59         double xn, yn;
  60         double xr, yr;
  61         double xg, yg;
  62         double xb, yb;
  63
  64         xn = white.x;
  65         yn = white.y;
  66
  67         if (yn == 0.0)
  68                 return matrix_invalid();
  69
  70         xr = primrs.red.x;
  71         yr = primrs.red.y;
  72         xg = primrs.green.x;
  73         yg = primrs.green.y;
  74         xb = primrs.blue.x;
  75         yb = primrs.blue.y;
  76
  77         primaries.m[0][0] = xr;
  78         primaries.m[0][1] = xg;
  79         primaries.m[0][2] = xb;
  80
  81         primaries.m[1][0] = yr;
  82         primaries.m[1][1] = yg;
  83         primaries.m[1][2] = yb;
  84
  85         primaries.m[2][0] = 1 - xr - yr;
  86         primaries.m[2][1] = 1 - xg - yg;
  87         primaries.m[2][2] = 1 - xb - yb;
  88         primaries.invalid = false;
  89
  90         white_point.v[0] = xn/yn;
  91         white_point.v[1] = 1.;
  92         white_point.v[2] = (1.0-xn-yn)/yn;
  93
  94         primaries_invert = matrix_invert(primaries);
  95
  96         coefs = matrix_eval(primaries_invert, white_point);
  97
  98         result.m[0][0] = coefs.v[0]*xr;
  99         result.m[0][1] = coefs.v[1]*xg;
 100         result.m[0][2] = coefs.v[2]*xb;
 101
 102         result.m[1][0] = coefs.v[0]*yr;
 103         result.m[1][1] = coefs.v[1]*yg;
 104         result.m[1][2] = coefs.v[2]*yb;
 105
 106         result.m[2][0] = coefs.v[0]*(1.-xr-yr);
 107         result.m[2][1] = coefs.v[1]*(1.-xg-yg);
 108         result.m[2][2] = coefs.v[2]*(1.-xb-yb);
 109         result.invalid = primaries_invert.invalid;
 110
 111         return result;
 112 }
 113
 114 struct CIE_XYZ {
 115         double X;
 116         double Y;
 117         double Z;
 118 };
 119
 120 /* CIE Illuminant D50 */
 121 static const struct CIE_XYZ D50_XYZ = {
 122         0.9642,
 123         1.0000,
 124         0.8249
 125 };
 126
 127 /* from lcms: xyY2XYZ()
 128  * corresponds to argyll: icmYxy2XYZ() */
 129 static struct CIE_XYZ xyY2XYZ(qcms_CIE_xyY source)
 130 {
 131         struct CIE_XYZ dest;
 132         dest.X = (source.x / source.y) * source.Y;
 133         dest.Y = source.Y;
 134         dest.Z = ((1 - source.x - source.y) / source.y) * source.Y;
 135         return dest;
 136 }
 137
 138 /* from lcms: ComputeChromaticAdaption */
 139 // Compute chromatic adaption matrix using chad as cone matrix
 140 static struct matrix
 141 compute_chromatic_adaption(struct CIE_XYZ source_white_point,
 142                            struct CIE_XYZ dest_white_point,
 143                            struct matrix chad)
 144 {
 145         struct matrix chad_inv;
 146         struct vector cone_source_XYZ, cone_source_rgb;
 147         struct vector cone_dest_XYZ, cone_dest_rgb;
 148         struct matrix cone, tmp;
 149
 150         tmp = chad;
 151         chad_inv = matrix_invert(tmp);
 152
 153         cone_source_XYZ.v[0] = source_white_point.X;
 154         cone_source_XYZ.v[1] = source_white_point.Y;
 155         cone_source_XYZ.v[2] = source_white_point.Z;
 156
 157         cone_dest_XYZ.v[0] = dest_white_point.X;
 158         cone_dest_XYZ.v[1] = dest_white_point.Y;
 159         cone_dest_XYZ.v[2] = dest_white_point.Z;
 160
 161         cone_source_rgb = matrix_eval(chad, cone_source_XYZ);
 162         cone_dest_rgb   = matrix_eval(chad, cone_dest_XYZ);
 163
 164         cone.m[0][0] = cone_dest_rgb.v[0]/cone_source_rgb.v[0];
 165         cone.m[0][1] = 0;
 166         cone.m[0][2] = 0;
 167         cone.m[1][0] = 0;
 168         cone.m[1][1] = cone_dest_rgb.v[1]/cone_source_rgb.v[1];
 169         cone.m[1][2] = 0;
 170         cone.m[2][0] = 0;
 171         cone.m[2][1] = 0;
 172         cone.m[2][2] = cone_dest_rgb.v[2]/cone_source_rgb.v[2];
 173         cone.invalid = false;
 174
 175         // Normalize
 176         return matrix_multiply(chad_inv, matrix_multiply(cone, chad));
 177 }
 178
 179 /* from lcms: cmsAdaptionMatrix */
 180 // Returns the final chrmatic adaptation from illuminant FromIll to Illuminant ToIll
 181 // Bradford is assumed
 182 static struct matrix
 183 adaption_matrix(struct CIE_XYZ source_illumination, struct CIE_XYZ target_illumination)
 184 {
 185 #if defined (_MSC_VER)
 186 #pragma warning(push)
 187 /* Disable double to float truncation warning 4305 */
 188 #pragma warning(disable:4305)
 189 #endif
 190         struct matrix lam_rigg = {{ // Bradford matrix
 191                                  {  0.8951,  0.2664, -0.1614 },
 192                                  { -0.7502,  1.7135,  0.0367 },
 193                                  {  0.0389, -0.0685,  1.0296 }
 194                                  }};
 195 #if defined (_MSC_VER)
 196 /* Restore warnings */
 197 #pragma warning(pop)
 198 #endif
 199         return compute_chromatic_adaption(source_illumination, target_illumination, lam_rigg);
 200 }
 201
 202 /* from lcms: cmsAdaptMatrixToD50 */
 203 static struct matrix adapt_matrix_to_D50(struct matrix r, qcms_CIE_xyY source_white_pt)
 204 {
 205         struct CIE_XYZ Dn;
 206         struct matrix Bradford;
 207
 208         if (source_white_pt.y == 0.0)
 209                 return matrix_invalid();
 210
 211         Dn = xyY2XYZ(source_white_pt);
 212
 213         Bradford = adaption_matrix(Dn, D50_XYZ);
 214         return matrix_multiply(Bradford, r);
 215 }
 216
 217 qcms_bool set_rgb_colorants(qcms_profile *profile, qcms_CIE_xyY white_point, qcms_CIE_xyYTRIPLE primaries)
 218 {
 219         struct matrix colorants;
 220         colorants = build_RGB_to_XYZ_transfer_matrix(white_point, primaries);
 221         colorants = adapt_matrix_to_D50(colorants, white_point);
 222
 223         if (colorants.invalid)
 224                 return false;
 225
 226         /* note: there's a transpose type of operation going on here */
 227         profile->redColorant.X = double_to_s15Fixed16Number(colorants.m[0][0]);
 228         profile->redColorant.Y = double_to_s15Fixed16Number(colorants.m[1][0]);
 229         profile->redColorant.Z = double_to_s15Fixed16Number(colorants.m[2][0]);
 230
 231         profile->greenColorant.X = double_to_s15Fixed16Number(colorants.m[0][1]);
 232         profile->greenColorant.Y = double_to_s15Fixed16Number(colorants.m[1][1]);
 233         profile->greenColorant.Z = double_to_s15Fixed16Number(colorants.m[2][1]);
 234
 235         profile->blueColorant.X = double_to_s15Fixed16Number(colorants.m[0][2]);
 236         profile->blueColorant.Y = double_to_s15Fixed16Number(colorants.m[1][2]);
 237         profile->blueColorant.Z = double_to_s15Fixed16Number(colorants.m[2][2]);
 238
 239         return true;
 240 }
 241
 242 #if 0
 243 static void qcms_transform_data_rgb_out_pow(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
 244 {
 245         const int r_out = output_format.r;
 246         const int b_out = output_format.b;
 247
 248         int i;
 249         float (*mat)[4] = transform->matrix;
 250         for (i=0; i<length; i++) {
 251                 unsigned char device_r = *src++;
 252                 unsigned char device_g = *src++;
 253                 unsigned char device_b = *src++;
 254
 255                 float linear_r = transform->input_gamma_table_r[device_r];
 256                 float linear_g = transform->input_gamma_table_g[device_g];
 257                 float linear_b = transform->input_gamma_table_b[device_b];
 258
 259                 float out_linear_r = mat[0][0]*linear_r + mat[1][0]*linear_g + mat[2][0]*linear_b;
 260                 float out_linear_g = mat[0][1]*linear_r + mat[1][1]*linear_g + mat[2][1]*linear_b;
 261                 float out_linear_b = mat[0][2]*linear_r + mat[1][2]*linear_g + mat[2][2]*linear_b;
 262
 263                 float out_device_r = pow(out_linear_r, transform->out_gamma_r);
 264                 float out_device_g = pow(out_linear_g, transform->out_gamma_g);
 265                 float out_device_b = pow(out_linear_b, transform->out_gamma_b);
 266
 267                 dest[r_out] = clamp_u8(out_device_r*255);
 268                 dest[1]     = clamp_u8(out_device_g*255);
 269                 dest[b_out] = clamp_u8(out_device_b*255);
 270                 dest += 3;
 271         }
 272 }
 273 #endif
 274
 275 static void qcms_transform_data_gray_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
 276 {
 277         const int r_out = output_format.r;
 278         const int b_out = output_format.b;
 279
 280         unsigned int i;
 281         for (i = 0; i < length; i++) {
 282                 float out_device_r, out_device_g, out_device_b;
 283                 unsigned char device = *src++;
 284
 285                 float linear = transform->input_gamma_table_gray[device];
 286
 287                 out_device_r = lut_interp_linear(linear, transform->output_gamma_lut_r, transform->output_gamma_lut_r_length);
 288                 out_device_g = lut_interp_linear(linear, transform->output_gamma_lut_g, transform->output_gamma_lut_g_length);
 289                 out_device_b = lut_interp_linear(linear, transform->output_gamma_lut_b, transform->output_gamma_lut_b_length);
 290
 291                 dest[r_out] = clamp_u8(out_device_r*255);
 292                 dest[1]     = clamp_u8(out_device_g*255);
 293                 dest[b_out] = clamp_u8(out_device_b*255);
 294                 dest += 3;
 295         }
 296 }
 297
 298 /* Alpha is not corrected.
 299    A rationale for this is found in Alvy Ray's "Should Alpha Be Nonlinear If
 300    RGB Is?" Tech Memo 17 (December 14, 1998).
 301         See: ftp://ftp.alvyray.com/Acrobat/17_Nonln.pdf
 302 */
 303
 304 static void qcms_transform_data_graya_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
 305 {
 306         const int r_out = output_format.r;
 307         const int b_out = output_format.b;
 308
 309         unsigned int i;
 310         for (i = 0; i < length; i++) {
 311                 float out_device_r, out_device_g, out_device_b;
 312                 unsigned char device = *src++;
 313                 unsigned char alpha = *src++;
 314
 315                 float linear = transform->input_gamma_table_gray[device];
 316
 317                 out_device_r = lut_interp_linear(linear, transform->output_gamma_lut_r, transform->output_gamma_lut_r_length);
 318                 out_device_g = lut_interp_linear(linear, transform->output_gamma_lut_g, transform->output_gamma_lut_g_length);
 319                 out_device_b = lut_interp_linear(linear, transform->output_gamma_lut_b, transform->output_gamma_lut_b_length);
 320
 321                 dest[r_out] = clamp_u8(out_device_r*255);
 322                 dest[1]     = clamp_u8(out_device_g*255);
 323                 dest[b_out] = clamp_u8(out_device_b*255);
 324                 dest[3]     = alpha;
 325                 dest += 4;
 326         }
 327 }
 328
 329
 330 static void qcms_transform_data_gray_out_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
 331 {
 332         const int r_out = output_format.r;
 333         const int b_out = output_format.b;
 334
 335         unsigned int i;
 336         for (i = 0; i < length; i++) {
 337                 unsigned char device = *src++;
 338                 uint16_t gray;
 339
 340                 float linear = transform->input_gamma_table_gray[device];
 341
 342                 /* we could round here... */
 343                 gray = linear * PRECACHE_OUTPUT_MAX;
 344
 345                 dest[r_out] = transform->output_table_r->data[gray];
 346                 dest[1]     = transform->output_table_g->data[gray];
 347                 dest[b_out] = transform->output_table_b->data[gray];
 348                 dest += 3;
 349         }
 350 }
 351
 352
 353 static void qcms_transform_data_graya_out_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
 354 {
 355         const int r_out = output_format.r;
 356         const int b_out = output_format.b;
 357
 358         unsigned int i;
 359         for (i = 0; i < length; i++) {
 360                 unsigned char device = *src++;
 361                 unsigned char alpha = *src++;
 362                 uint16_t gray;
 363
 364                 float linear = transform->input_gamma_table_gray[device];
 365
 366                 /* we could round here... */
 367                 gray = linear * PRECACHE_OUTPUT_MAX;
 368
 369                 dest[r_out] = transform->output_table_r->data[gray];
 370                 dest[1]     = transform->output_table_g->data[gray];
 371                 dest[b_out] = transform->output_table_b->data[gray];
 372                 dest[3]     = alpha;
 373                 dest += 4;
 374         }
 375 }
 376
 377 static void qcms_transform_data_rgb_out_lut_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
 378 {
 379         const int r_out = output_format.r;
 380         const int b_out = output_format.b;
 381
 382         unsigned int i;
 383         float (*mat)[4] = transform->matrix;
 384         for (i = 0; i < length; i++) {
 385                 unsigned char device_r = *src++;
 386                 unsigned char device_g = *src++;
 387                 unsigned char device_b = *src++;
 388                 uint16_t r, g, b;
 389
 390                 float linear_r = transform->input_gamma_table_r[device_r];
 391                 float linear_g = transform->input_gamma_table_g[device_g];
 392                 float linear_b = transform->input_gamma_table_b[device_b];
 393
 394                 float out_linear_r = mat[0][0]*linear_r + mat[1][0]*linear_g + mat[2][0]*linear_b;
 395                 float out_linear_g = mat[0][1]*linear_r + mat[1][1]*linear_g + mat[2][1]*linear_b;
 396                 float out_linear_b = mat[0][2]*linear_r + mat[1][2]*linear_g + mat[2][2]*linear_b;
 397
 398                 out_linear_r = clamp_float(out_linear_r);
 399                 out_linear_g = clamp_float(out_linear_g);
 400                 out_linear_b = clamp_float(out_linear_b);
 401
 402                 /* we could round here... */
 403                 r = out_linear_r * PRECACHE_OUTPUT_MAX;
 404                 g = out_linear_g * PRECACHE_OUTPUT_MAX;
 405                 b = out_linear_b * PRECACHE_OUTPUT_MAX;
 406
 407                 dest[r_out] = transform->output_table_r->data[r];
 408                 dest[1]     = transform->output_table_g->data[g];
 409                 dest[b_out] = transform->output_table_b->data[b];
 410                 dest += 3;
 411         }
 412 }
 413
 414 static void qcms_transform_data_rgba_out_lut_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
 415 {
 416         const int r_out = output_format.r;
 417         const int b_out = output_format.b;
 418
 419         unsigned int i;
 420         float (*mat)[4] = transform->matrix;
 421         for (i = 0; i < length; i++) {
 422                 unsigned char device_r = *src++;
 423                 unsigned char device_g = *src++;
 424                 unsigned char device_b = *src++;
 425                 unsigned char alpha = *src++;
 426                 uint16_t r, g, b;
 427
 428                 float linear_r = transform->input_gamma_table_r[device_r];
 429                 float linear_g = transform->input_gamma_table_g[device_g];
 430                 float linear_b = transform->input_gamma_table_b[device_b];
 431
 432                 float out_linear_r = mat[0][0]*linear_r + mat[1][0]*linear_g + mat[2][0]*linear_b;
 433                 float out_linear_g = mat[0][1]*linear_r + mat[1][1]*linear_g + mat[2][1]*linear_b;
 434                 float out_linear_b = mat[0][2]*linear_r + mat[1][2]*linear_g + mat[2][2]*linear_b;
 435
 436                 out_linear_r = clamp_float(out_linear_r);
 437                 out_linear_g = clamp_float(out_linear_g);
 438                 out_linear_b = clamp_float(out_linear_b);
 439
 440                 /* we could round here... */
 441                 r = out_linear_r * PRECACHE_OUTPUT_MAX;
 442                 g = out_linear_g * PRECACHE_OUTPUT_MAX;
 443                 b = out_linear_b * PRECACHE_OUTPUT_MAX;
 444
 445                 dest[r_out] = transform->output_table_r->data[r];
 446                 dest[1]     = transform->output_table_g->data[g];
 447                 dest[b_out] = transform->output_table_b->data[b];
 448                 dest[3]     = alpha;
 449                 dest += 4;
 450         }
 451 }
 452
 453 // Not used
 454 /*
 455 static void qcms_transform_data_clut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
 456 {
 457         const int r_out = output_format.r;
 458         const int b_out = output_format.b;
 459
 460         unsigned int i;
 461         int xy_len = 1;
 462         int x_len = transform->grid_size;
 463         int len = x_len * x_len;
 464         float* r_table = transform->r_clut;
 465         float* g_table = transform->g_clut;
 466         float* b_table = transform->b_clut;
 467
 468         for (i = 0; i < length; i++) {
 469                 unsigned char in_r = *src++;
 470                 unsigned char in_g = *src++;
 471                 unsigned char in_b = *src++;
 472                 float linear_r = in_r/255.0f, linear_g=in_g/255.0f, linear_b = in_b/255.0f;
 473
 474                 int x = floor(linear_r * (transform->grid_size-1));
 475                 int y = floor(linear_g * (transform->grid_size-1));
 476                 int z = floor(linear_b * (transform->grid_size-1));
 477                 int x_n = ceil(linear_r * (transform->grid_size-1));
 478                 int y_n = ceil(linear_g * (transform->grid_size-1));
 479                 int z_n = ceil(linear_b * (transform->grid_size-1));
 480                 float x_d = linear_r * (transform->grid_size-1) - x;
 481                 float y_d = linear_g * (transform->grid_size-1) - y;
 482                 float z_d = linear_b * (transform->grid_size-1) - z;
 483
 484                 float r_x1 = lerp(CLU(r_table,x,y,z), CLU(r_table,x_n,y,z), x_d);
 485                 float r_x2 = lerp(CLU(r_table,x,y_n,z), CLU(r_table,x_n,y_n,z), x_d);
 486                 float r_y1 = lerp(r_x1, r_x2, y_d);
 487                 float r_x3 = lerp(CLU(r_table,x,y,z_n), CLU(r_table,x_n,y,z_n), x_d);
 488                 float r_x4 = lerp(CLU(r_table,x,y_n,z_n), CLU(r_table,x_n,y_n,z_n), x_d);
 489                 float r_y2 = lerp(r_x3, r_x4, y_d);
 490                 float clut_r = lerp(r_y1, r_y2, z_d);
 491
 492                 float g_x1 = lerp(CLU(g_table,x,y,z), CLU(g_table,x_n,y,z), x_d);
 493                 float g_x2 = lerp(CLU(g_table,x,y_n,z), CLU(g_table,x_n,y_n,z), x_d);
 494                 float g_y1 = lerp(g_x1, g_x2, y_d);
 495                 float g_x3 = lerp(CLU(g_table,x,y,z_n), CLU(g_table,x_n,y,z_n), x_d);
 496                 float g_x4 = lerp(CLU(g_table,x,y_n,z_n), CLU(g_table,x_n,y_n,z_n), x_d);
 497                 float g_y2 = lerp(g_x3, g_x4, y_d);
 498                 float clut_g = lerp(g_y1, g_y2, z_d);
 499
 500                 float b_x1 = lerp(CLU(b_table,x,y,z), CLU(b_table,x_n,y,z), x_d);
 501                 float b_x2 = lerp(CLU(b_table,x,y_n,z), CLU(b_table,x_n,y_n,z), x_d);
 502                 float b_y1 = lerp(b_x1, b_x2, y_d);
 503                 float b_x3 = lerp(CLU(b_table,x,y,z_n), CLU(b_table,x_n,y,z_n), x_d);
 504                 float b_x4 = lerp(CLU(b_table,x,y_n,z_n), CLU(b_table,x_n,y_n,z_n), x_d);
 505                 float b_y2 = lerp(b_x3, b_x4, y_d);
 506                 float clut_b = lerp(b_y1, b_y2, z_d);
 507
 508                 dest[r_out] = clamp_u8(clut_r*255.0f);
 509                 dest[1]     = clamp_u8(clut_g*255.0f);
 510                 dest[b_out] = clamp_u8(clut_b*255.0f);
 511                 dest += 3;
 512         }
 513 }
 514 */
 515
 516 // Using lcms' tetra interpolation algorithm.
 517 static void qcms_transform_data_tetra_clut_rgba(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
 518 {
 519         const int r_out = output_format.r;
 520         const int b_out = output_format.b;
 521
 522         unsigned int i;
 523         int xy_len = 1;
 524         int x_len = transform->grid_size;
 525         int len = x_len * x_len;
 526         float* r_table = transform->r_clut;
 527         float* g_table = transform->g_clut;
 528         float* b_table = transform->b_clut;
 529         float c0_r, c1_r, c2_r, c3_r;
 530         float c0_g, c1_g, c2_g, c3_g;
 531         float c0_b, c1_b, c2_b, c3_b;
 532         float clut_r, clut_g, clut_b;
 533         for (i = 0; i < length; i++) {
 534                 unsigned char in_r = *src++;
 535                 unsigned char in_g = *src++;
 536                 unsigned char in_b = *src++;
 537                 unsigned char in_a = *src++;
 538                 float linear_r = in_r/255.0f, linear_g=in_g/255.0f, linear_b = in_b/255.0f;
 539
 540                 int x = floor(linear_r * (transform->grid_size-1));
 541                 int y = floor(linear_g * (transform->grid_size-1));
 542                 int z = floor(linear_b * (transform->grid_size-1));
 543                 int x_n = ceil(linear_r * (transform->grid_size-1));
 544                 int y_n = ceil(linear_g * (transform->grid_size-1));
 545                 int z_n = ceil(linear_b * (transform->grid_size-1));
 546                 float rx = linear_r * (transform->grid_size-1) - x;
 547                 float ry = linear_g * (transform->grid_size-1) - y;
 548                 float rz = linear_b * (transform->grid_size-1) - z;
 549
 550                 c0_r = CLU(r_table, x, y, z);
 551                 c0_g = CLU(g_table, x, y, z);
 552                 c0_b = CLU(b_table, x, y, z);
 553
 554                 if( rx >= ry ) {
 555                         if (ry >= rz) { //rx >= ry && ry >= rz
 556                                 c1_r = CLU(r_table, x_n, y, z) - c0_r;
 557                                 c2_r = CLU(r_table, x_n, y_n, z) - CLU(r_table, x_n, y, z);
 558                                 c3_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y_n, z);
 559                                 c1_g = CLU(g_table, x_n, y, z) - c0_g;
 560                                 c2_g = CLU(g_table, x_n, y_n, z) - CLU(g_table, x_n, y, z);
 561                                 c3_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y_n, z);
 562                                 c1_b = CLU(b_table, x_n, y, z) - c0_b;
 563                                 c2_b = CLU(b_table, x_n, y_n, z) - CLU(b_table, x_n, y, z);
 564                                 c3_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y_n, z);
 565                         } else {
 566                                 if (rx >= rz) { //rx >= rz && rz >= ry
 567                                         c1_r = CLU(r_table, x_n, y, z) - c0_r;
 568                                         c2_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y, z_n);
 569                                         c3_r = CLU(r_table, x_n, y, z_n) - CLU(r_table, x_n, y, z);
 570                                         c1_g = CLU(g_table, x_n, y, z) - c0_g;
 571                                         c2_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y, z_n);
 572                                         c3_g = CLU(g_table, x_n, y, z_n) - CLU(g_table, x_n, y, z);
 573                                         c1_b = CLU(b_table, x_n, y, z) - c0_b;
 574                                         c2_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y, z_n);
 575                                         c3_b = CLU(b_table, x_n, y, z_n) - CLU(b_table, x_n, y, z);
 576                                 } else { //rz > rx && rx >= ry
 577                                         c1_r = CLU(r_table, x_n, y, z_n) - CLU(r_table, x, y, z_n);
 578                                         c2_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y, z_n);
 579                                         c3_r = CLU(r_table, x, y, z_n) - c0_r;
 580                                         c1_g = CLU(g_table, x_n, y, z_n) - CLU(g_table, x, y, z_n);
 581                                         c2_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y, z_n);
 582                                         c3_g = CLU(g_table, x, y, z_n) - c0_g;
 583                                         c1_b = CLU(b_table, x_n, y, z_n) - CLU(b_table, x, y, z_n);
 584                                         c2_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y, z_n);
 585                                         c3_b = CLU(b_table, x, y, z_n) - c0_b;
 586                                 }
 587                         }
 588                 } else {
 589                         if (rx >= rz) { //ry > rx && rx >= rz
 590                                 c1_r = CLU(r_table, x_n, y_n, z) - CLU(r_table, x, y_n, z);
 591                                 c2_r = CLU(r_table, x, y_n, z) - c0_r;
 592                                 c3_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y_n, z);
 593                                 c1_g = CLU(g_table, x_n, y_n, z) - CLU(g_table, x, y_n, z);
 594                                 c2_g = CLU(g_table, x, y_n, z) - c0_g;
 595                                 c3_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y_n, z);
 596                                 c1_b = CLU(b_table, x_n, y_n, z) - CLU(b_table, x, y_n, z);
 597                                 c2_b = CLU(b_table, x, y_n, z) - c0_b;
 598                                 c3_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y_n, z);
 599                         } else {
 600                                 if (ry >= rz) { //ry >= rz && rz > rx
 601                                         c1_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x, y_n, z_n);
 602                                         c2_r = CLU(r_table, x, y_n, z) - c0_r;
 603                                         c3_r = CLU(r_table, x, y_n, z_n) - CLU(r_table, x, y_n, z);
 604                                         c1_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x, y_n, z_n);
 605                                         c2_g = CLU(g_table, x, y_n, z) - c0_g;
 606                                         c3_g = CLU(g_table, x, y_n, z_n) - CLU(g_table, x, y_n, z);
 607                                         c1_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x, y_n, z_n);
 608                                         c2_b = CLU(b_table, x, y_n, z) - c0_b;
 609                                         c3_b = CLU(b_table, x, y_n, z_n) - CLU(b_table, x, y_n, z);
 610                                 } else { //rz > ry && ry > rx
 611                                         c1_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x, y_n, z_n);
 612                                         c2_r = CLU(r_table, x, y_n, z_n) - CLU(r_table, x, y, z_n);
 613                                         c3_r = CLU(r_table, x, y, z_n) - c0_r;
 614                                         c1_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x, y_n, z_n);
 615                                         c2_g = CLU(g_table, x, y_n, z_n) - CLU(g_table, x, y, z_n);
 616                                         c3_g = CLU(g_table, x, y, z_n) - c0_g;
 617                                         c1_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x, y_n, z_n);
 618                                         c2_b = CLU(b_table, x, y_n, z_n) - CLU(b_table, x, y, z_n);
 619                                         c3_b = CLU(b_table, x, y, z_n) - c0_b;
 620                                 }
 621                         }
 622                 }
 623
 624                 clut_r = c0_r + c1_r*rx + c2_r*ry + c3_r*rz;
 625                 clut_g = c0_g + c1_g*rx + c2_g*ry + c3_g*rz;
 626                 clut_b = c0_b + c1_b*rx + c2_b*ry + c3_b*rz;
 627
 628                 dest[r_out] = clamp_u8(clut_r*255.0f);
 629                 dest[1]     = clamp_u8(clut_g*255.0f);
 630                 dest[b_out] = clamp_u8(clut_b*255.0f);
 631                 dest[3]     = in_a;
 632                 dest += 4;
 633         }
 634 }
 635
 636 // Using lcms' tetra interpolation code.
 637 static void qcms_transform_data_tetra_clut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
 638 {
 639         const int r_out = output_format.r;
 640         const int b_out = output_format.b;
 641
 642         unsigned int i;
 643         int xy_len = 1;
 644         int x_len = transform->grid_size;
 645         int len = x_len * x_len;
 646         float* r_table = transform->r_clut;
 647         float* g_table = transform->g_clut;
 648         float* b_table = transform->b_clut;
 649         float c0_r, c1_r, c2_r, c3_r;
 650         float c0_g, c1_g, c2_g, c3_g;
 651         float c0_b, c1_b, c2_b, c3_b;
 652         float clut_r, clut_g, clut_b;
 653         for (i = 0; i < length; i++) {
 654                 unsigned char in_r = *src++;
 655                 unsigned char in_g = *src++;
 656                 unsigned char in_b = *src++;
 657                 float linear_r = in_r/255.0f, linear_g=in_g/255.0f, linear_b = in_b/255.0f;
 658
 659                 int x = floor(linear_r * (transform->grid_size-1));
 660                 int y = floor(linear_g * (transform->grid_size-1));
 661                 int z = floor(linear_b * (transform->grid_size-1));
 662                 int x_n = ceil(linear_r * (transform->grid_size-1));
 663                 int y_n = ceil(linear_g * (transform->grid_size-1));
 664                 int z_n = ceil(linear_b * (transform->grid_size-1));
 665                 float rx = linear_r * (transform->grid_size-1) - x;
 666                 float ry = linear_g * (transform->grid_size-1) - y;
 667                 float rz = linear_b * (transform->grid_size-1) - z;
 668
 669                 c0_r = CLU(r_table, x, y, z);
 670                 c0_g = CLU(g_table, x, y, z);
 671                 c0_b = CLU(b_table, x, y, z);
 672
 673                 if( rx >= ry ) {
 674                         if (ry >= rz) { //rx >= ry && ry >= rz
 675                                 c1_r = CLU(r_table, x_n, y, z) - c0_r;
 676                                 c2_r = CLU(r_table, x_n, y_n, z) - CLU(r_table, x_n, y, z);
 677                                 c3_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y_n, z);
 678                                 c1_g = CLU(g_table, x_n, y, z) - c0_g;
 679                                 c2_g = CLU(g_table, x_n, y_n, z) - CLU(g_table, x_n, y, z);
 680                                 c3_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y_n, z);
 681                                 c1_b = CLU(b_table, x_n, y, z) - c0_b;
 682                                 c2_b = CLU(b_table, x_n, y_n, z) - CLU(b_table, x_n, y, z);
 683                                 c3_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y_n, z);
 684                         } else {
 685                                 if (rx >= rz) { //rx >= rz && rz >= ry
 686                                         c1_r = CLU(r_table, x_n, y, z) - c0_r;
 687                                         c2_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y, z_n);
 688                                         c3_r = CLU(r_table, x_n, y, z_n) - CLU(r_table, x_n, y, z);
 689                                         c1_g = CLU(g_table, x_n, y, z) - c0_g;
 690                                         c2_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y, z_n);
 691                                         c3_g = CLU(g_table, x_n, y, z_n) - CLU(g_table, x_n, y, z);
 692                                         c1_b = CLU(b_table, x_n, y, z) - c0_b;
 693                                         c2_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y, z_n);
 694                                         c3_b = CLU(b_table, x_n, y, z_n) - CLU(b_table, x_n, y, z);
 695                                 } else { //rz > rx && rx >= ry
 696                                         c1_r = CLU(r_table, x_n, y, z_n) - CLU(r_table, x, y, z_n);
 697                                         c2_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y, z_n);
 698                                         c3_r = CLU(r_table, x, y, z_n) - c0_r;
 699                                         c1_g = CLU(g_table, x_n, y, z_n) - CLU(g_table, x, y, z_n);
 700                                         c2_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y, z_n);
 701                                         c3_g = CLU(g_table, x, y, z_n) - c0_g;
 702                                         c1_b = CLU(b_table, x_n, y, z_n) - CLU(b_table, x, y, z_n);
 703                                         c2_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y, z_n);
 704                                         c3_b = CLU(b_table, x, y, z_n) - c0_b;
 705                                 }
 706                         }
 707                 } else {
 708                         if (rx >= rz) { //ry > rx && rx >= rz
 709                                 c1_r = CLU(r_table, x_n, y_n, z) - CLU(r_table, x, y_n, z);
 710                                 c2_r = CLU(r_table, x, y_n, z) - c0_r;
 711                                 c3_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y_n, z);
 712                                 c1_g = CLU(g_table, x_n, y_n, z) - CLU(g_table, x, y_n, z);
 713                                 c2_g = CLU(g_table, x, y_n, z) - c0_g;
 714                                 c3_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y_n, z);
 715                                 c1_b = CLU(b_table, x_n, y_n, z) - CLU(b_table, x, y_n, z);
 716                                 c2_b = CLU(b_table, x, y_n, z) - c0_b;
 717                                 c3_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y_n, z);
 718                         } else {
 719                                 if (ry >= rz) { //ry >= rz && rz > rx
 720                                         c1_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x, y_n, z_n);
 721                                         c2_r = CLU(r_table, x, y_n, z) - c0_r;
 722                                         c3_r = CLU(r_table, x, y_n, z_n) - CLU(r_table, x, y_n, z);
 723                                         c1_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x, y_n, z_n);
 724                                         c2_g = CLU(g_table, x, y_n, z) - c0_g;
 725                                         c3_g = CLU(g_table, x, y_n, z_n) - CLU(g_table, x, y_n, z);
 726                                         c1_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x, y_n, z_n);
 727                                         c2_b = CLU(b_table, x, y_n, z) - c0_b;
 728                                         c3_b = CLU(b_table, x, y_n, z_n) - CLU(b_table, x, y_n, z);
 729                                 } else { //rz > ry && ry > rx
 730                                         c1_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x, y_n, z_n);
 731                                         c2_r = CLU(r_table, x, y_n, z_n) - CLU(r_table, x, y, z_n);
 732                                         c3_r = CLU(r_table, x, y, z_n) - c0_r;
 733                                         c1_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x, y_n, z_n);
 734                                         c2_g = CLU(g_table, x, y_n, z_n) - CLU(g_table, x, y, z_n);
 735                                         c3_g = CLU(g_table, x, y, z_n) - c0_g;
 736                                         c1_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x, y_n, z_n);
 737                                         c2_b = CLU(b_table, x, y_n, z_n) - CLU(b_table, x, y, z_n);
 738                                         c3_b = CLU(b_table, x, y, z_n) - c0_b;
 739                                 }
 740                         }
 741                 }
 742
 743                 clut_r = c0_r + c1_r*rx + c2_r*ry + c3_r*rz;
 744                 clut_g = c0_g + c1_g*rx + c2_g*ry + c3_g*rz;
 745                 clut_b = c0_b + c1_b*rx + c2_b*ry + c3_b*rz;
 746
 747                 dest[r_out] = clamp_u8(clut_r*255.0f);
 748                 dest[1]     = clamp_u8(clut_g*255.0f);
 749                 dest[b_out] = clamp_u8(clut_b*255.0f);
 750                 dest += 3;
 751         }
 752 }
 753
 754 static void qcms_transform_data_rgb_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
 755 {
 756         const int r_out = output_format.r;
 757         const int b_out = output_format.b;
 758
 759         unsigned int i;
 760         float (*mat)[4] = transform->matrix;
 761         for (i = 0; i < length; i++) {
 762                 unsigned char device_r = *src++;
 763                 unsigned char device_g = *src++;
 764                 unsigned char device_b = *src++;
 765                 float out_device_r, out_device_g, out_device_b;
 766
 767                 float linear_r = transform->input_gamma_table_r[device_r];
 768                 float linear_g = transform->input_gamma_table_g[device_g];
 769                 float linear_b = transform->input_gamma_table_b[device_b];
 770
 771                 float out_linear_r = mat[0][0]*linear_r + mat[1][0]*linear_g + mat[2][0]*linear_b;
 772                 float out_linear_g = mat[0][1]*linear_r + mat[1][1]*linear_g + mat[2][1]*linear_b;
 773                 float out_linear_b = mat[0][2]*linear_r + mat[1][2]*linear_g + mat[2][2]*linear_b;
 774
 775                 out_linear_r = clamp_float(out_linear_r);
 776                 out_linear_g = clamp_float(out_linear_g);
 777                 out_linear_b = clamp_float(out_linear_b);
 778
 779                 out_device_r = lut_interp_linear(out_linear_r,
 780                                 transform->output_gamma_lut_r, transform->output_gamma_lut_r_length);
 781                 out_device_g = lut_interp_linear(out_linear_g,
 782                                 transform->output_gamma_lut_g, transform->output_gamma_lut_g_length);
 783                 out_device_b = lut_interp_linear(out_linear_b,
 784                                 transform->output_gamma_lut_b, transform->output_gamma_lut_b_length);
 785
 786                 dest[r_out] = clamp_u8(out_device_r*255);
 787                 dest[1]     = clamp_u8(out_device_g*255);
 788                 dest[b_out] = clamp_u8(out_device_b*255);
 789                 dest += 3;
 790         }
 791 }
 792
 793 static void qcms_transform_data_rgba_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
 794 {
 795         const int r_out = output_format.r;
 796         const int b_out = output_format.b;
 797
 798         unsigned int i;
 799         float (*mat)[4] = transform->matrix;
 800         for (i = 0; i < length; i++) {
 801                 unsigned char device_r = *src++;
 802                 unsigned char device_g = *src++;
 803                 unsigned char device_b = *src++;
 804                 unsigned char alpha = *src++;
 805                 float out_device_r, out_device_g, out_device_b;
 806
 807                 float linear_r = transform->input_gamma_table_r[device_r];
 808                 float linear_g = transform->input_gamma_table_g[device_g];
 809                 float linear_b = transform->input_gamma_table_b[device_b];
 810
 811                 float out_linear_r = mat[0][0]*linear_r + mat[1][0]*linear_g + mat[2][0]*linear_b;
 812                 float out_linear_g = mat[0][1]*linear_r + mat[1][1]*linear_g + mat[2][1]*linear_b;
 813                 float out_linear_b = mat[0][2]*linear_r + mat[1][2]*linear_g + mat[2][2]*linear_b;
 814
 815                 out_linear_r = clamp_float(out_linear_r);
 816                 out_linear_g = clamp_float(out_linear_g);
 817                 out_linear_b = clamp_float(out_linear_b);
 818
 819                 out_device_r = lut_interp_linear(out_linear_r,
 820                                 transform->output_gamma_lut_r, transform->output_gamma_lut_r_length);
 821                 out_device_g = lut_interp_linear(out_linear_g,
 822                                 transform->output_gamma_lut_g, transform->output_gamma_lut_g_length);
 823                 out_device_b = lut_interp_linear(out_linear_b,
 824                                 transform->output_gamma_lut_b, transform->output_gamma_lut_b_length);
 825
 826                 dest[r_out] = clamp_u8(out_device_r*255);
 827                 dest[1]     = clamp_u8(out_device_g*255);
 828                 dest[b_out] = clamp_u8(out_device_b*255);
 829                 dest[3]     = alpha;
 830                 dest += 4;
 831         }
 832 }
 833
 834 #if 0
 835 static void qcms_transform_data_rgb_out_linear(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
 836 {
 837         const int r_out = output_format.r;
 838         const int b_out = output_format.b;
 839
 840         int i;
 841         float (*mat)[4] = transform->matrix;
 842         for (i = 0; i < length; i++) {
 843                 unsigned char device_r = *src++;
 844                 unsigned char device_g = *src++;
 845                 unsigned char device_b = *src++;
 846
 847                 float linear_r = transform->input_gamma_table_r[device_r];
 848                 float linear_g = transform->input_gamma_table_g[device_g];
 849                 float linear_b = transform->input_gamma_table_b[device_b];
 850
 851                 float out_linear_r = mat[0][0]*linear_r + mat[1][0]*linear_g + mat[2][0]*linear_b;
 852                 float out_linear_g = mat[0][1]*linear_r + mat[1][1]*linear_g + mat[2][1]*linear_b;
 853                 float out_linear_b = mat[0][2]*linear_r + mat[1][2]*linear_g + mat[2][2]*linear_b;
 854
 855                 dest[r_out] = clamp_u8(out_linear_r*255);
 856                 dest[1]     = clamp_u8(out_linear_g*255);
 857                 dest[b_out] = clamp_u8(out_linear_b*255);
 858                 dest += 3;
 859         }
 860 }
 861 #endif
 862
 863 /*
 864  * If users create and destroy objects on different threads, even if the same
 865  * objects aren't used on different threads at the same time, we can still run
 866  * in to trouble with refcounts if they aren't atomic.
 867  *
 868  * This can lead to us prematurely deleting the precache if threads get unlucky
 869  * and write the wrong value to the ref count.
 870  */
 871 static struct precache_output *precache_reference(struct precache_output *p)
 872 {
 873         qcms_atomic_increment(p->ref_count);
 874         return p;
 875 }
 876
 877 static struct precache_output *precache_create()
 878 {
 879         struct precache_output *p = malloc(sizeof(struct precache_output));
 880         if (p)
 881                 p->ref_count = 1;
 882         return p;
 883 }
 884
 885 void precache_release(struct precache_output *p)
 886 {
 887         if (qcms_atomic_decrement(p->ref_count) == 0) {
 888                 free(p);
 889         }
 890 }
 891
 892 #ifdef HAVE_POSIX_MEMALIGN
 893 static qcms_transform *transform_alloc(void)
 894 {
 895         qcms_transform *t;
 896         if (!posix_memalign(&t, 16, sizeof(*t))) {
 897                 return t;
 898         } else {
 899                 return NULL;
 900         }
 901 }
 902 static void transform_free(qcms_transform *t)
 903 {
 904         free(t);
 905 }
 906 #else
 907 static qcms_transform *transform_alloc(void)
 908 {
 909         /* transform needs to be aligned on a 16byte boundrary */
 910         char *original_block = calloc(sizeof(qcms_transform) + sizeof(void*) + 16, 1);
 911         /* make room for a pointer to the block returned by calloc */
 912         void *transform_start = original_block + sizeof(void*);
 913         /* align transform_start */
 914         qcms_transform *transform_aligned = (qcms_transform*)(((uintptr_t)transform_start + 15) & ~0xf);
 915
 916         /* store a pointer to the block returned by calloc so that we can free it later */
 917         void **(original_block_ptr) = (void**)transform_aligned;
 918         if (!original_block)
 919                 return NULL;
 920         original_block_ptr--;
 921         *original_block_ptr = original_block;
 922
 923         return transform_aligned;
 924 }
 925 static void transform_free(qcms_transform *t)
 926 {
 927         /* get at the pointer to the unaligned block returned by calloc */
 928         void **p = (void**)t;
 929         p--;
 930         free(*p);
 931 }
 932 #endif
 933
 934 void qcms_transform_release(qcms_transform *t)
 935 {
 936         /* ensure we only free the gamma tables once even if there are
 937          * multiple references to the same data */
 938
 939         if (t->output_table_r)
 940                 precache_release(t->output_table_r);
 941         if (t->output_table_g)
 942                 precache_release(t->output_table_g);
 943         if (t->output_table_b)
 944                 precache_release(t->output_table_b);
 945
 946         free(t->input_gamma_table_r);
 947         if (t->input_gamma_table_g != t->input_gamma_table_r)
 948                 free(t->input_gamma_table_g);
 949         if (t->input_gamma_table_g != t->input_gamma_table_r &&
 950             t->input_gamma_table_g != t->input_gamma_table_b)
 951                 free(t->input_gamma_table_b);
 952
 953         free(t->input_gamma_table_gray);
 954
 955         free(t->output_gamma_lut_r);
 956         free(t->output_gamma_lut_g);
 957         free(t->output_gamma_lut_b);
 958
 959         transform_free(t);
 960 }
 961
 962 #ifdef X86
 963 // Determine if we can build with SSE2 (this was partly copied from jmorecfg.h in
 964 // mozilla/jpeg)
 965  // -------------------------------------------------------------------------
 966 #if defined(_M_IX86) && defined(_MSC_VER)
 967 #define HAS_CPUID
 968 /* Get us a CPUID function. Avoid clobbering EBX because sometimes it's the PIC
 969    register - I'm not sure if that ever happens on windows, but cpuid isn't
 970    on the critical path so we just preserve the register to be safe and to be
 971    consistent with the non-windows version. */
 972 static void cpuid(uint32_t fxn, uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d) {
 973        uint32_t a_, b_, c_, d_;
 974        __asm {
 975               xchg   ebx, esi
 976               mov    eax, fxn
 977               cpuid
 978               mov    a_, eax
 979               mov    b_, ebx
 980               mov    c_, ecx
 981               mov    d_, edx
 982               xchg   ebx, esi
 983        }
 984        *a = a_;
 985        *b = b_;
 986        *c = c_;
 987        *d = d_;
 988 }
 989 #elif (defined(__GNUC__) || defined(__SUNPRO_C)) && (defined(__i386__) || defined(__i386))
 990 #define HAS_CPUID
 991 /* Get us a CPUID function. We can't use ebx because it's the PIC register on
 992    some platforms, so we use ESI instead and save ebx to avoid clobbering it. */
 993 static void cpuid(uint32_t fxn, uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d) {
 994
 995         uint32_t a_, b_, c_, d_;
 996        __asm__ __volatile__ ("xchgl %%ebx, %%esi; cpuid; xchgl %%ebx, %%esi;"
 997                              : "=a" (a_), "=S" (b_), "=c" (c_), "=d" (d_) : "a" (fxn));
 998            *a = a_;
 999            *b = b_;
1000            *c = c_;
1001            *d = d_;
1002 }
1003 #endif
1004
1005 // -------------------------Runtime SSEx Detection-----------------------------
1006
1007 /* MMX is always supported per
1008  *  Gecko v1.9.1 minimum CPU requirements */
1009 #define SSE1_EDX_MASK (1UL << 25)
1010 #define SSE2_EDX_MASK (1UL << 26)
1011 #define SSE3_ECX_MASK (1UL <<  0)
1012
1013 static int sse_version_available(void)
1014 {
1015 #if defined(__x86_64__) || defined(__x86_64) || defined(_M_AMD64)
1016         /* we know at build time that 64-bit CPUs always have SSE2
1017          * this tells the compiler that non-SSE2 branches will never be
1018          * taken (i.e. OK to optimze away the SSE1 and non-SIMD code */
1019         return 2;
1020 #elif defined(HAS_CPUID)
1021         static int sse_version = -1;
1022         uint32_t a, b, c, d;
1023         uint32_t function = 0x00000001;
1024
1025         if (sse_version == -1) {
1026                 sse_version = 0;
1027                 cpuid(function, &a, &b, &c, &d);
1028                 if (c & SSE3_ECX_MASK)
1029                         sse_version = 3;
1030                 else if (d & SSE2_EDX_MASK)
1031                         sse_version = 2;
1032                 else if (d & SSE1_EDX_MASK)
1033                         sse_version = 1;
1034         }
1035
1036         return sse_version;
1037 #else
1038         return 0;
1039 #endif
1040 }
1041 #endif
1042
1043 static const struct matrix bradford_matrix = {{ { 0.8951f, 0.2664f,-0.1614f},
1044                                                 {-0.7502f, 1.7135f, 0.0367f},
1045                                                 { 0.0389f,-0.0685f, 1.0296f}},
1046                                                 false};
1047
1048 static const struct matrix bradford_matrix_inv = {{ { 0.9869929f,-0.1470543f, 0.1599627f},
1049                                                     { 0.4323053f, 0.5183603f, 0.0492912f},
1050                                                     {-0.0085287f, 0.0400428f, 0.9684867f}},
1051                                                     false};
1052
1053 // See ICCv4 E.3
1054 struct matrix compute_whitepoint_adaption(float X, float Y, float Z) {
1055         float p = (0.96422f*bradford_matrix.m[0][0] + 1.000f*bradford_matrix.m[1][0] + 0.82521f*bradford_matrix.m[2][0]) /
1056                   (X*bradford_matrix.m[0][0]      + Y*bradford_matrix.m[1][0]      + Z*bradford_matrix.m[2][0]     );
1057         float y = (0.96422f*bradford_matrix.m[0][1] + 1.000f*bradford_matrix.m[1][1] + 0.82521f*bradford_matrix.m[2][1]) /
1058                   (X*bradford_matrix.m[0][1]      + Y*bradford_matrix.m[1][1]      + Z*bradford_matrix.m[2][1]     );
1059         float b = (0.96422f*bradford_matrix.m[0][2] + 1.000f*bradford_matrix.m[1][2] + 0.82521f*bradford_matrix.m[2][2]) /
1060                   (X*bradford_matrix.m[0][2]      + Y*bradford_matrix.m[1][2]      + Z*bradford_matrix.m[2][2]     );
1061         struct matrix white_adaption = {{ {p,0,0}, {0,y,0}, {0,0,b}}, false};
1062         return matrix_multiply( bradford_matrix_inv, matrix_multiply(white_adaption, bradford_matrix) );
1063 }
1064
1065 void qcms_profile_precache_output_transform(qcms_profile *profile)
1066 {
1067         /* we only support precaching on rgb profiles */
1068         if (profile->color_space != RGB_SIGNATURE)
1069                 return;
1070
1071         if (qcms_supports_iccv4) {
1072                 /* don't precache since we will use the B2A LUT */
1073                 if (profile->B2A0)
1074                         return;
1075
1076                 /* don't precache since we will use the mBA LUT */
1077                 if (profile->mBA)
1078                         return;
1079         }
1080
1081         /* don't precache if we do not have the TRC curves */
1082         if (!profile->redTRC || !profile->greenTRC || !profile->blueTRC)
1083                 return;
1084
1085         if (!profile->output_table_r) {
1086                 profile->output_table_r = precache_create();
1087                 if (profile->output_table_r &&
1088                                 !compute_precache(profile->redTRC, profile->output_table_r->data)) {
1089                         precache_release(profile->output_table_r);
1090                         profile->output_table_r = NULL;
1091                 }
1092         }
1093         if (!profile->output_table_g) {
1094                 profile->output_table_g = precache_create();
1095                 if (profile->output_table_g &&
1096                                 !compute_precache(profile->greenTRC, profile->output_table_g->data)) {
1097                         precache_release(profile->output_table_g);
1098                         profile->output_table_g = NULL;
1099                 }
1100         }
1101         if (!profile->output_table_b) {
1102                 profile->output_table_b = precache_create();
1103                 if (profile->output_table_b &&
1104                                 !compute_precache(profile->blueTRC, profile->output_table_b->data)) {
1105                         precache_release(profile->output_table_b);
1106                         profile->output_table_b = NULL;
1107                 }
1108         }
1109 }
1110
1111 /* Replace the current transformation with a LUT transformation using a given number of sample points */
1112 qcms_transform* qcms_transform_precacheLUT_float(qcms_transform *transform, qcms_profile *in, qcms_profile *out,
1113                                                  int samples, qcms_data_type in_type)
1114 {
1115         /* The range between which 2 consecutive sample points can be used to interpolate */
1116         uint16_t x,y,z;
1117         uint32_t l;
1118         uint32_t lutSize = 3 * samples * samples * samples;
1119         float* src = NULL;
1120         float* dest = NULL;
1121         float* lut = NULL;
1122         float inverse;
1123
1124         src = malloc(lutSize*sizeof(float));
1125         dest = malloc(lutSize*sizeof(float));
1126
1127         if (src && dest) {
1128                 /* Prepare a list of points we want to sample: x, y, z order */
1129                 l = 0;
1130                 inverse = 1 / (float)(samples-1);
1131                 for (x = 0; x < samples; x++) {
1132                         for (y = 0; y < samples; y++) {
1133                                 for (z = 0; z < samples; z++) {
1134                                         src[l++] = x * inverse; // r
1135                                         src[l++] = y * inverse; // g
1136                                         src[l++] = z * inverse; // b
1137                                 }
1138                         }
1139                 }
1140
1141                 lut = qcms_chain_transform(in, out, src, dest, lutSize);
1142
1143                 if (lut) {
1144                         transform->r_clut = &lut[0]; // r
1145                         transform->g_clut = &lut[1]; // g
1146                         transform->b_clut = &lut[2]; // b
1147                         transform->grid_size = samples;
1148                         if (in_type == QCMS_DATA_RGBA_8) {
1149                                 transform->transform_fn = qcms_transform_data_tetra_clut_rgba;
1150                         } else {
1151                                 transform->transform_fn = qcms_transform_data_tetra_clut;
1152                         }
1153                 }
1154         }
1155
1156         // XXX: qcms_modular_transform_data may return the lut in either the src or the
1157         // dest buffer. If so, it must not be free-ed.
1158         if (src && lut != src) {
1159                 free(src);
1160         }
1161         if (dest && lut != dest) {
1162                 free(dest);
1163         }
1164
1165         if (lut == NULL) {
1166                 return NULL;
1167         }
1168         return transform;
1169 }
1170
1171 /* Create a transform LUT using the given number of sample points. The transform LUT data is stored
1172    in the output (cube) in bgra format in zyx sample order. */
1173 qcms_bool qcms_transform_create_LUT_zyx_bgra(qcms_profile *in, qcms_profile *out, qcms_intent intent,
1174                                              int samples, unsigned char* cube)
1175 {
1176         uint16_t z,y,x;
1177         uint32_t l,index;
1178         uint32_t lutSize = 3 * samples * samples * samples;
1179
1180         float* src = NULL;
1181         float* dest = NULL;
1182         float* lut = NULL;
1183         float inverse;
1184
1185         src = malloc(lutSize*sizeof(float));
1186         dest = malloc(lutSize*sizeof(float));
1187
1188         if (src && dest) {
1189                 /* Prepare a list of points we want to sample: z, y, x order */
1190                 l = 0;
1191                 inverse = 1 / (float)(samples-1);
1192                 for (z = 0; z < samples; z++) {
1193                         for (y = 0; y < samples; y++) {
1194                                 for (x = 0; x < samples; x++) {
1195                                         src[l++] = x * inverse; // r
1196                                         src[l++] = y * inverse; // g
1197                                         src[l++] = z * inverse; // b
1198                                 }
1199                         }
1200                 }
1201
1202                 lut = qcms_chain_transform(in, out, src, dest, lutSize);
1203
1204                 if (lut) {
1205                         index = l = 0;
1206                         for (z = 0; z < samples; z++) {
1207                                 for (y = 0; y < samples; y++) {
1208                                         for (x = 0; x < samples; x++) {
1209                                                 cube[index++] = (int)floorf(lut[l + 2] * 255.0f + 0.5f); // b
1210                                                 cube[index++] = (int)floorf(lut[l + 1] * 255.0f + 0.5f); // g
1211                                                 cube[index++] = (int)floorf(lut[l + 0] * 255.0f + 0.5f); // r
1212                                                 cube[index++] = 255;                                     // a
1213                                                 l += 3;
1214                                         }
1215                                 }
1216                         }
1217                 }
1218         }
1219
1220         // XXX: qcms_modular_transform_data may return the lut data in either the src or
1221         // dest buffer so free src, dest, and lut with care.
1222
1223         if (src && lut != src)
1224                 free(src);
1225         if (dest && lut != dest)
1226                 free(dest);
1227
1228         if (lut) {
1229                 free(lut);
1230                 return true;
1231         }
1232
1233         return false;
1234 }
1235
1236 #define NO_MEM_TRANSFORM NULL
1237
1238 qcms_transform* qcms_transform_create(
1239                 qcms_profile *in, qcms_data_type in_type,
1240                 qcms_profile *out, qcms_data_type out_type,
1241                 qcms_intent intent)
1242 {
1243         bool precache = false;
1244         int i, j;
1245
1246         qcms_transform *transform = transform_alloc();
1247         if (!transform) {
1248                 return NULL;
1249         }
1250         if (out_type != QCMS_DATA_RGB_8 &&
1251                 out_type != QCMS_DATA_RGBA_8) {
1252             assert(0 && "output type");
1253             qcms_transform_release(transform);
1254             return NULL;
1255         }
1256
1257         transform->transform_flags = 0;
1258
1259         if (out->output_table_r &&
1260                         out->output_table_g &&
1261                         out->output_table_b) {
1262                 precache = true;
1263         }
1264
1265         if (qcms_supports_iccv4 && (in->A2B0 || out->B2A0 || in->mAB || out->mAB)) {
1266                 // Precache the transformation to a CLUT 33x33x33 in size.
1267                 // 33 is used by many profiles and works well in pratice.
1268                 // This evenly divides 256 into blocks of 8x8x8.
1269                 // TODO For transforming small data sets of about 200x200 or less
1270                 // precaching should be avoided.
1271                 qcms_transform *result = qcms_transform_precacheLUT_float(transform, in, out, 33, in_type);
1272                 if (!result) {
1273                         assert(0 && "precacheLUT failed");
1274                         qcms_transform_release(transform);
1275                         return NULL;
1276                 }
1277                 return result;
1278         }
1279
1280         if (precache) {
1281                 transform->output_table_r = precache_reference(out->output_table_r);
1282                 transform->output_table_g = precache_reference(out->output_table_g);
1283                 transform->output_table_b = precache_reference(out->output_table_b);
1284         } else {
1285                 if (!out->redTRC || !out->greenTRC || !out->blueTRC) {
1286                         qcms_transform_release(transform);
1287                         return NO_MEM_TRANSFORM;
1288                 }
1289                 build_output_lut(out->redTRC, &transform->output_gamma_lut_r, &transform->output_gamma_lut_r_length);
1290                 build_output_lut(out->greenTRC, &transform->output_gamma_lut_g, &transform->output_gamma_lut_g_length);
1291                 build_output_lut(out->blueTRC, &transform->output_gamma_lut_b, &transform->output_gamma_lut_b_length);
1292                 if (!transform->output_gamma_lut_r || !transform->output_gamma_lut_g || !transform->output_gamma_lut_b) {
1293                         qcms_transform_release(transform);
1294                         return NO_MEM_TRANSFORM;
1295                 }
1296         }
1297
1298         if (in->color_space == RGB_SIGNATURE) {
1299                 struct matrix in_matrix, out_matrix, result;
1300
1301                 if (in_type != QCMS_DATA_RGB_8 &&
1302                     in_type != QCMS_DATA_RGBA_8){
1303                         assert(0 && "input type");
1304                         qcms_transform_release(transform);
1305                         return NULL;
1306                 }
1307                 if (precache) {
1308 #if defined(SSE2_ENABLE)
1309                     if (sse_version_available() >= 2) {
1310                             if (in_type == QCMS_DATA_RGB_8)
1311                                     transform->transform_fn = qcms_transform_data_rgb_out_lut_sse2;
1312                             else
1313                                     transform->transform_fn = qcms_transform_data_rgba_out_lut_sse2;
1314                     } else
1315 #endif
1316                         {
1317                                 if (in_type == QCMS_DATA_RGB_8)
1318                                         transform->transform_fn = qcms_transform_data_rgb_out_lut_precache;
1319                                 else
1320                                         transform->transform_fn = qcms_transform_data_rgba_out_lut_precache;
1321                         }
1322                 } else {
1323                         if (in_type == QCMS_DATA_RGB_8)
1324                                 transform->transform_fn = qcms_transform_data_rgb_out_lut;
1325                         else
1326                                 transform->transform_fn = qcms_transform_data_rgba_out_lut;
1327                 }
1328
1329                 //XXX: avoid duplicating tables if we can
1330                 transform->input_gamma_table_r = build_input_gamma_table(in->redTRC);
1331                 transform->input_gamma_table_g = build_input_gamma_table(in->greenTRC);
1332                 transform->input_gamma_table_b = build_input_gamma_table(in->blueTRC);
1333                 if (!transform->input_gamma_table_r || !transform->input_gamma_table_g || !transform->input_gamma_table_b) {
1334                         qcms_transform_release(transform);
1335                         return NO_MEM_TRANSFORM;
1336                 }
1337
1338
1339                 /* build combined colorant matrix */
1340                 in_matrix = build_colorant_matrix(in);
1341                 out_matrix = build_colorant_matrix(out);
1342                 out_matrix = matrix_invert(out_matrix);
1343                 if (out_matrix.invalid) {
1344                         qcms_transform_release(transform);
1345                         return NULL;
1346                 }
1347                 result = matrix_multiply(out_matrix, in_matrix);
1348
1349                 /* check for NaN values in the matrix and bail if we find any
1350                    see also https://bugzilla.mozilla.org/show_bug.cgi?id=1170316 */
1351                 for (i = 0 ; i < 3 ; ++i) {
1352                         for (j = 0 ; j < 3 ; ++j) {
1353                                 if (result.m[i][j] != result.m[i][j]) {
1354                                         qcms_transform_release(transform);
1355                                         return NULL;
1356                                 }
1357                         }
1358                 }
1359
1360                 /* store the results in column major mode
1361                  * this makes doing the multiplication with sse easier */
1362                 transform->matrix[0][0] = result.m[0][0];
1363                 transform->matrix[1][0] = result.m[0][1];
1364                 transform->matrix[2][0] = result.m[0][2];
1365                 transform->matrix[0][1] = result.m[1][0];
1366                 transform->matrix[1][1] = result.m[1][1];
1367                 transform->matrix[2][1] = result.m[1][2];
1368                 transform->matrix[0][2] = result.m[2][0];
1369                 transform->matrix[1][2] = result.m[2][1];
1370                 transform->matrix[2][2] = result.m[2][2];
1371
1372                 /* Flag transform as matrix. */
1373                 transform->transform_flags |= TRANSFORM_FLAG_MATRIX;
1374
1375         } else if (in->color_space == GRAY_SIGNATURE) {
1376                 if (in_type != QCMS_DATA_GRAY_8 &&
1377                                 in_type != QCMS_DATA_GRAYA_8){
1378                         assert(0 && "input type");
1379                         qcms_transform_release(transform);
1380                         return NULL;
1381                 }
1382
1383                 transform->input_gamma_table_gray = build_input_gamma_table(in->grayTRC);
1384                 if (!transform->input_gamma_table_gray) {
1385                         qcms_transform_release(transform);
1386                         return NO_MEM_TRANSFORM;
1387                 }
1388
1389                 if (precache) {
1390                         if (in_type == QCMS_DATA_GRAY_8) {
1391                                 transform->transform_fn = qcms_transform_data_gray_out_precache;
1392                         } else {
1393                                 transform->transform_fn = qcms_transform_data_graya_out_precache;
1394                         }
1395                 } else {
1396                         if (in_type == QCMS_DATA_GRAY_8) {
1397                                 transform->transform_fn = qcms_transform_data_gray_out_lut;
1398                         } else {
1399                                 transform->transform_fn = qcms_transform_data_graya_out_lut;
1400                         }
1401                 }
1402         } else {
1403                 assert(0 && "unexpected colorspace");
1404                 qcms_transform_release(transform);
1405                 return NULL;
1406         }
1407         return transform;
1408 }
1409
1410 /* __force_align_arg_pointer__ is an x86-only attribute, and gcc/clang warns on unused
1411  * attributes. Don't use this on ARM or AMD64. __has_attribute can detect the presence
1412  * of the attribute but is currently only supported by clang */
1413 #if defined(__has_attribute)
1414 #define HAS_FORCE_ALIGN_ARG_POINTER __has_attribute(__force_align_arg_pointer__)
1415 #elif defined(__GNUC__) && defined(__i386__)
1416 #define HAS_FORCE_ALIGN_ARG_POINTER 1
1417 #else
1418 #define HAS_FORCE_ALIGN_ARG_POINTER 0
1419 #endif
1420
1421 #if HAS_FORCE_ALIGN_ARG_POINTER
1422 /* we need this to avoid crashes when gcc assumes the stack is 128bit aligned */
1423 __attribute__((__force_align_arg_pointer__))
1424 #endif
1425 void qcms_transform_data(qcms_transform *transform, void *src, void *dest, size_t length)
1426 {
1427         static const struct _qcms_format_type output_rgbx = { 0, 2 };
1428
1429         transform->transform_fn(transform, src, dest, length, output_rgbx);
1430 }
1431
1432 void qcms_transform_data_type(qcms_transform *transform, void *src, void *dest, size_t length, qcms_output_type type)
1433 {
1434         static const struct _qcms_format_type output_rgbx = { 0, 2 };
1435         static const struct _qcms_format_type output_bgrx = { 2, 0 };
1436
1437         transform->transform_fn(transform, src, dest, length, type == QCMS_OUTPUT_BGRX ? output_bgrx : output_rgbx);
1438 }
1439
1440 qcms_bool qcms_supports_iccv4;
1441
1442 void qcms_enable_iccv4()
1443 {
1444         qcms_supports_iccv4 = true;
1445 }
1446
1447 static inline qcms_bool transform_is_matrix(qcms_transform *t)
1448 {
1449         return (t->transform_flags & TRANSFORM_FLAG_MATRIX) ? true : false;
1450 }
1451
1452 qcms_bool qcms_transform_is_matrix(qcms_transform *t)
1453 {
1454         return transform_is_matrix(t);
1455 }
1456
1457 float qcms_transform_get_matrix(qcms_transform *t, unsigned i, unsigned j)
1458 {
1459         assert(transform_is_matrix(t) && i < 3 && j < 3);
1460
1461         // Return transform matrix element in row major order (permute i and j)
1462
1463         return t->matrix[j][i];
1464 }
1465
1466 static inline qcms_bool supported_trc_type(qcms_trc_type type)
1467 {
1468         return type == QCMS_TRC_HALF_FLOAT;
1469 }
1470
1471 const uint16_t half_float_one = 0x3c00;
1472
1473 size_t qcms_transform_get_input_trc_rgba(qcms_transform *t, qcms_profile *in, qcms_trc_type type, unsigned short *data)
1474 {
1475         const size_t size = 256; // The input gamma tables always have 256 entries.
1476
1477         size_t i;
1478
1479         if (in->color_space != RGB_SIGNATURE || !supported_trc_type(type))
1480                 return 0;
1481
1482         // qcms_profile *in is assumed to be the profile on the input-side of the color transform t.
1483         // When a transform is created, the input gamma curve data is stored in the transform ...
1484
1485         if (!t->input_gamma_table_r || !t->input_gamma_table_g || !t->input_gamma_table_b)
1486                 return 0;
1487
1488         // Report the size if no output data is requested. This allows callers to first work out the
1489         // the curve size, then provide allocated memory sufficient to store the curve rgba data.
1490
1491         if (!data)
1492                 return size;
1493
1494         for (i = 0; i < size; ++i) {
1495                 *data++ = float_to_half_float(t->input_gamma_table_r[i]); // r
1496                 *data++ = float_to_half_float(t->input_gamma_table_g[i]); // g
1497                 *data++ = float_to_half_float(t->input_gamma_table_b[i]); // b
1498                 *data++ = half_float_one;                                 // a
1499         }
1500
1501         return size;
1502 }
1503
1504 const float inverse65535 = (float) (1.0 / 65535.0);
1505
1506 size_t qcms_transform_get_output_trc_rgba(qcms_transform *t, qcms_profile *out, qcms_trc_type type, unsigned short *data)
1507 {
1508         size_t size, i;
1509
1510         if (out->color_space != RGB_SIGNATURE || !supported_trc_type(type))
1511                 return 0;
1512
1513         // qcms_profile *out is assumed to be the profile on the output-side of the transform t.
1514         // If the transform output gamma curves need building, do that. They're usually built when
1515         // the transform was created, but sometimes not due to the output gamma precache ...
1516
1517         if (!out->redTRC || !out->greenTRC || !out->blueTRC)
1518                 return 0;
1519         if (!t->output_gamma_lut_r)
1520                 build_output_lut(out->redTRC, &t->output_gamma_lut_r, &t->output_gamma_lut_r_length);
1521         if (!t->output_gamma_lut_g)
1522                 build_output_lut(out->greenTRC, &t->output_gamma_lut_g, &t->output_gamma_lut_g_length);
1523         if (!t->output_gamma_lut_b)
1524                 build_output_lut(out->blueTRC, &t->output_gamma_lut_b, &t->output_gamma_lut_b_length);
1525
1526         if (!t->output_gamma_lut_r || !t->output_gamma_lut_g || !t->output_gamma_lut_b)
1527                 return 0;
1528
1529         // Output gamma tables should have the same size and should have 4096 entries at most (the
1530         // minimum is 256). Larger tables are rare and ignored here: fail by returning 0.
1531
1532         size = t->output_gamma_lut_r_length;
1533         if (size != t->output_gamma_lut_g_length)
1534                 return 0;
1535         if (size != t->output_gamma_lut_b_length)
1536                 return 0;
1537         if (size < 256 || size > 4096)
1538                 return 0;
1539
1540         // Report the size if no output data is requested. This allows callers to first work out the
1541         // the curve size, then provide allocated memory sufficient to store the curve rgba data.
1542
1543         if (!data)
1544                 return size;
1545
1546         for (i = 0; i < size; ++i) {
1547                 *data++ = float_to_half_float(t->output_gamma_lut_r[i] * inverse65535); // r
1548                 *data++ = float_to_half_float(t->output_gamma_lut_g[i] * inverse65535); // g
1549                 *data++ = float_to_half_float(t->output_gamma_lut_b[i] * inverse65535); // b
1550                 *data++ = half_float_one;                                               // a
1551         }
1552
1553         return size;
1554 }