directmanipulation: Return S_OK from viewport_SetViewportOptions stub.
[wine/zf.git] / dlls / d3dx9_36 / txc_compress_dxtn.c
blob7f10de8076b1124671c5346f0b17c8cab52d0e5c
1 /*
2 * libtxc_dxtn
3 * Version: 1.0
5 * Copyright (C) 2004 Roland Scheidegger All Rights Reserved.
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include "txc_dxtn.h"
/* Per-channel weights of the squared-error metric used when comparing colors.
   They are the squares of 2/4/1 (red/green/blue), loosely following an
   rgb->luminance conversion; it is not certain that this really reflects
   visual perception. */
#define REDWEIGHT 4
#define GREENWEIGHT 16
#define BLUEWEIGHT 1

/* Pixels whose alpha is <= ALPHACUT are treated as transparent when encoding
   GL_COMPRESSED_RGBA_S3TC_DXT1_EXT blocks. */
#define ALPHACUT 127
37 static void fancybasecolorsearch( GLubyte *blkaddr, GLubyte srccolors[4][4][4], GLubyte *bestcolor[2],
38 GLint numxpixels, GLint numypixels, GLint type, GLboolean haveAlpha)
40 /* use same luminance-weighted distance metric to determine encoding as for finding the base colors */
42 /* TODO could also try to find a better encoding for the 3-color-encoding type, this really should be done
43 if it's rgba_dxt1 and we have alpha in the block, currently even values which will be mapped to black
44 due to their alpha value will influence the result */
45 GLint i, j, colors, z;
46 GLuint pixerror, pixerrorred, pixerrorgreen, pixerrorblue, pixerrorbest;
47 GLint colordist, blockerrlin[2][3];
48 GLubyte nrcolor[2];
49 GLint pixerrorcolorbest[3];
50 GLubyte enc = 0;
51 GLubyte cv[4][4];
52 GLubyte testcolor[2][3];
54 /* fprintf(stderr, "color begin 0 r/g/b %d/%d/%d, 1 r/g/b %d/%d/%d\n",
55 bestcolor[0][0], bestcolor[0][1], bestcolor[0][2], bestcolor[1][0], bestcolor[1][1], bestcolor[1][2]);*/
56 if (((bestcolor[0][0] & 0xf8) << 8 | (bestcolor[0][1] & 0xfc) << 3 | bestcolor[0][2] >> 3) <
57 ((bestcolor[1][0] & 0xf8) << 8 | (bestcolor[1][1] & 0xfc) << 3 | bestcolor[1][2] >> 3)) {
58 testcolor[0][0] = bestcolor[0][0];
59 testcolor[0][1] = bestcolor[0][1];
60 testcolor[0][2] = bestcolor[0][2];
61 testcolor[1][0] = bestcolor[1][0];
62 testcolor[1][1] = bestcolor[1][1];
63 testcolor[1][2] = bestcolor[1][2];
65 else {
66 testcolor[1][0] = bestcolor[0][0];
67 testcolor[1][1] = bestcolor[0][1];
68 testcolor[1][2] = bestcolor[0][2];
69 testcolor[0][0] = bestcolor[1][0];
70 testcolor[0][1] = bestcolor[1][1];
71 testcolor[0][2] = bestcolor[1][2];
74 for (i = 0; i < 3; i ++) {
75 cv[0][i] = testcolor[0][i];
76 cv[1][i] = testcolor[1][i];
77 cv[2][i] = (testcolor[0][i] * 2 + testcolor[1][i]) / 3;
78 cv[3][i] = (testcolor[0][i] + testcolor[1][i] * 2) / 3;
81 blockerrlin[0][0] = 0;
82 blockerrlin[0][1] = 0;
83 blockerrlin[0][2] = 0;
84 blockerrlin[1][0] = 0;
85 blockerrlin[1][1] = 0;
86 blockerrlin[1][2] = 0;
88 nrcolor[0] = 0;
89 nrcolor[1] = 0;
91 for (j = 0; j < numypixels; j++) {
92 for (i = 0; i < numxpixels; i++) {
93 pixerrorbest = 0xffffffff;
94 for (colors = 0; colors < 4; colors++) {
95 colordist = srccolors[j][i][0] - (cv[colors][0]);
96 pixerror = colordist * colordist * REDWEIGHT;
97 pixerrorred = colordist;
98 colordist = srccolors[j][i][1] - (cv[colors][1]);
99 pixerror += colordist * colordist * GREENWEIGHT;
100 pixerrorgreen = colordist;
101 colordist = srccolors[j][i][2] - (cv[colors][2]);
102 pixerror += colordist * colordist * BLUEWEIGHT;
103 pixerrorblue = colordist;
104 if (pixerror < pixerrorbest) {
105 enc = colors;
106 pixerrorbest = pixerror;
107 pixerrorcolorbest[0] = pixerrorred;
108 pixerrorcolorbest[1] = pixerrorgreen;
109 pixerrorcolorbest[2] = pixerrorblue;
112 if (enc == 0) {
113 for (z = 0; z < 3; z++) {
114 blockerrlin[0][z] += 3 * pixerrorcolorbest[z];
116 nrcolor[0] += 3;
118 else if (enc == 2) {
119 for (z = 0; z < 3; z++) {
120 blockerrlin[0][z] += 2 * pixerrorcolorbest[z];
122 nrcolor[0] += 2;
123 for (z = 0; z < 3; z++) {
124 blockerrlin[1][z] += 1 * pixerrorcolorbest[z];
126 nrcolor[1] += 1;
128 else if (enc == 3) {
129 for (z = 0; z < 3; z++) {
130 blockerrlin[0][z] += 1 * pixerrorcolorbest[z];
132 nrcolor[0] += 1;
133 for (z = 0; z < 3; z++) {
134 blockerrlin[1][z] += 2 * pixerrorcolorbest[z];
136 nrcolor[1] += 2;
138 else if (enc == 1) {
139 for (z = 0; z < 3; z++) {
140 blockerrlin[1][z] += 3 * pixerrorcolorbest[z];
142 nrcolor[1] += 3;
146 if (nrcolor[0] == 0) nrcolor[0] = 1;
147 if (nrcolor[1] == 0) nrcolor[1] = 1;
148 for (j = 0; j < 2; j++) {
149 for (i = 0; i < 3; i++) {
150 GLint newvalue = testcolor[j][i] + blockerrlin[j][i] / nrcolor[j];
151 if (newvalue <= 0)
152 testcolor[j][i] = 0;
153 else if (newvalue >= 255)
154 testcolor[j][i] = 255;
155 else testcolor[j][i] = newvalue;
159 if ((abs(testcolor[0][0] - testcolor[1][0]) < 8) &&
160 (abs(testcolor[0][1] - testcolor[1][1]) < 4) &&
161 (abs(testcolor[0][2] - testcolor[1][2]) < 8)) {
162 /* both colors are so close they might get encoded as the same 16bit values */
163 GLubyte coldiffred, coldiffgreen, coldiffblue, coldiffmax, factor, ind0, ind1;
165 coldiffred = abs(testcolor[0][0] - testcolor[1][0]);
166 coldiffgreen = 2 * abs(testcolor[0][1] - testcolor[1][1]);
167 coldiffblue = abs(testcolor[0][2] - testcolor[1][2]);
168 coldiffmax = coldiffred;
169 if (coldiffmax < coldiffgreen) coldiffmax = coldiffgreen;
170 if (coldiffmax < coldiffblue) coldiffmax = coldiffblue;
171 if (coldiffmax > 0) {
172 if (coldiffmax > 4) factor = 2;
173 else if (coldiffmax > 2) factor = 3;
174 else factor = 4;
175 /* Won't do much if the color value is near 255... */
176 /* argh so many ifs */
177 if (testcolor[1][1] >= testcolor[0][1]) {
178 ind1 = 1; ind0 = 0;
180 else {
181 ind1 = 0; ind0 = 1;
183 if ((testcolor[ind1][1] + factor * coldiffgreen) <= 255)
184 testcolor[ind1][1] += factor * coldiffgreen;
185 else testcolor[ind1][1] = 255;
186 if ((testcolor[ind1][0] - testcolor[ind0][1]) > 0) {
187 if ((testcolor[ind1][0] + factor * coldiffred) <= 255)
188 testcolor[ind1][0] += factor * coldiffred;
189 else testcolor[ind1][0] = 255;
191 else {
192 if ((testcolor[ind0][0] + factor * coldiffred) <= 255)
193 testcolor[ind0][0] += factor * coldiffred;
194 else testcolor[ind0][0] = 255;
196 if ((testcolor[ind1][2] - testcolor[ind0][2]) > 0) {
197 if ((testcolor[ind1][2] + factor * coldiffblue) <= 255)
198 testcolor[ind1][2] += factor * coldiffblue;
199 else testcolor[ind1][2] = 255;
201 else {
202 if ((testcolor[ind0][2] + factor * coldiffblue) <= 255)
203 testcolor[ind0][2] += factor * coldiffblue;
204 else testcolor[ind0][2] = 255;
209 if (((testcolor[0][0] & 0xf8) << 8 | (testcolor[0][1] & 0xfc) << 3 | testcolor[0][2] >> 3) <
210 ((testcolor[1][0] & 0xf8) << 8 | (testcolor[1][1] & 0xfc) << 3 | testcolor[1][2]) >> 3) {
211 for (i = 0; i < 3; i++) {
212 bestcolor[0][i] = testcolor[0][i];
213 bestcolor[1][i] = testcolor[1][i];
216 else {
217 for (i = 0; i < 3; i++) {
218 bestcolor[0][i] = testcolor[1][i];
219 bestcolor[1][i] = testcolor[0][i];
223 /* fprintf(stderr, "color end 0 r/g/b %d/%d/%d, 1 r/g/b %d/%d/%d\n",
224 bestcolor[0][0], bestcolor[0][1], bestcolor[0][2], bestcolor[1][0], bestcolor[1][1], bestcolor[1][2]);*/
229 static void storedxtencodedblock( GLubyte *blkaddr, GLubyte srccolors[4][4][4], GLubyte *bestcolor[2],
230 GLint numxpixels, GLint numypixels, GLuint type, GLboolean haveAlpha)
232 /* use same luminance-weighted distance metric to determine encoding as for finding the base colors */
234 GLint i, j, colors;
235 GLuint testerror, testerror2, pixerror, pixerrorbest;
236 GLint colordist;
237 GLushort color0, color1, tempcolor;
238 GLuint bits = 0, bits2 = 0;
239 GLubyte *colorptr;
240 GLubyte enc = 0;
241 GLubyte cv[4][4];
243 bestcolor[0][0] = bestcolor[0][0] & 0xf8;
244 bestcolor[0][1] = bestcolor[0][1] & 0xfc;
245 bestcolor[0][2] = bestcolor[0][2] & 0xf8;
246 bestcolor[1][0] = bestcolor[1][0] & 0xf8;
247 bestcolor[1][1] = bestcolor[1][1] & 0xfc;
248 bestcolor[1][2] = bestcolor[1][2] & 0xf8;
250 color0 = bestcolor[0][0] << 8 | bestcolor[0][1] << 3 | bestcolor[0][2] >> 3;
251 color1 = bestcolor[1][0] << 8 | bestcolor[1][1] << 3 | bestcolor[1][2] >> 3;
252 if (color0 < color1) {
253 tempcolor = color0; color0 = color1; color1 = tempcolor;
254 colorptr = bestcolor[0]; bestcolor[0] = bestcolor[1]; bestcolor[1] = colorptr;
258 for (i = 0; i < 3; i++) {
259 cv[0][i] = bestcolor[0][i];
260 cv[1][i] = bestcolor[1][i];
261 cv[2][i] = (bestcolor[0][i] * 2 + bestcolor[1][i]) / 3;
262 cv[3][i] = (bestcolor[0][i] + bestcolor[1][i] * 2) / 3;
265 testerror = 0;
266 for (j = 0; j < numypixels; j++) {
267 for (i = 0; i < numxpixels; i++) {
268 pixerrorbest = 0xffffffff;
269 for (colors = 0; colors < 4; colors++) {
270 colordist = srccolors[j][i][0] - cv[colors][0];
271 pixerror = colordist * colordist * REDWEIGHT;
272 colordist = srccolors[j][i][1] - cv[colors][1];
273 pixerror += colordist * colordist * GREENWEIGHT;
274 colordist = srccolors[j][i][2] - cv[colors][2];
275 pixerror += colordist * colordist * BLUEWEIGHT;
276 if (pixerror < pixerrorbest) {
277 pixerrorbest = pixerror;
278 enc = colors;
281 testerror += pixerrorbest;
282 bits |= enc << (2 * (j * 4 + i));
285 /* some hw might disagree but actually decoding should always use 4-color encoding
286 for non-dxt1 formats */
287 if (type == GL_COMPRESSED_RGB_S3TC_DXT1_EXT || type == GL_COMPRESSED_RGBA_S3TC_DXT1_EXT) {
288 for (i = 0; i < 3; i++) {
289 cv[2][i] = (bestcolor[0][i] + bestcolor[1][i]) / 2;
290 /* this isn't used. Looks like the black color constant can only be used
291 with RGB_DXT1 if I read the spec correctly (note though that the radeon gpu disagrees,
292 it will decode 3 to black even with DXT3/5), and due to how the color searching works
293 it won't get used even then */
294 cv[3][i] = 0;
296 testerror2 = 0;
297 for (j = 0; j < numypixels; j++) {
298 for (i = 0; i < numxpixels; i++) {
299 pixerrorbest = 0xffffffff;
300 if ((type == GL_COMPRESSED_RGBA_S3TC_DXT1_EXT) && (srccolors[j][i][3] <= ALPHACUT)) {
301 enc = 3;
302 pixerrorbest = 0; /* don't calculate error */
304 else {
305 /* we're calculating the same what we have done already for colors 0-1 above... */
306 for (colors = 0; colors < 3; colors++) {
307 colordist = srccolors[j][i][0] - cv[colors][0];
308 pixerror = colordist * colordist * REDWEIGHT;
309 colordist = srccolors[j][i][1] - cv[colors][1];
310 pixerror += colordist * colordist * GREENWEIGHT;
311 colordist = srccolors[j][i][2] - cv[colors][2];
312 pixerror += colordist * colordist * BLUEWEIGHT;
313 if (pixerror < pixerrorbest) {
314 pixerrorbest = pixerror;
315 /* need to exchange colors later */
316 if (colors > 1) enc = colors;
317 else enc = colors ^ 1;
321 testerror2 += pixerrorbest;
322 bits2 |= enc << (2 * (j * 4 + i));
325 } else {
326 testerror2 = 0xffffffff;
329 /* finally we're finished, write back colors and bits */
330 if ((testerror > testerror2) || (haveAlpha)) {
331 *blkaddr++ = color1 & 0xff;
332 *blkaddr++ = color1 >> 8;
333 *blkaddr++ = color0 & 0xff;
334 *blkaddr++ = color0 >> 8;
335 *blkaddr++ = bits2 & 0xff;
336 *blkaddr++ = ( bits2 >> 8) & 0xff;
337 *blkaddr++ = ( bits2 >> 16) & 0xff;
338 *blkaddr = bits2 >> 24;
340 else {
341 *blkaddr++ = color0 & 0xff;
342 *blkaddr++ = color0 >> 8;
343 *blkaddr++ = color1 & 0xff;
344 *blkaddr++ = color1 >> 8;
345 *blkaddr++ = bits & 0xff;
346 *blkaddr++ = ( bits >> 8) & 0xff;
347 *blkaddr++ = ( bits >> 16) & 0xff;
348 *blkaddr = bits >> 24;
352 static void encodedxtcolorblockfaster( GLubyte *blkaddr, GLubyte srccolors[4][4][4],
353 GLint numxpixels, GLint numypixels, GLuint type )
355 /* simplistic approach. We need two base colors, simply use the "highest" and the "lowest" color
356 present in the picture as base colors */
358 /* define lowest and highest color as shortest and longest vector to 0/0/0, though the
359 vectors are weighted similar to their importance in rgb-luminance conversion
360 doesn't work too well though...
361 This seems to be a rather difficult problem */
363 GLubyte *bestcolor[2];
364 GLubyte basecolors[2][3];
365 GLubyte i, j;
366 GLuint lowcv, highcv, testcv;
367 GLboolean haveAlpha = GL_FALSE;
369 lowcv = highcv = srccolors[0][0][0] * srccolors[0][0][0] * REDWEIGHT +
370 srccolors[0][0][1] * srccolors[0][0][1] * GREENWEIGHT +
371 srccolors[0][0][2] * srccolors[0][0][2] * BLUEWEIGHT;
372 bestcolor[0] = bestcolor[1] = srccolors[0][0];
373 for (j = 0; j < numypixels; j++) {
374 for (i = 0; i < numxpixels; i++) {
375 /* don't use this as a base color if the pixel will get black/transparent anyway */
376 if ((type != GL_COMPRESSED_RGBA_S3TC_DXT1_EXT) || (srccolors[j][i][3] > ALPHACUT)) {
377 testcv = srccolors[j][i][0] * srccolors[j][i][0] * REDWEIGHT +
378 srccolors[j][i][1] * srccolors[j][i][1] * GREENWEIGHT +
379 srccolors[j][i][2] * srccolors[j][i][2] * BLUEWEIGHT;
380 if (testcv > highcv) {
381 highcv = testcv;
382 bestcolor[1] = srccolors[j][i];
384 else if (testcv < lowcv) {
385 lowcv = testcv;
386 bestcolor[0] = srccolors[j][i];
389 else haveAlpha = GL_TRUE;
392 /* make sure the original color values won't get touched... */
393 for (j = 0; j < 2; j++) {
394 for (i = 0; i < 3; i++) {
395 basecolors[j][i] = bestcolor[j][i];
398 bestcolor[0] = basecolors[0];
399 bestcolor[1] = basecolors[1];
401 /* try to find better base colors */
402 fancybasecolorsearch(blkaddr, srccolors, bestcolor, numxpixels, numypixels, type, haveAlpha);
403 /* find the best encoding for these colors, and store the result */
404 storedxtencodedblock(blkaddr, srccolors, bestcolor, numxpixels, numypixels, type, haveAlpha);
407 static void writedxt5encodedalphablock( GLubyte *blkaddr, GLubyte alphabase1, GLubyte alphabase2,
408 GLubyte alphaenc[16])
410 *blkaddr++ = alphabase1;
411 *blkaddr++ = alphabase2;
412 *blkaddr++ = alphaenc[0] | (alphaenc[1] << 3) | ((alphaenc[2] & 3) << 6);
413 *blkaddr++ = (alphaenc[2] >> 2) | (alphaenc[3] << 1) | (alphaenc[4] << 4) | ((alphaenc[5] & 1) << 7);
414 *blkaddr++ = (alphaenc[5] >> 1) | (alphaenc[6] << 2) | (alphaenc[7] << 5);
415 *blkaddr++ = alphaenc[8] | (alphaenc[9] << 3) | ((alphaenc[10] & 3) << 6);
416 *blkaddr++ = (alphaenc[10] >> 2) | (alphaenc[11] << 1) | (alphaenc[12] << 4) | ((alphaenc[13] & 1) << 7);
417 *blkaddr++ = (alphaenc[13] >> 1) | (alphaenc[14] << 2) | (alphaenc[15] << 5);
420 static void encodedxt5alpha(GLubyte *blkaddr, GLubyte srccolors[4][4][4],
421 GLint numxpixels, GLint numypixels)
423 GLubyte alphabase[2], alphause[2];
424 GLshort alphatest[2];
425 GLuint alphablockerror1, alphablockerror2, alphablockerror3;
426 GLubyte i, j, aindex, acutValues[7];
427 GLubyte alphaenc1[16], alphaenc2[16], alphaenc3[16];
428 GLboolean alphaabsmin = GL_FALSE;
429 GLboolean alphaabsmax = GL_FALSE;
430 GLshort alphadist;
432 /* find lowest and highest alpha value in block, alphabase[0] lowest, alphabase[1] highest */
433 alphabase[0] = 0xff; alphabase[1] = 0x0;
434 for (j = 0; j < numypixels; j++) {
435 for (i = 0; i < numxpixels; i++) {
436 if (srccolors[j][i][3] == 0)
437 alphaabsmin = GL_TRUE;
438 else if (srccolors[j][i][3] == 255)
439 alphaabsmax = GL_TRUE;
440 else {
441 if (srccolors[j][i][3] > alphabase[1])
442 alphabase[1] = srccolors[j][i][3];
443 if (srccolors[j][i][3] < alphabase[0])
444 alphabase[0] = srccolors[j][i][3];
450 if ((alphabase[0] > alphabase[1]) && !(alphaabsmin && alphaabsmax)) { /* one color, either max or min */
451 /* shortcut here since it is a very common case (and also avoids later problems) */
452 /* || (alphabase[0] == alphabase[1] && !alphaabsmin && !alphaabsmax) */
453 /* could also test for alpha0 == alpha1 (and not min/max), but probably not common, so don't bother */
455 *blkaddr++ = srccolors[0][0][3];
456 blkaddr++;
457 *blkaddr++ = 0;
458 *blkaddr++ = 0;
459 *blkaddr++ = 0;
460 *blkaddr++ = 0;
461 *blkaddr++ = 0;
462 *blkaddr++ = 0;
463 /* fprintf(stderr, "enc0 used\n");*/
464 return;
467 /* find best encoding for alpha0 > alpha1 */
468 /* it's possible this encoding is better even if both alphaabsmin and alphaabsmax are true */
469 alphablockerror1 = 0x0;
470 alphablockerror2 = 0xffffffff;
471 alphablockerror3 = 0xffffffff;
472 if (alphaabsmin) alphause[0] = 0;
473 else alphause[0] = alphabase[0];
474 if (alphaabsmax) alphause[1] = 255;
475 else alphause[1] = alphabase[1];
476 /* calculate the 7 cut values, just the middle between 2 of the computed alpha values */
477 for (aindex = 0; aindex < 7; aindex++) {
478 /* don't forget here is always rounded down */
479 acutValues[aindex] = (alphause[0] * (2*aindex + 1) + alphause[1] * (14 - (2*aindex + 1))) / 14;
482 for (j = 0; j < numypixels; j++) {
483 for (i = 0; i < numxpixels; i++) {
484 /* maybe it's overkill to have the most complicated calculation just for the error
485 calculation which we only need to figure out if encoding1 or encoding2 is better... */
486 if (srccolors[j][i][3] > acutValues[0]) {
487 alphaenc1[4*j + i] = 0;
488 alphadist = srccolors[j][i][3] - alphause[1];
490 else if (srccolors[j][i][3] > acutValues[1]) {
491 alphaenc1[4*j + i] = 2;
492 alphadist = srccolors[j][i][3] - (alphause[1] * 6 + alphause[0] * 1) / 7;
494 else if (srccolors[j][i][3] > acutValues[2]) {
495 alphaenc1[4*j + i] = 3;
496 alphadist = srccolors[j][i][3] - (alphause[1] * 5 + alphause[0] * 2) / 7;
498 else if (srccolors[j][i][3] > acutValues[3]) {
499 alphaenc1[4*j + i] = 4;
500 alphadist = srccolors[j][i][3] - (alphause[1] * 4 + alphause[0] * 3) / 7;
502 else if (srccolors[j][i][3] > acutValues[4]) {
503 alphaenc1[4*j + i] = 5;
504 alphadist = srccolors[j][i][3] - (alphause[1] * 3 + alphause[0] * 4) / 7;
506 else if (srccolors[j][i][3] > acutValues[5]) {
507 alphaenc1[4*j + i] = 6;
508 alphadist = srccolors[j][i][3] - (alphause[1] * 2 + alphause[0] * 5) / 7;
510 else if (srccolors[j][i][3] > acutValues[6]) {
511 alphaenc1[4*j + i] = 7;
512 alphadist = srccolors[j][i][3] - (alphause[1] * 1 + alphause[0] * 6) / 7;
514 else {
515 alphaenc1[4*j + i] = 1;
516 alphadist = srccolors[j][i][3] - alphause[0];
518 alphablockerror1 += alphadist * alphadist;
521 /* for (i = 0; i < 16; i++) {
522 fprintf(stderr, "%d ", alphaenc1[i]);
524 fprintf(stderr, "cutVals ");
525 for (i = 0; i < 8; i++) {
526 fprintf(stderr, "%d ", acutValues[i]);
528 fprintf(stderr, "srcVals ");
529 for (j = 0; j < numypixels; j++)
530 for (i = 0; i < numxpixels; i++) {
531 fprintf(stderr, "%d ", srccolors[j][i][3]);
534 fprintf(stderr, "\n");
536 /* it's not very likely this encoding is better if both alphaabsmin and alphaabsmax
537 are false but try it anyway */
538 if (alphablockerror1 >= 32) {
540 /* don't bother if encoding is already very good, this condition should also imply
541 we have valid alphabase colors which we absolutely need (alphabase[0] <= alphabase[1]) */
542 alphablockerror2 = 0;
543 for (aindex = 0; aindex < 5; aindex++) {
544 /* don't forget here is always rounded down */
545 acutValues[aindex] = (alphabase[0] * (10 - (2*aindex + 1)) + alphabase[1] * (2*aindex + 1)) / 10;
547 for (j = 0; j < numypixels; j++) {
548 for (i = 0; i < numxpixels; i++) {
549 /* maybe it's overkill to have the most complicated calculation just for the error
550 calculation which we only need to figure out if encoding1 or encoding2 is better... */
551 if (srccolors[j][i][3] == 0) {
552 alphaenc2[4*j + i] = 6;
553 alphadist = 0;
555 else if (srccolors[j][i][3] == 255) {
556 alphaenc2[4*j + i] = 7;
557 alphadist = 0;
559 else if (srccolors[j][i][3] <= acutValues[0]) {
560 alphaenc2[4*j + i] = 0;
561 alphadist = srccolors[j][i][3] - alphabase[0];
563 else if (srccolors[j][i][3] <= acutValues[1]) {
564 alphaenc2[4*j + i] = 2;
565 alphadist = srccolors[j][i][3] - (alphabase[0] * 4 + alphabase[1] * 1) / 5;
567 else if (srccolors[j][i][3] <= acutValues[2]) {
568 alphaenc2[4*j + i] = 3;
569 alphadist = srccolors[j][i][3] - (alphabase[0] * 3 + alphabase[1] * 2) / 5;
571 else if (srccolors[j][i][3] <= acutValues[3]) {
572 alphaenc2[4*j + i] = 4;
573 alphadist = srccolors[j][i][3] - (alphabase[0] * 2 + alphabase[1] * 3) / 5;
575 else if (srccolors[j][i][3] <= acutValues[4]) {
576 alphaenc2[4*j + i] = 5;
577 alphadist = srccolors[j][i][3] - (alphabase[0] * 1 + alphabase[1] * 4) / 5;
579 else {
580 alphaenc2[4*j + i] = 1;
581 alphadist = srccolors[j][i][3] - alphabase[1];
583 alphablockerror2 += alphadist * alphadist;
588 /* skip this if the error is already very small
589 this encoding is MUCH better on average than #2 though, but expensive! */
590 if ((alphablockerror2 > 96) && (alphablockerror1 > 96)) {
591 GLshort blockerrlin1 = 0;
592 GLshort blockerrlin2 = 0;
593 GLubyte nralphainrangelow = 0;
594 GLubyte nralphainrangehigh = 0;
595 alphatest[0] = 0xff;
596 alphatest[1] = 0x0;
597 /* if we have large range it's likely there are values close to 0/255, try to map them to 0/255 */
598 for (j = 0; j < numypixels; j++) {
599 for (i = 0; i < numxpixels; i++) {
600 if ((srccolors[j][i][3] > alphatest[1]) && (srccolors[j][i][3] < (255 -(alphabase[1] - alphabase[0]) / 28)))
601 alphatest[1] = srccolors[j][i][3];
602 if ((srccolors[j][i][3] < alphatest[0]) && (srccolors[j][i][3] > (alphabase[1] - alphabase[0]) / 28))
603 alphatest[0] = srccolors[j][i][3];
606 /* shouldn't happen too often, don't really care about those degenerated cases */
607 if (alphatest[1] <= alphatest[0]) {
608 alphatest[0] = 1;
609 alphatest[1] = 254;
610 /* fprintf(stderr, "only 1 or 0 colors for encoding!\n");*/
612 for (aindex = 0; aindex < 5; aindex++) {
613 /* don't forget here is always rounded down */
614 acutValues[aindex] = (alphatest[0] * (10 - (2*aindex + 1)) + alphatest[1] * (2*aindex + 1)) / 10;
617 /* find the "average" difference between the alpha values and the next encoded value.
618 This is then used to calculate new base values.
619 Should there be some weighting, i.e. those values closer to alphatest[x] have more weight,
620 since they will see more improvement, and also because the values in the middle are somewhat
621 likely to get no improvement at all (because the base values might move in different directions)?
622 OTOH it would mean the values in the middle are even less likely to get an improvement
624 for (j = 0; j < numypixels; j++) {
625 for (i = 0; i < numxpixels; i++) {
626 if (srccolors[j][i][3] <= alphatest[0] / 2) {
628 else if (srccolors[j][i][3] > ((255 + alphatest[1]) / 2)) {
630 else if (srccolors[j][i][3] <= acutValues[0]) {
631 blockerrlin1 += (srccolors[j][i][3] - alphatest[0]);
632 nralphainrangelow += 1;
634 else if (srccolors[j][i][3] <= acutValues[1]) {
635 blockerrlin1 += (srccolors[j][i][3] - (alphatest[0] * 4 + alphatest[1] * 1) / 5);
636 blockerrlin2 += (srccolors[j][i][3] - (alphatest[0] * 4 + alphatest[1] * 1) / 5);
637 nralphainrangelow += 1;
638 nralphainrangehigh += 1;
640 else if (srccolors[j][i][3] <= acutValues[2]) {
641 blockerrlin1 += (srccolors[j][i][3] - (alphatest[0] * 3 + alphatest[1] * 2) / 5);
642 blockerrlin2 += (srccolors[j][i][3] - (alphatest[0] * 3 + alphatest[1] * 2) / 5);
643 nralphainrangelow += 1;
644 nralphainrangehigh += 1;
646 else if (srccolors[j][i][3] <= acutValues[3]) {
647 blockerrlin1 += (srccolors[j][i][3] - (alphatest[0] * 2 + alphatest[1] * 3) / 5);
648 blockerrlin2 += (srccolors[j][i][3] - (alphatest[0] * 2 + alphatest[1] * 3) / 5);
649 nralphainrangelow += 1;
650 nralphainrangehigh += 1;
652 else if (srccolors[j][i][3] <= acutValues[4]) {
653 blockerrlin1 += (srccolors[j][i][3] - (alphatest[0] * 1 + alphatest[1] * 4) / 5);
654 blockerrlin2 += (srccolors[j][i][3] - (alphatest[0] * 1 + alphatest[1] * 4) / 5);
655 nralphainrangelow += 1;
656 nralphainrangehigh += 1;
658 else {
659 blockerrlin2 += (srccolors[j][i][3] - alphatest[1]);
660 nralphainrangehigh += 1;
664 /* shouldn't happen often, needed to avoid div by zero */
665 if (nralphainrangelow == 0) nralphainrangelow = 1;
666 if (nralphainrangehigh == 0) nralphainrangehigh = 1;
667 alphatest[0] = alphatest[0] + (blockerrlin1 / nralphainrangelow);
668 /* fprintf(stderr, "block err lin low %d, nr %d\n", blockerrlin1, nralphainrangelow);
669 fprintf(stderr, "block err lin high %d, nr %d\n", blockerrlin2, nralphainrangehigh);*/
670 /* again shouldn't really happen often... */
671 if (alphatest[0] < 0) {
672 alphatest[0] = 0;
673 /* fprintf(stderr, "adj alpha base val to 0\n");*/
675 alphatest[1] = alphatest[1] + (blockerrlin2 / nralphainrangehigh);
676 if (alphatest[1] > 255) {
677 alphatest[1] = 255;
678 /* fprintf(stderr, "adj alpha base val to 255\n");*/
681 alphablockerror3 = 0;
682 for (aindex = 0; aindex < 5; aindex++) {
683 /* don't forget here is always rounded down */
684 acutValues[aindex] = (alphatest[0] * (10 - (2*aindex + 1)) + alphatest[1] * (2*aindex + 1)) / 10;
686 for (j = 0; j < numypixels; j++) {
687 for (i = 0; i < numxpixels; i++) {
688 /* maybe it's overkill to have the most complicated calculation just for the error
689 calculation which we only need to figure out if encoding1 or encoding2 is better... */
690 if (srccolors[j][i][3] <= alphatest[0] / 2) {
691 alphaenc3[4*j + i] = 6;
692 alphadist = srccolors[j][i][3];
694 else if (srccolors[j][i][3] > ((255 + alphatest[1]) / 2)) {
695 alphaenc3[4*j + i] = 7;
696 alphadist = 255 - srccolors[j][i][3];
698 else if (srccolors[j][i][3] <= acutValues[0]) {
699 alphaenc3[4*j + i] = 0;
700 alphadist = srccolors[j][i][3] - alphatest[0];
702 else if (srccolors[j][i][3] <= acutValues[1]) {
703 alphaenc3[4*j + i] = 2;
704 alphadist = srccolors[j][i][3] - (alphatest[0] * 4 + alphatest[1] * 1) / 5;
706 else if (srccolors[j][i][3] <= acutValues[2]) {
707 alphaenc3[4*j + i] = 3;
708 alphadist = srccolors[j][i][3] - (alphatest[0] * 3 + alphatest[1] * 2) / 5;
710 else if (srccolors[j][i][3] <= acutValues[3]) {
711 alphaenc3[4*j + i] = 4;
712 alphadist = srccolors[j][i][3] - (alphatest[0] * 2 + alphatest[1] * 3) / 5;
714 else if (srccolors[j][i][3] <= acutValues[4]) {
715 alphaenc3[4*j + i] = 5;
716 alphadist = srccolors[j][i][3] - (alphatest[0] * 1 + alphatest[1] * 4) / 5;
718 else {
719 alphaenc3[4*j + i] = 1;
720 alphadist = srccolors[j][i][3] - alphatest[1];
722 alphablockerror3 += alphadist * alphadist;
727 /* write the alpha values and encoding back. */
728 if ((alphablockerror1 <= alphablockerror2) && (alphablockerror1 <= alphablockerror3)) {
729 /* if (alphablockerror1 > 96) fprintf(stderr, "enc1 used, error %d\n", alphablockerror1);*/
730 writedxt5encodedalphablock( blkaddr, alphause[1], alphause[0], alphaenc1 );
732 else if (alphablockerror2 <= alphablockerror3) {
733 /* if (alphablockerror2 > 96) fprintf(stderr, "enc2 used, error %d\n", alphablockerror2);*/
734 writedxt5encodedalphablock( blkaddr, alphabase[0], alphabase[1], alphaenc2 );
736 else {
737 /* fprintf(stderr, "enc3 used, error %d\n", alphablockerror3);*/
738 writedxt5encodedalphablock( blkaddr, (GLubyte)alphatest[0], (GLubyte)alphatest[1], alphaenc3 );
742 static void extractsrccolors( GLubyte srcpixels[4][4][4], const GLchan *srcaddr,
743 GLint srcRowStride, GLint numxpixels, GLint numypixels, GLint comps)
745 GLubyte i, j, c;
746 const GLchan *curaddr;
747 for (j = 0; j < numypixels; j++) {
748 curaddr = srcaddr + j * srcRowStride * comps;
749 for (i = 0; i < numxpixels; i++) {
750 for (c = 0; c < comps; c++) {
751 srcpixels[j][i][c] = *curaddr++ / (CHAN_MAX / 255);
758 void tx_compress_dxtn(GLint srccomps, GLint width, GLint height, const GLubyte *srcPixData,
759 GLenum destFormat, GLubyte *dest, GLint dstRowStride)
761 GLubyte *blkaddr = dest;
762 GLubyte srcpixels[4][4][4];
763 const GLchan *srcaddr = srcPixData;
764 GLint numxpixels, numypixels;
765 GLint i, j;
766 GLint dstRowDiff;
768 switch (destFormat) {
769 case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
770 case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
771 /* hmm we used to get called without dstRowStride... */
772 dstRowDiff = dstRowStride >= (width * 2) ? dstRowStride - (((width + 3) & ~3) * 2) : 0;
773 /* fprintf(stderr, "dxt1 tex width %d tex height %d dstRowStride %d\n",
774 width, height, dstRowStride); */
775 for (j = 0; j < height; j += 4) {
776 if (height > j + 3) numypixels = 4;
777 else numypixels = height - j;
778 srcaddr = srcPixData + j * width * srccomps;
779 for (i = 0; i < width; i += 4) {
780 if (width > i + 3) numxpixels = 4;
781 else numxpixels = width - i;
782 extractsrccolors(srcpixels, srcaddr, width, numxpixels, numypixels, srccomps);
783 encodedxtcolorblockfaster(blkaddr, srcpixels, numxpixels, numypixels, destFormat);
784 srcaddr += srccomps * numxpixels;
785 blkaddr += 8;
787 blkaddr += dstRowDiff;
789 break;
790 case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
791 dstRowDiff = dstRowStride >= (width * 4) ? dstRowStride - (((width + 3) & ~3) * 4) : 0;
792 /* fprintf(stderr, "dxt3 tex width %d tex height %d dstRowStride %d\n",
793 width, height, dstRowStride); */
794 for (j = 0; j < height; j += 4) {
795 if (height > j + 3) numypixels = 4;
796 else numypixels = height - j;
797 srcaddr = srcPixData + j * width * srccomps;
798 for (i = 0; i < width; i += 4) {
799 if (width > i + 3) numxpixels = 4;
800 else numxpixels = width - i;
801 extractsrccolors(srcpixels, srcaddr, width, numxpixels, numypixels, srccomps);
802 *blkaddr++ = (srcpixels[0][0][3] >> 4) | (srcpixels[0][1][3] & 0xf0);
803 *blkaddr++ = (srcpixels[0][2][3] >> 4) | (srcpixels[0][3][3] & 0xf0);
804 *blkaddr++ = (srcpixels[1][0][3] >> 4) | (srcpixels[1][1][3] & 0xf0);
805 *blkaddr++ = (srcpixels[1][2][3] >> 4) | (srcpixels[1][3][3] & 0xf0);
806 *blkaddr++ = (srcpixels[2][0][3] >> 4) | (srcpixels[2][1][3] & 0xf0);
807 *blkaddr++ = (srcpixels[2][2][3] >> 4) | (srcpixels[2][3][3] & 0xf0);
808 *blkaddr++ = (srcpixels[3][0][3] >> 4) | (srcpixels[3][1][3] & 0xf0);
809 *blkaddr++ = (srcpixels[3][2][3] >> 4) | (srcpixels[3][3][3] & 0xf0);
810 encodedxtcolorblockfaster(blkaddr, srcpixels, numxpixels, numypixels, destFormat);
811 srcaddr += srccomps * numxpixels;
812 blkaddr += 8;
814 blkaddr += dstRowDiff;
816 break;
817 case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
818 dstRowDiff = dstRowStride >= (width * 4) ? dstRowStride - (((width + 3) & ~3) * 4) : 0;
819 /* fprintf(stderr, "dxt5 tex width %d tex height %d dstRowStride %d\n",
820 width, height, dstRowStride); */
821 for (j = 0; j < height; j += 4) {
822 if (height > j + 3) numypixels = 4;
823 else numypixels = height - j;
824 srcaddr = srcPixData + j * width * srccomps;
825 for (i = 0; i < width; i += 4) {
826 if (width > i + 3) numxpixels = 4;
827 else numxpixels = width - i;
828 extractsrccolors(srcpixels, srcaddr, width, numxpixels, numypixels, srccomps);
829 encodedxt5alpha(blkaddr, srcpixels, numxpixels, numypixels);
830 encodedxtcolorblockfaster(blkaddr + 8, srcpixels, numxpixels, numypixels, destFormat);
831 srcaddr += srccomps * numxpixels;
832 blkaddr += 16;
834 blkaddr += dstRowDiff;
836 break;
837 default:
838 /* fprintf(stderr, "libdxtn: Bad dstFormat %d in tx_compress_dxtn\n", destFormat); */
839 return;