src/gallium/drivers/llvmpipe/lp_test_blend.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2009 VMware, Inc.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27
  28
  29 /**
  30  * @file
  31  * Unit tests for blend LLVM IR generation
  32  *
  33  * @author Jose Fonseca <jfonseca@vmware.com>
  34  *
  35  * Blend computation code derived from code written by
  36  * @author Brian Paul <brian@vmware.com>
  37  */
  38
  39
  40 #include "gallivm/lp_bld_type.h"
  41 #include "gallivm/lp_bld_blend.h"
  42 #include "gallivm/lp_bld_debug.h"
  43 #include "lp_test.h"
  44
  45
  46 enum vector_mode
  47 {
  48    AoS = 0,
  49    SoA = 1
  50 };
  51
  52
  53 typedef void (*blend_test_ptr_t)(const void *src, const void *dst, const void *con, void *res);
  54
  55
  56 void
  57 write_tsv_header(FILE *fp)
  58 {
  59    fprintf(fp,
  60            "result\t"
  61            "cycles_per_channel\t"
  62            "mode\t"
  63            "type\t"
  64            "sep_func\t"
  65            "sep_src_factor\t"
  66            "sep_dst_factor\t"
  67            "rgb_func\t"
  68            "rgb_src_factor\t"
  69            "rgb_dst_factor\t"
  70            "alpha_func\t"
  71            "alpha_src_factor\t"
  72            "alpha_dst_factor\n");
  73
  74    fflush(fp);
  75 }
  76
  77
  78 static void
  79 write_tsv_row(FILE *fp,
  80               const struct pipe_blend_state *blend,
  81               enum vector_mode mode,
  82               struct lp_type type,
  83               double cycles,
  84               boolean success)
  85 {
  86    fprintf(fp, "%s\t", success ? "pass" : "fail");
  87
  88    if (mode == AoS) {
  89       fprintf(fp, "%.1f\t", cycles / type.length);
  90       fprintf(fp, "aos\t");
  91    }
  92
  93    if (mode == SoA) {
  94       fprintf(fp, "%.1f\t", cycles / (4 * type.length));
  95       fprintf(fp, "soa\t");
  96    }
  97
  98    fprintf(fp, "%s%u%sx%u\t",
  99            type.floating ? "f" : (type.fixed ? "h" : (type.sign ? "s" : "u")),
 100            type.width,
 101            type.norm ? "n" : "",
 102            type.length);
 103
 104    fprintf(fp,
 105            "%s\t%s\t%s\t",
 106            blend->rt[0].rgb_func != blend->rt[0].alpha_func ? "true" : "false",
 107            blend->rt[0].rgb_src_factor != blend->rt[0].alpha_src_factor ? "true" : "false",
 108            blend->rt[0].rgb_dst_factor != blend->rt[0].alpha_dst_factor ? "true" : "false");
 109
 110    fprintf(fp,
 111            "%s\t%s\t%s\t%s\t%s\t%s\n",
 112            util_dump_blend_func(blend->rt[0].rgb_func, TRUE),
 113            util_dump_blend_factor(blend->rt[0].rgb_src_factor, TRUE),
 114            util_dump_blend_factor(blend->rt[0].rgb_dst_factor, TRUE),
 115            util_dump_blend_func(blend->rt[0].alpha_func, TRUE),
 116            util_dump_blend_factor(blend->rt[0].alpha_src_factor, TRUE),
 117            util_dump_blend_factor(blend->rt[0].alpha_dst_factor, TRUE));
 118
 119    fflush(fp);
 120 }
 121
 122
 123 static void
 124 dump_blend_type(FILE *fp,
 125                 const struct pipe_blend_state *blend,
 126                 enum vector_mode mode,
 127                 struct lp_type type)
 128 {
 129    fprintf(fp, "%s", mode ? "soa" : "aos");
 130
 131    fprintf(fp, " type=%s%u%sx%u",
 132            type.floating ? "f" : (type.fixed ? "h" : (type.sign ? "s" : "u")),
 133            type.width,
 134            type.norm ? "n" : "",
 135            type.length);
 136
 137    fprintf(fp,
 138            " %s=%s %s=%s %s=%s %s=%s %s=%s %s=%s",
 139            "rgb_func",         util_dump_blend_func(blend->rt[0].rgb_func, TRUE),
 140            "rgb_src_factor",   util_dump_blend_factor(blend->rt[0].rgb_src_factor, TRUE),
 141            "rgb_dst_factor",   util_dump_blend_factor(blend->rt[0].rgb_dst_factor, TRUE),
 142            "alpha_func",       util_dump_blend_func(blend->rt[0].alpha_func, TRUE),
 143            "alpha_src_factor", util_dump_blend_factor(blend->rt[0].alpha_src_factor, TRUE),
 144            "alpha_dst_factor", util_dump_blend_factor(blend->rt[0].alpha_dst_factor, TRUE));
 145
 146    fprintf(fp, " ...\n");
 147    fflush(fp);
 148 }
 149
 150
 151 static LLVMValueRef
 152 add_blend_test(LLVMModuleRef module,
 153                const struct pipe_blend_state *blend,
 154                enum vector_mode mode,
 155                struct lp_type type)
 156 {
 157    LLVMTypeRef ret_type;
 158    LLVMTypeRef vec_type;
 159    LLVMTypeRef args[4];
 160    LLVMValueRef func;
 161    LLVMValueRef src_ptr;
 162    LLVMValueRef dst_ptr;
 163    LLVMValueRef const_ptr;
 164    LLVMValueRef res_ptr;
 165    LLVMBasicBlockRef block;
 166    LLVMBuilderRef builder;
 167
 168    ret_type = LLVMInt64Type();
 169    vec_type = lp_build_vec_type(type);
 170
 171    args[3] = args[2] = args[1] = args[0] = LLVMPointerType(vec_type, 0);
 172    func = LLVMAddFunction(module, "test", LLVMFunctionType(LLVMVoidType(), args, 4, 0));
 173    LLVMSetFunctionCallConv(func, LLVMCCallConv);
 174    src_ptr = LLVMGetParam(func, 0);
 175    dst_ptr = LLVMGetParam(func, 1);
 176    const_ptr = LLVMGetParam(func, 2);
 177    res_ptr = LLVMGetParam(func, 3);
 178
 179    block = LLVMAppendBasicBlock(func, "entry");
 180    builder = LLVMCreateBuilder();
 181    LLVMPositionBuilderAtEnd(builder, block);
 182
 183    if (mode == AoS) {
 184       LLVMValueRef src;
 185       LLVMValueRef dst;
 186       LLVMValueRef con;
 187       LLVMValueRef res;
 188
 189       src = LLVMBuildLoad(builder, src_ptr, "src");
 190       dst = LLVMBuildLoad(builder, dst_ptr, "dst");
 191       con = LLVMBuildLoad(builder, const_ptr, "const");
 192
 193       res = lp_build_blend_aos(builder, blend, type, src, dst, con, 3);
 194
 195       lp_build_name(res, "res");
 196
 197       LLVMBuildStore(builder, res, res_ptr);
 198    }
 199
 200    if (mode == SoA) {
 201       LLVMValueRef src[4];
 202       LLVMValueRef dst[4];
 203       LLVMValueRef con[4];
 204       LLVMValueRef res[4];
 205       unsigned i;
 206
 207       for(i = 0; i < 4; ++i) {
 208          LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
 209          src[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, src_ptr, &index, 1, ""), "");
 210          dst[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dst_ptr, &index, 1, ""), "");
 211          con[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, const_ptr, &index, 1, ""), "");
 212          lp_build_name(src[i], "src.%c", "rgba"[i]);
 213          lp_build_name(con[i], "con.%c", "rgba"[i]);
 214          lp_build_name(dst[i], "dst.%c", "rgba"[i]);
 215       }
 216
 217       lp_build_blend_soa(builder, blend, type, src, dst, con, res);
 218
 219       for(i = 0; i < 4; ++i) {
 220          LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
 221          lp_build_name(res[i], "res.%c", "rgba"[i]);
 222          LLVMBuildStore(builder, res[i], LLVMBuildGEP(builder, res_ptr, &index, 1, ""));
 223       }
 224    }
 225
 226    LLVMBuildRetVoid(builder);;
 227
 228    LLVMDisposeBuilder(builder);
 229    return func;
 230 }
 231
 232
 233 /** Add and limit result to ceiling of 1.0 */
 234 #define ADD_SAT(R, A, B) \
 235 do { \
 236    R = (A) + (B);  if (R > 1.0f) R = 1.0f; \
 237 } while (0)
 238
 239 /** Subtract and limit result to floor of 0.0 */
 240 #define SUB_SAT(R, A, B) \
 241 do { \
 242    R = (A) - (B);  if (R < 0.0f) R = 0.0f; \
 243 } while (0)
 244
 245
 246 static void
 247 compute_blend_ref_term(unsigned rgb_factor,
 248                        unsigned alpha_factor,
 249                        const double *factor,
 250                        const double *src,
 251                        const double *dst,
 252                        const double *con,
 253                        double *term)
 254 {
 255    double temp;
 256
 257    switch (rgb_factor) {
 258    case PIPE_BLENDFACTOR_ONE:
 259       term[0] = factor[0]; /* R */
 260       term[1] = factor[1]; /* G */
 261       term[2] = factor[2]; /* B */
 262       break;
 263    case PIPE_BLENDFACTOR_SRC_COLOR:
 264       term[0] = factor[0] * src[0]; /* R */
 265       term[1] = factor[1] * src[1]; /* G */
 266       term[2] = factor[2] * src[2]; /* B */
 267       break;
 268    case PIPE_BLENDFACTOR_SRC_ALPHA:
 269       term[0] = factor[0] * src[3]; /* R */
 270       term[1] = factor[1] * src[3]; /* G */
 271       term[2] = factor[2] * src[3]; /* B */
 272       break;
 273    case PIPE_BLENDFACTOR_DST_COLOR:
 274       term[0] = factor[0] * dst[0]; /* R */
 275       term[1] = factor[1] * dst[1]; /* G */
 276       term[2] = factor[2] * dst[2]; /* B */
 277       break;
 278    case PIPE_BLENDFACTOR_DST_ALPHA:
 279       term[0] = factor[0] * dst[3]; /* R */
 280       term[1] = factor[1] * dst[3]; /* G */
 281       term[2] = factor[2] * dst[3]; /* B */
 282       break;
 283    case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
 284       temp = MIN2(src[3], 1.0f - dst[3]);
 285       term[0] = factor[0] * temp; /* R */
 286       term[1] = factor[1] * temp; /* G */
 287       term[2] = factor[2] * temp; /* B */
 288       break;
 289    case PIPE_BLENDFACTOR_CONST_COLOR:
 290       term[0] = factor[0] * con[0]; /* R */
 291       term[1] = factor[1] * con[1]; /* G */
 292       term[2] = factor[2] * con[2]; /* B */
 293       break;
 294    case PIPE_BLENDFACTOR_CONST_ALPHA:
 295       term[0] = factor[0] * con[3]; /* R */
 296       term[1] = factor[1] * con[3]; /* G */
 297       term[2] = factor[2] * con[3]; /* B */
 298       break;
 299    case PIPE_BLENDFACTOR_SRC1_COLOR:
 300       assert(0); /* to do */
 301       break;
 302    case PIPE_BLENDFACTOR_SRC1_ALPHA:
 303       assert(0); /* to do */
 304       break;
 305    case PIPE_BLENDFACTOR_ZERO:
 306       term[0] = 0.0f; /* R */
 307       term[1] = 0.0f; /* G */
 308       term[2] = 0.0f; /* B */
 309       break;
 310    case PIPE_BLENDFACTOR_INV_SRC_COLOR:
 311       term[0] = factor[0] * (1.0f - src[0]); /* R */
 312       term[1] = factor[1] * (1.0f - src[1]); /* G */
 313       term[2] = factor[2] * (1.0f - src[2]); /* B */
 314       break;
 315    case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
 316       term[0] = factor[0] * (1.0f - src[3]); /* R */
 317       term[1] = factor[1] * (1.0f - src[3]); /* G */
 318       term[2] = factor[2] * (1.0f - src[3]); /* B */
 319       break;
 320    case PIPE_BLENDFACTOR_INV_DST_ALPHA:
 321       term[0] = factor[0] * (1.0f - dst[3]); /* R */
 322       term[1] = factor[1] * (1.0f - dst[3]); /* G */
 323       term[2] = factor[2] * (1.0f - dst[3]); /* B */
 324       break;
 325    case PIPE_BLENDFACTOR_INV_DST_COLOR:
 326       term[0] = factor[0] * (1.0f - dst[0]); /* R */
 327       term[1] = factor[1] * (1.0f - dst[1]); /* G */
 328       term[2] = factor[2] * (1.0f - dst[2]); /* B */
 329       break;
 330    case PIPE_BLENDFACTOR_INV_CONST_COLOR:
 331       term[0] = factor[0] * (1.0f - con[0]); /* R */
 332       term[1] = factor[1] * (1.0f - con[1]); /* G */
 333       term[2] = factor[2] * (1.0f - con[2]); /* B */
 334       break;
 335    case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
 336       term[0] = factor[0] * (1.0f - con[3]); /* R */
 337       term[1] = factor[1] * (1.0f - con[3]); /* G */
 338       term[2] = factor[2] * (1.0f - con[3]); /* B */
 339       break;
 340    case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
 341       assert(0); /* to do */
 342       break;
 343    case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
 344       assert(0); /* to do */
 345       break;
 346    default:
 347       assert(0);
 348    }
 349
 350    /*
 351     * Compute src/first term A
 352     */
 353    switch (alpha_factor) {
 354    case PIPE_BLENDFACTOR_ONE:
 355       term[3] = factor[3]; /* A */
 356       break;
 357    case PIPE_BLENDFACTOR_SRC_COLOR:
 358    case PIPE_BLENDFACTOR_SRC_ALPHA:
 359       term[3] = factor[3] * src[3]; /* A */
 360       break;
 361    case PIPE_BLENDFACTOR_DST_COLOR:
 362    case PIPE_BLENDFACTOR_DST_ALPHA:
 363       term[3] = factor[3] * dst[3]; /* A */
 364       break;
 365    case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
 366       term[3] = src[3]; /* A */
 367       break;
 368    case PIPE_BLENDFACTOR_CONST_COLOR:
 369    case PIPE_BLENDFACTOR_CONST_ALPHA:
 370       term[3] = factor[3] * con[3]; /* A */
 371       break;
 372    case PIPE_BLENDFACTOR_ZERO:
 373       term[3] = 0.0f; /* A */
 374       break;
 375    case PIPE_BLENDFACTOR_INV_SRC_COLOR:
 376    case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
 377       term[3] = factor[3] * (1.0f - src[3]); /* A */
 378       break;
 379    case PIPE_BLENDFACTOR_INV_DST_COLOR:
 380    case PIPE_BLENDFACTOR_INV_DST_ALPHA:
 381       term[3] = factor[3] * (1.0f - dst[3]); /* A */
 382       break;
 383    case PIPE_BLENDFACTOR_INV_CONST_COLOR:
 384    case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
 385       term[3] = factor[3] * (1.0f - con[3]);
 386       break;
 387    default:
 388       assert(0);
 389    }
 390 }
 391
 392
 393 static void
 394 compute_blend_ref(const struct pipe_blend_state *blend,
 395                   const double *src,
 396                   const double *dst,
 397                   const double *con,
 398                   double *res)
 399 {
 400    double src_term[4];
 401    double dst_term[4];
 402
 403    compute_blend_ref_term(blend->rt[0].rgb_src_factor, blend->rt[0].alpha_src_factor,
 404                           src, src, dst, con, src_term);
 405    compute_blend_ref_term(blend->rt[0].rgb_dst_factor, blend->rt[0].alpha_dst_factor,
 406                           dst, src, dst, con, dst_term);
 407
 408    /*
 409     * Combine RGB terms
 410     */
 411    switch (blend->rt[0].rgb_func) {
 412    case PIPE_BLEND_ADD:
 413       ADD_SAT(res[0], src_term[0], dst_term[0]); /* R */
 414       ADD_SAT(res[1], src_term[1], dst_term[1]); /* G */
 415       ADD_SAT(res[2], src_term[2], dst_term[2]); /* B */
 416       break;
 417    case PIPE_BLEND_SUBTRACT:
 418       SUB_SAT(res[0], src_term[0], dst_term[0]); /* R */
 419       SUB_SAT(res[1], src_term[1], dst_term[1]); /* G */
 420       SUB_SAT(res[2], src_term[2], dst_term[2]); /* B */
 421       break;
 422    case PIPE_BLEND_REVERSE_SUBTRACT:
 423       SUB_SAT(res[0], dst_term[0], src_term[0]); /* R */
 424       SUB_SAT(res[1], dst_term[1], src_term[1]); /* G */
 425       SUB_SAT(res[2], dst_term[2], src_term[2]); /* B */
 426       break;
 427    case PIPE_BLEND_MIN:
 428       res[0] = MIN2(src_term[0], dst_term[0]); /* R */
 429       res[1] = MIN2(src_term[1], dst_term[1]); /* G */
 430       res[2] = MIN2(src_term[2], dst_term[2]); /* B */
 431       break;
 432    case PIPE_BLEND_MAX:
 433       res[0] = MAX2(src_term[0], dst_term[0]); /* R */
 434       res[1] = MAX2(src_term[1], dst_term[1]); /* G */
 435       res[2] = MAX2(src_term[2], dst_term[2]); /* B */
 436       break;
 437    default:
 438       assert(0);
 439    }
 440
 441    /*
 442     * Combine A terms
 443     */
 444    switch (blend->rt[0].alpha_func) {
 445    case PIPE_BLEND_ADD:
 446       ADD_SAT(res[3], src_term[3], dst_term[3]); /* A */
 447       break;
 448    case PIPE_BLEND_SUBTRACT:
 449       SUB_SAT(res[3], src_term[3], dst_term[3]); /* A */
 450       break;
 451    case PIPE_BLEND_REVERSE_SUBTRACT:
 452       SUB_SAT(res[3], dst_term[3], src_term[3]); /* A */
 453       break;
 454    case PIPE_BLEND_MIN:
 455       res[3] = MIN2(src_term[3], dst_term[3]); /* A */
 456       break;
 457    case PIPE_BLEND_MAX:
 458       res[3] = MAX2(src_term[3], dst_term[3]); /* A */
 459       break;
 460    default:
 461       assert(0);
 462    }
 463 }
 464
 465
 466 PIPE_ALIGN_STACK
 467 static boolean
 468 test_one(unsigned verbose,
 469          FILE *fp,
 470          const struct pipe_blend_state *blend,
 471          enum vector_mode mode,
 472          struct lp_type type)
 473 {
 474    LLVMModuleRef module = NULL;
 475    LLVMValueRef func = NULL;
 476    LLVMExecutionEngineRef engine = NULL;
 477    LLVMModuleProviderRef provider = NULL;
 478    LLVMPassManagerRef pass = NULL;
 479    char *error = NULL;
 480    blend_test_ptr_t blend_test_ptr;
 481    boolean success;
 482    const unsigned n = LP_TEST_NUM_SAMPLES;
 483    int64_t cycles[LP_TEST_NUM_SAMPLES];
 484    double cycles_avg = 0.0;
 485    unsigned i, j;
 486
 487    if(verbose >= 1)
 488       dump_blend_type(stdout, blend, mode, type);
 489
 490    module = LLVMModuleCreateWithName("test");
 491
 492    func = add_blend_test(module, blend, mode, type);
 493
 494    if(LLVMVerifyModule(module, LLVMPrintMessageAction, &error)) {
 495       LLVMDumpModule(module);
 496       abort();
 497    }
 498    LLVMDisposeMessage(error);
 499
 500    provider = LLVMCreateModuleProviderForExistingModule(module);
 501    if (LLVMCreateJITCompiler(&engine, provider, 1, &error)) {
 502       if(verbose < 1)
 503          dump_blend_type(stderr, blend, mode, type);
 504       fprintf(stderr, "%s\n", error);
 505       LLVMDisposeMessage(error);
 506       abort();
 507    }
 508
 509 #if 0
 510    pass = LLVMCreatePassManager();
 511    LLVMAddTargetData(LLVMGetExecutionEngineTargetData(engine), pass);
 512    /* These are the passes currently listed in llvm-c/Transforms/Scalar.h,
 513     * but there are more on SVN. */
 514    LLVMAddConstantPropagationPass(pass);
 515    LLVMAddInstructionCombiningPass(pass);
 516    LLVMAddPromoteMemoryToRegisterPass(pass);
 517    LLVMAddGVNPass(pass);
 518    LLVMAddCFGSimplificationPass(pass);
 519    LLVMRunPassManager(pass, module);
 520 #else
 521    (void)pass;
 522 #endif
 523
 524    if(verbose >= 2)
 525       LLVMDumpModule(module);
 526
 527    blend_test_ptr = (blend_test_ptr_t)LLVMGetPointerToGlobal(engine, func);
 528
 529    if(verbose >= 2)
 530       lp_disassemble(blend_test_ptr);
 531
 532    success = TRUE;
 533    for(i = 0; i < n && success; ++i) {
 534       if(mode == AoS) {
 535          PIPE_ALIGN_VAR(16) uint8_t src[LP_NATIVE_VECTOR_WIDTH/8];
 536          PIPE_ALIGN_VAR(16) uint8_t dst[LP_NATIVE_VECTOR_WIDTH/8];
 537          PIPE_ALIGN_VAR(16) uint8_t con[LP_NATIVE_VECTOR_WIDTH/8];
 538          PIPE_ALIGN_VAR(16) uint8_t res[LP_NATIVE_VECTOR_WIDTH/8];
 539          PIPE_ALIGN_VAR(16) uint8_t ref[LP_NATIVE_VECTOR_WIDTH/8];
 540          int64_t start_counter = 0;
 541          int64_t end_counter = 0;
 542
 543          random_vec(type, src);
 544          random_vec(type, dst);
 545          random_vec(type, con);
 546
 547          {
 548             double fsrc[LP_MAX_VECTOR_LENGTH];
 549             double fdst[LP_MAX_VECTOR_LENGTH];
 550             double fcon[LP_MAX_VECTOR_LENGTH];
 551             double fref[LP_MAX_VECTOR_LENGTH];
 552
 553             read_vec(type, src, fsrc);
 554             read_vec(type, dst, fdst);
 555             read_vec(type, con, fcon);
 556
 557             for(j = 0; j < type.length; j += 4)
 558                compute_blend_ref(blend, fsrc + j, fdst + j, fcon + j, fref + j);
 559
 560             write_vec(type, ref, fref);
 561          }
 562
 563          start_counter = rdtsc();
 564          blend_test_ptr(src, dst, con, res);
 565          end_counter = rdtsc();
 566
 567          cycles[i] = end_counter - start_counter;
 568
 569          if(!compare_vec(type, res, ref)) {
 570             success = FALSE;
 571
 572             if(verbose < 1)
 573                dump_blend_type(stderr, blend, mode, type);
 574             fprintf(stderr, "MISMATCH\n");
 575
 576             fprintf(stderr, "  Src: ");
 577             dump_vec(stderr, type, src);
 578             fprintf(stderr, "\n");
 579
 580             fprintf(stderr, "  Dst: ");
 581             dump_vec(stderr, type, dst);
 582             fprintf(stderr, "\n");
 583
 584             fprintf(stderr, "  Con: ");
 585             dump_vec(stderr, type, con);
 586             fprintf(stderr, "\n");
 587
 588             fprintf(stderr, "  Res: ");
 589             dump_vec(stderr, type, res);
 590             fprintf(stderr, "\n");
 591
 592             fprintf(stderr, "  Ref: ");
 593             dump_vec(stderr, type, ref);
 594             fprintf(stderr, "\n");
 595          }
 596       }
 597
 598       if(mode == SoA) {
 599          const unsigned stride = type.length*type.width/8;
 600          PIPE_ALIGN_VAR(16) uint8_t src[4*LP_NATIVE_VECTOR_WIDTH/8];
 601          PIPE_ALIGN_VAR(16) uint8_t dst[4*LP_NATIVE_VECTOR_WIDTH/8];
 602          PIPE_ALIGN_VAR(16) uint8_t con[4*LP_NATIVE_VECTOR_WIDTH/8];
 603          PIPE_ALIGN_VAR(16) uint8_t res[4*LP_NATIVE_VECTOR_WIDTH/8];
 604          PIPE_ALIGN_VAR(16) uint8_t ref[4*LP_NATIVE_VECTOR_WIDTH/8];
 605          int64_t start_counter = 0;
 606          int64_t end_counter = 0;
 607          boolean mismatch;
 608
 609          for(j = 0; j < 4; ++j) {
 610             random_vec(type, src + j*stride);
 611             random_vec(type, dst + j*stride);
 612             random_vec(type, con + j*stride);
 613          }
 614
 615          {
 616             double fsrc[4];
 617             double fdst[4];
 618             double fcon[4];
 619             double fref[4];
 620             unsigned k;
 621
 622             for(k = 0; k < type.length; ++k) {
 623                for(j = 0; j < 4; ++j) {
 624                   fsrc[j] = read_elem(type, src + j*stride, k);
 625                   fdst[j] = read_elem(type, dst + j*stride, k);
 626                   fcon[j] = read_elem(type, con + j*stride, k);
 627                }
 628
 629                compute_blend_ref(blend, fsrc, fdst, fcon, fref);
 630
 631                for(j = 0; j < 4; ++j)
 632                   write_elem(type, ref + j*stride, k, fref[j]);
 633             }
 634          }
 635
 636          start_counter = rdtsc();
 637          blend_test_ptr(src, dst, con, res);
 638          end_counter = rdtsc();
 639
 640          cycles[i] = end_counter - start_counter;
 641
 642          mismatch = FALSE;
 643          for (j = 0; j < 4; ++j)
 644             if(!compare_vec(type, res + j*stride, ref + j*stride))
 645                mismatch = TRUE;
 646
 647          if (mismatch) {
 648             success = FALSE;
 649
 650             if(verbose < 1)
 651                dump_blend_type(stderr, blend, mode, type);
 652             fprintf(stderr, "MISMATCH\n");
 653             for(j = 0; j < 4; ++j) {
 654                char channel = "RGBA"[j];
 655                fprintf(stderr, "  Src%c: ", channel);
 656                dump_vec(stderr, type, src + j*stride);
 657                fprintf(stderr, "\n");
 658
 659                fprintf(stderr, "  Dst%c: ", channel);
 660                dump_vec(stderr, type, dst + j*stride);
 661                fprintf(stderr, "\n");
 662
 663                fprintf(stderr, "  Con%c: ", channel);
 664                dump_vec(stderr, type, con + j*stride);
 665                fprintf(stderr, "\n");
 666
 667                fprintf(stderr, "  Res%c: ", channel);
 668                dump_vec(stderr, type, res + j*stride);
 669                fprintf(stderr, "\n");
 670
 671                fprintf(stderr, "  Ref%c: ", channel);
 672                dump_vec(stderr, type, ref + j*stride);
 673                fprintf(stderr, "\n");
 674             }
 675          }
 676       }
 677    }
 678
 679    /*
 680     * Unfortunately the output of cycle counter is not very reliable as it comes
 681     * -- sometimes we get outliers (due IRQs perhaps?) which are
 682     * better removed to avoid random or biased data.
 683     */
 684    {
 685       double sum = 0.0, sum2 = 0.0;
 686       double avg, std;
 687       unsigned m;
 688
 689       for(i = 0; i < n; ++i) {
 690          sum += cycles[i];
 691          sum2 += cycles[i]*cycles[i];
 692       }
 693
 694       avg = sum/n;
 695       std = sqrtf((sum2 - n*avg*avg)/n);
 696
 697       m = 0;
 698       sum = 0.0;
 699       for(i = 0; i < n; ++i) {
 700          if(fabs(cycles[i] - avg) <= 4.0*std) {
 701             sum += cycles[i];
 702             ++m;
 703          }
 704       }
 705
 706       cycles_avg = sum/m;
 707
 708    }
 709
 710    if(fp)
 711       write_tsv_row(fp, blend, mode, type, cycles_avg, success);
 712
 713    if (!success) {
 714       if(verbose < 2)
 715          LLVMDumpModule(module);
 716       LLVMWriteBitcodeToFile(module, "blend.bc");
 717       fprintf(stderr, "blend.bc written\n");
 718       fprintf(stderr, "Invoke as \"llc -o - blend.bc\"\n");
 719       abort();
 720    }
 721
 722    LLVMFreeMachineCodeForFunction(engine, func);
 723
 724    LLVMDisposeExecutionEngine(engine);
 725    if(pass)
 726       LLVMDisposePassManager(pass);
 727
 728    return success;
 729 }
 730
 731
 732 const unsigned
 733 blend_factors[] = {
 734    PIPE_BLENDFACTOR_ZERO,
 735    PIPE_BLENDFACTOR_ONE,
 736    PIPE_BLENDFACTOR_SRC_COLOR,
 737    PIPE_BLENDFACTOR_SRC_ALPHA,
 738    PIPE_BLENDFACTOR_DST_COLOR,
 739    PIPE_BLENDFACTOR_DST_ALPHA,
 740    PIPE_BLENDFACTOR_CONST_COLOR,
 741    PIPE_BLENDFACTOR_CONST_ALPHA,
 742 #if 0
 743    PIPE_BLENDFACTOR_SRC1_COLOR,
 744    PIPE_BLENDFACTOR_SRC1_ALPHA,
 745 #endif
 746    PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE,
 747    PIPE_BLENDFACTOR_INV_SRC_COLOR,
 748    PIPE_BLENDFACTOR_INV_SRC_ALPHA,
 749    PIPE_BLENDFACTOR_INV_DST_COLOR,
 750    PIPE_BLENDFACTOR_INV_DST_ALPHA,
 751    PIPE_BLENDFACTOR_INV_CONST_COLOR,
 752    PIPE_BLENDFACTOR_INV_CONST_ALPHA,
 753 #if 0
 754    PIPE_BLENDFACTOR_INV_SRC1_COLOR,
 755    PIPE_BLENDFACTOR_INV_SRC1_ALPHA,
 756 #endif
 757 };
 758
 759
 760 const unsigned
 761 blend_funcs[] = {
 762    PIPE_BLEND_ADD,
 763    PIPE_BLEND_SUBTRACT,
 764    PIPE_BLEND_REVERSE_SUBTRACT,
 765    PIPE_BLEND_MIN,
 766    PIPE_BLEND_MAX
 767 };
 768
 769
 770 const struct lp_type blend_types[] = {
 771    /* float, fixed,  sign,  norm, width, len */
 772    {   TRUE, FALSE, FALSE,  TRUE,    32,   4 }, /* f32 x 4 */
 773    {  FALSE, FALSE, FALSE,  TRUE,     8,  16 }, /* u8n x 16 */
 774 };
 775
 776
 777 const unsigned num_funcs = sizeof(blend_funcs)/sizeof(blend_funcs[0]);
 778 const unsigned num_factors = sizeof(blend_factors)/sizeof(blend_factors[0]);
 779 const unsigned num_types = sizeof(blend_types)/sizeof(blend_types[0]);
 780
 781
 782 boolean
 783 test_all(unsigned verbose, FILE *fp)
 784 {
 785    const unsigned *rgb_func;
 786    const unsigned *rgb_src_factor;
 787    const unsigned *rgb_dst_factor;
 788    const unsigned *alpha_func;
 789    const unsigned *alpha_src_factor;
 790    const unsigned *alpha_dst_factor;
 791    struct pipe_blend_state blend;
 792    enum vector_mode mode;
 793    const struct lp_type *type;
 794    bool success = TRUE;
 795
 796    for(rgb_func = blend_funcs; rgb_func < &blend_funcs[num_funcs]; ++rgb_func) {
 797       for(alpha_func = blend_funcs; alpha_func < &blend_funcs[num_funcs]; ++alpha_func) {
 798          for(rgb_src_factor = blend_factors; rgb_src_factor < &blend_factors[num_factors]; ++rgb_src_factor) {
 799             for(rgb_dst_factor = blend_factors; rgb_dst_factor <= rgb_src_factor; ++rgb_dst_factor) {
 800                for(alpha_src_factor = blend_factors; alpha_src_factor < &blend_factors[num_factors]; ++alpha_src_factor) {
 801                   for(alpha_dst_factor = blend_factors; alpha_dst_factor <= alpha_src_factor; ++alpha_dst_factor) {
 802                      for(mode = 0; mode < 2; ++mode) {
 803                         for(type = blend_types; type < &blend_types[num_types]; ++type) {
 804
 805                            if(*rgb_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
 806                               *alpha_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE)
 807                               continue;
 808
 809                            memset(&blend, 0, sizeof blend);
 810                            blend.rt[0].blend_enable      = 1;
 811                            blend.rt[0].rgb_func          = *rgb_func;
 812                            blend.rt[0].rgb_src_factor    = *rgb_src_factor;
 813                            blend.rt[0].rgb_dst_factor    = *rgb_dst_factor;
 814                            blend.rt[0].alpha_func        = *alpha_func;
 815                            blend.rt[0].alpha_src_factor  = *alpha_src_factor;
 816                            blend.rt[0].alpha_dst_factor  = *alpha_dst_factor;
 817                            blend.rt[0].colormask         = PIPE_MASK_RGBA;
 818
 819                            if(!test_one(verbose, fp, &blend, mode, *type))
 820                              success = FALSE;
 821
 822                         }
 823                      }
 824                   }
 825                }
 826             }
 827          }
 828       }
 829    }
 830
 831    return success;
 832 }
 833
 834
 835 boolean
 836 test_some(unsigned verbose, FILE *fp, unsigned long n)
 837 {
 838    const unsigned *rgb_func;
 839    const unsigned *rgb_src_factor;
 840    const unsigned *rgb_dst_factor;
 841    const unsigned *alpha_func;
 842    const unsigned *alpha_src_factor;
 843    const unsigned *alpha_dst_factor;
 844    struct pipe_blend_state blend;
 845    enum vector_mode mode;
 846    const struct lp_type *type;
 847    unsigned long i;
 848    bool success = TRUE;
 849
 850    for(i = 0; i < n; ++i) {
 851       rgb_func = &blend_funcs[rand() % num_funcs];
 852       alpha_func = &blend_funcs[rand() % num_funcs];
 853       rgb_src_factor = &blend_factors[rand() % num_factors];
 854       alpha_src_factor = &blend_factors[rand() % num_factors];
 855
 856       do {
 857          rgb_dst_factor = &blend_factors[rand() % num_factors];
 858       } while(*rgb_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE);
 859
 860       do {
 861          alpha_dst_factor = &blend_factors[rand() % num_factors];
 862       } while(*alpha_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE);
 863
 864       mode = rand() & 1;
 865
 866       type = &blend_types[rand() % num_types];
 867
 868       memset(&blend, 0, sizeof blend);
 869       blend.rt[0].blend_enable      = 1;
 870       blend.rt[0].rgb_func          = *rgb_func;
 871       blend.rt[0].rgb_src_factor    = *rgb_src_factor;
 872       blend.rt[0].rgb_dst_factor    = *rgb_dst_factor;
 873       blend.rt[0].alpha_func        = *alpha_func;
 874       blend.rt[0].alpha_src_factor  = *alpha_src_factor;
 875       blend.rt[0].alpha_dst_factor  = *alpha_dst_factor;
 876       blend.rt[0].colormask         = PIPE_MASK_RGBA;
 877
 878       if(!test_one(verbose, fp, &blend, mode, *type))
 879         success = FALSE;
 880    }
 881
 882    return success;
 883 }