1 /*============================================================================
4 NVIDIA FXAA 3.11 by TIMOTHY LOTTES
7 ------------------------------------------------------------------------------
8 COPYRIGHT (C) 2010, 2011 NVIDIA CORPORATION. ALL RIGHTS RESERVED.
9 ------------------------------------------------------------------------------
10 TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THIS SOFTWARE IS PROVIDED
11 *AS IS* AND NVIDIA AND ITS SUPPLIERS DISCLAIM ALL WARRANTIES, EITHER EXPRESS
12 OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, IMPLIED WARRANTIES OF
13 MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT SHALL NVIDIA
14 OR ITS SUPPLIERS BE LIABLE FOR ANY SPECIAL, INCIDENTAL, INDIRECT, OR
15 CONSEQUENTIAL DAMAGES WHATSOEVER (INCLUDING, WITHOUT LIMITATION, DAMAGES FOR
16 LOSS OF BUSINESS PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION,
17 OR ANY OTHER PECUNIARY LOSS) ARISING OUT OF THE USE OF OR INABILITY TO USE
18 THIS SOFTWARE, EVEN IF NVIDIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
21 ------------------------------------------------------------------------------
23 ------------------------------------------------------------------------------
25 In the shader source, setup defines for the desired configuration.
26 When providing multiple shaders (for different presets),
27 simply setup the defines differently in multiple files.
32 #define FXAA_QUALITY__PRESET 12
43 Then include this file,
48 Then call the FXAA pixel shader from within your desired shader.
49 Look at the FXAA Quality FxaaPixelShader() for docs on inputs.
50 As of FXAA 3.11 all inputs for all shaders are the same
51 to enable easy porting between platforms.
53 return FxaaPixelShader(...);
56 Ensure the pass prior to FXAA outputs RGBL (see next section).
59 #define FXAA_GREEN_AS_LUMA 1
62 Setup engine to provide the following constants
63 which are used in the FxaaPixelShader() inputs,
65 FxaaFloat2 fxaaQualityRcpFrame,
66 FxaaFloat4 fxaaConsoleRcpFrameOpt,
67 FxaaFloat4 fxaaConsoleRcpFrameOpt2,
68 FxaaFloat4 fxaaConsole360RcpFrameOpt2,
69 FxaaFloat fxaaQualitySubpix,
70 FxaaFloat fxaaQualityEdgeThreshold,
71 FxaaFloat fxaaQualityEdgeThresholdMin,
72 FxaaFloat fxaaConsoleEdgeSharpness,
73 FxaaFloat fxaaConsoleEdgeThreshold,
74 FxaaFloat fxaaConsoleEdgeThresholdMin,
75 FxaaFloat4 fxaaConsole360ConstDir
77 Look at the FXAA Quality FxaaPixelShader() for docs on inputs.
80 Have FXAA vertex shader run as a full screen triangle,
81 and output "pos" and "fxaaConsolePosPos"
82 such that inputs in the pixel shader provide,
84 // {xy} = center of pixel
87 // {xy__} = upper left of pixel
88 // {__zw} = lower right of pixel
89 FxaaFloat4 fxaaConsolePosPos,
92 Ensure the texture sampler(s) used by FXAA are set to bilinear filtering.
95 ------------------------------------------------------------------------------
96 INTEGRATION - RGBL AND COLORSPACE
97 ------------------------------------------------------------------------------
98 FXAA3 requires RGBL as input unless the following is set,
100 #define FXAA_GREEN_AS_LUMA 1
102 In which case the engine uses green in place of luma,
103 and requires RGB input is in a non-linear colorspace.
105 RGB should be LDR (low dynamic range).
106 Specifically do FXAA after tonemapping.
108 RGB data as returned by a texture fetch can be non-linear,
109 or linear when FXAA_GREEN_AS_LUMA is not set.
110 Note an "sRGB format" texture counts as linear,
111 because the result of a texture fetch is linear data.
112 Regular "RGBA8" textures in the sRGB colorspace are non-linear.
114 If FXAA_GREEN_AS_LUMA is not set,
115 luma must be stored in the alpha channel prior to running FXAA.
116 This luma should be in a perceptual space (could be gamma 2.0).
117 Example pass before FXAA where output is gamma 2.0 encoded,
119 color.rgb = ToneMap(color.rgb); // linear color output
120 color.rgb = sqrt(color.rgb); // gamma 2.0 color output
125 color.rgb = ToneMap(color.rgb); // linear color output
126 color.rgb = sqrt(color.rgb); // gamma 2.0 color output
127 color.a = dot(color.rgb, FxaaFloat3(0.299, 0.587, 0.114)); // compute luma
130 Another example where output is linear encoded,
131 say for instance writing to an sRGB formatted render target,
132 where the render target does the conversion back to sRGB after blending,
134 color.rgb = ToneMap(color.rgb); // linear color output
139 color.rgb = ToneMap(color.rgb); // linear color output
140 color.a = sqrt(dot(color.rgb, FxaaFloat3(0.299, 0.587, 0.114))); // compute luma
143 Getting luma correct is required for the algorithm to work correctly.
146 ------------------------------------------------------------------------------
147 BEING LINEARLY CORRECT?
148 ------------------------------------------------------------------------------
149 Applying FXAA to a framebuffer with linear RGB color will look worse.
150 This is very counter-intuitive, but happens to be true in this case.
151 The reason is that dithering artifacts will be more visible
152 in a linear colorspace.
155 ------------------------------------------------------------------------------
157 ------------------------------------------------------------------------------
158 Q. What if the engine is blending into RGB before wanting to run FXAA?
160 A. In the last opaque pass prior to FXAA,
161 have the pass write out luma into alpha.
162 Then blend into RGB only.
163 FXAA should be able to run ok
164 assuming the blending pass did not add any aliasing.
165 This should be the common case for particles and common blending passes.
167 A. Or use FXAA_GREEN_AS_LUMA.
169 ============================================================================*/
171 /*============================================================================
175 ============================================================================*/
177 // FXAA_PS3 and FXAA_360 choose the console algorithm (FXAA3 CONSOLE).
178 // FXAA_360_OPT is a prototype for the new optimized 360 version.
181 // 0 = Don't use API.
183 /*--------------------------------------------------------------------------*/
187 /*--------------------------------------------------------------------------*/
191 /*--------------------------------------------------------------------------*/
193 #define FXAA_360_OPT 0
195 /*==========================================================================*/
199 // The high quality PC algorithm.
203 /*--------------------------------------------------------------------------*/
204 #ifndef FXAA_PC_CONSOLE
206 // The console algorithm for PC is included
207 // for developers targeting really low spec machines.
208 // Likely better to just run FXAA_PC, and use a really low preset.
210 #define FXAA_PC_CONSOLE 0
212 /*--------------------------------------------------------------------------*/
213 #ifndef FXAA_GLSL_120
214 #define FXAA_GLSL_120 0
216 /*--------------------------------------------------------------------------*/
217 #ifndef FXAA_GLSL_130
218 #define FXAA_GLSL_130 0
220 /*--------------------------------------------------------------------------*/
222 #define FXAA_HLSL_3 0
224 /*--------------------------------------------------------------------------*/
226 #define FXAA_HLSL_4 0
228 /*--------------------------------------------------------------------------*/
230 #define FXAA_HLSL_5 0
232 /*==========================================================================*/
233 #ifndef FXAA_GREEN_AS_LUMA
235 // For those using non-linear color,
236 // and either not able to get luma in alpha, or not wanting to,
237 // this enables FXAA to run using green as a proxy for luma.
238 // So with this enabled, no need to pack luma in alpha.
240 // This will turn off AA on anything which lacks some amount of green.
241 // Pure red and blue or combination of only R and B, will get no AA.
243 // Might want to lower the settings for both,
244 // fxaaConsoleEdgeThresholdMin
245 // fxaaQualityEdgeThresholdMin
246 // In order to ensure AA does not get turned off on colors
247 // which contain a minor amount of green.
252 #define FXAA_GREEN_AS_LUMA 0
254 /*--------------------------------------------------------------------------*/
255 #ifndef FXAA_EARLY_EXIT
257 // Controls algorithm's early exit path.
258 // On PS3 turning this ON adds 2 cycles to the shader.
259 // On 360 turning this OFF adds 10ths of a millisecond to the shader.
260 // Turning this off on console will result in a more blurry image.
261 // So this defaults to on.
266 #define FXAA_EARLY_EXIT 1
268 /*--------------------------------------------------------------------------*/
271 // Only valid for PC OpenGL currently.
272 // Probably will not work when FXAA_GREEN_AS_LUMA = 1.
274 // 1 = Use discard on pixels which don't need AA.
275 // For APIs which enable concurrent TEX+ROP from same surface.
276 // 0 = Return unchanged color on pixels which don't need AA.
278 #define FXAA_DISCARD 0
280 /*--------------------------------------------------------------------------*/
281 #ifndef FXAA_FAST_PIXEL_OFFSET
283 // Used for GLSL 120 only.
285 // 1 = GL API supports fast pixel offsets
286 // 0 = do not use fast pixel offsets
288 #ifdef GL_EXT_gpu_shader4
289 #define FXAA_FAST_PIXEL_OFFSET 1
291 #ifdef GL_NV_gpu_shader5
292 #define FXAA_FAST_PIXEL_OFFSET 1
294 #ifdef GL_ARB_gpu_shader5
295 #define FXAA_FAST_PIXEL_OFFSET 1
297 #ifndef FXAA_FAST_PIXEL_OFFSET
298 #define FXAA_FAST_PIXEL_OFFSET 0
301 /*--------------------------------------------------------------------------*/
302 #ifndef FXAA_GATHER4_ALPHA
304 // 1 = API supports gather4 on alpha channel.
305 // 0 = API does not support gather4 on alpha channel.
307 #if (FXAA_HLSL_5 == 1)
308 #define FXAA_GATHER4_ALPHA 1
310 #ifdef GL_ARB_gpu_shader5
311 #define FXAA_GATHER4_ALPHA 1
313 #ifdef GL_NV_gpu_shader5
314 #define FXAA_GATHER4_ALPHA 1
316 #ifndef FXAA_GATHER4_ALPHA
317 #define FXAA_GATHER4_ALPHA 0
321 /*============================================================================
322 FXAA CONSOLE PS3 - TUNING KNOBS
323 ============================================================================*/
324 #ifndef FXAA_CONSOLE__PS3_EDGE_SHARPNESS
326 // Controls the sharpness of edges on PS3 only.
327 // Non-PS3 tuning is done with shader input.
329 // Due to the PS3 being ALU bound,
330 // there are only two safe values here: 4 and 8.
331 // These options use the shader's ability to do a free *|/ by 2|4|8.
335 // 2.0 is really soft (good for vector graphics inputs)
338 #define FXAA_CONSOLE__PS3_EDGE_SHARPNESS 8.0
341 #define FXAA_CONSOLE__PS3_EDGE_SHARPNESS 4.0
344 #define FXAA_CONSOLE__PS3_EDGE_SHARPNESS 2.0
347 /*--------------------------------------------------------------------------*/
348 #ifndef FXAA_CONSOLE__PS3_EDGE_THRESHOLD
351 // Non-PS3 tuning is done with shader input.
353 // The minimum amount of local contrast required to apply algorithm.
354 // The console setting has a different mapping than the quality setting.
356 // This only applies when FXAA_EARLY_EXIT is 1.
358 // Due to the PS3 being ALU bound,
359 // there are only two safe values here: 0.25 and 0.125.
360 // These options use the shader's ability to do a free *|/ by 2|4|8.
362 // 0.125 leaves less aliasing, but is softer
363 // 0.25 leaves more aliasing, and is sharper
366 #define FXAA_CONSOLE__PS3_EDGE_THRESHOLD 0.125
368 #define FXAA_CONSOLE__PS3_EDGE_THRESHOLD 0.25
372 /*============================================================================
373 FXAA QUALITY - TUNING KNOBS
374 ------------------------------------------------------------------------------
375 NOTE the other tuning knobs are now in the shader function inputs!
376 ============================================================================*/
377 #ifndef FXAA_QUALITY__PRESET
379 // Choose the quality preset.
380 // This needs to be compiled into the shader as it affects code.
381 // Best option to include multiple presets is to
382 // in each shader define the preset, then include this file.
385 // -----------------------------------------------------------------------
386 // 10 to 15 - default medium dither (10=fastest, 15=highest quality)
387 // 20 to 29 - less dither, more expensive (20=fastest, 29=highest quality)
388 // 39 - no dither, very expensive
391 // -----------------------------------------------------------------------
392 // 12 = slightly faster than FXAA 3.9 and higher edge quality (default)
393 // 13 = about same speed as FXAA 3.9 and better than 12
394 // 23 = closest to FXAA 3.9 visually and performance wise
395 // _ = the lowest digit is directly related to performance
396 // _ = the highest digit is directly related to style
398 #define FXAA_QUALITY__PRESET 12
402 /*============================================================================
404 FXAA QUALITY - PRESETS
406 ============================================================================*/
408 /*============================================================================
409 FXAA QUALITY - MEDIUM DITHER PRESETS
410 ============================================================================*/
411 #if (FXAA_QUALITY__PRESET == 10)
412 #define FXAA_QUALITY__PS 3
413 #define FXAA_QUALITY__P0 1.5
414 #define FXAA_QUALITY__P1 3.0
415 #define FXAA_QUALITY__P2 12.0
417 /*--------------------------------------------------------------------------*/
418 #if (FXAA_QUALITY__PRESET == 11)
419 #define FXAA_QUALITY__PS 4
420 #define FXAA_QUALITY__P0 1.0
421 #define FXAA_QUALITY__P1 1.5
422 #define FXAA_QUALITY__P2 3.0
423 #define FXAA_QUALITY__P3 12.0
425 /*--------------------------------------------------------------------------*/
426 #if (FXAA_QUALITY__PRESET == 12)
427 #define FXAA_QUALITY__PS 5
428 #define FXAA_QUALITY__P0 1.0
429 #define FXAA_QUALITY__P1 1.5
430 #define FXAA_QUALITY__P2 2.0
431 #define FXAA_QUALITY__P3 4.0
432 #define FXAA_QUALITY__P4 12.0
434 /*--------------------------------------------------------------------------*/
435 #if (FXAA_QUALITY__PRESET == 13)
436 #define FXAA_QUALITY__PS 6
437 #define FXAA_QUALITY__P0 1.0
438 #define FXAA_QUALITY__P1 1.5
439 #define FXAA_QUALITY__P2 2.0
440 #define FXAA_QUALITY__P3 2.0
441 #define FXAA_QUALITY__P4 4.0
442 #define FXAA_QUALITY__P5 12.0
444 /*--------------------------------------------------------------------------*/
445 #if (FXAA_QUALITY__PRESET == 14)
446 #define FXAA_QUALITY__PS 7
447 #define FXAA_QUALITY__P0 1.0
448 #define FXAA_QUALITY__P1 1.5
449 #define FXAA_QUALITY__P2 2.0
450 #define FXAA_QUALITY__P3 2.0
451 #define FXAA_QUALITY__P4 2.0
452 #define FXAA_QUALITY__P5 4.0
453 #define FXAA_QUALITY__P6 12.0
455 /*--------------------------------------------------------------------------*/
456 #if (FXAA_QUALITY__PRESET == 15)
457 #define FXAA_QUALITY__PS 8
458 #define FXAA_QUALITY__P0 1.0
459 #define FXAA_QUALITY__P1 1.5
460 #define FXAA_QUALITY__P2 2.0
461 #define FXAA_QUALITY__P3 2.0
462 #define FXAA_QUALITY__P4 2.0
463 #define FXAA_QUALITY__P5 2.0
464 #define FXAA_QUALITY__P6 4.0
465 #define FXAA_QUALITY__P7 12.0
468 /*============================================================================
469 FXAA QUALITY - LOW DITHER PRESETS
470 ============================================================================*/
471 #if (FXAA_QUALITY__PRESET == 20)
472 #define FXAA_QUALITY__PS 3
473 #define FXAA_QUALITY__P0 1.5
474 #define FXAA_QUALITY__P1 2.0
475 #define FXAA_QUALITY__P2 8.0
477 /*--------------------------------------------------------------------------*/
478 #if (FXAA_QUALITY__PRESET == 21)
479 #define FXAA_QUALITY__PS 4
480 #define FXAA_QUALITY__P0 1.0
481 #define FXAA_QUALITY__P1 1.5
482 #define FXAA_QUALITY__P2 2.0
483 #define FXAA_QUALITY__P3 8.0
485 /*--------------------------------------------------------------------------*/
486 #if (FXAA_QUALITY__PRESET == 22)
487 #define FXAA_QUALITY__PS 5
488 #define FXAA_QUALITY__P0 1.0
489 #define FXAA_QUALITY__P1 1.5
490 #define FXAA_QUALITY__P2 2.0
491 #define FXAA_QUALITY__P3 2.0
492 #define FXAA_QUALITY__P4 8.0
494 /*--------------------------------------------------------------------------*/
495 #if (FXAA_QUALITY__PRESET == 23)
496 #define FXAA_QUALITY__PS 6
497 #define FXAA_QUALITY__P0 1.0
498 #define FXAA_QUALITY__P1 1.5
499 #define FXAA_QUALITY__P2 2.0
500 #define FXAA_QUALITY__P3 2.0
501 #define FXAA_QUALITY__P4 2.0
502 #define FXAA_QUALITY__P5 8.0
504 /*--------------------------------------------------------------------------*/
505 #if (FXAA_QUALITY__PRESET == 24)
506 #define FXAA_QUALITY__PS 7
507 #define FXAA_QUALITY__P0 1.0
508 #define FXAA_QUALITY__P1 1.5
509 #define FXAA_QUALITY__P2 2.0
510 #define FXAA_QUALITY__P3 2.0
511 #define FXAA_QUALITY__P4 2.0
512 #define FXAA_QUALITY__P5 3.0
513 #define FXAA_QUALITY__P6 8.0
515 /*--------------------------------------------------------------------------*/
516 #if (FXAA_QUALITY__PRESET == 25)
517 #define FXAA_QUALITY__PS 8
518 #define FXAA_QUALITY__P0 1.0
519 #define FXAA_QUALITY__P1 1.5
520 #define FXAA_QUALITY__P2 2.0
521 #define FXAA_QUALITY__P3 2.0
522 #define FXAA_QUALITY__P4 2.0
523 #define FXAA_QUALITY__P5 2.0
524 #define FXAA_QUALITY__P6 4.0
525 #define FXAA_QUALITY__P7 8.0
527 /*--------------------------------------------------------------------------*/
528 #if (FXAA_QUALITY__PRESET == 26)
529 #define FXAA_QUALITY__PS 9
530 #define FXAA_QUALITY__P0 1.0
531 #define FXAA_QUALITY__P1 1.5
532 #define FXAA_QUALITY__P2 2.0
533 #define FXAA_QUALITY__P3 2.0
534 #define FXAA_QUALITY__P4 2.0
535 #define FXAA_QUALITY__P5 2.0
536 #define FXAA_QUALITY__P6 2.0
537 #define FXAA_QUALITY__P7 4.0
538 #define FXAA_QUALITY__P8 8.0
540 /*--------------------------------------------------------------------------*/
541 #if (FXAA_QUALITY__PRESET == 27)
542 #define FXAA_QUALITY__PS 10
543 #define FXAA_QUALITY__P0 1.0
544 #define FXAA_QUALITY__P1 1.5
545 #define FXAA_QUALITY__P2 2.0
546 #define FXAA_QUALITY__P3 2.0
547 #define FXAA_QUALITY__P4 2.0
548 #define FXAA_QUALITY__P5 2.0
549 #define FXAA_QUALITY__P6 2.0
550 #define FXAA_QUALITY__P7 2.0
551 #define FXAA_QUALITY__P8 4.0
552 #define FXAA_QUALITY__P9 8.0
554 /*--------------------------------------------------------------------------*/
555 #if (FXAA_QUALITY__PRESET == 28)
556 #define FXAA_QUALITY__PS 11
557 #define FXAA_QUALITY__P0 1.0
558 #define FXAA_QUALITY__P1 1.5
559 #define FXAA_QUALITY__P2 2.0
560 #define FXAA_QUALITY__P3 2.0
561 #define FXAA_QUALITY__P4 2.0
562 #define FXAA_QUALITY__P5 2.0
563 #define FXAA_QUALITY__P6 2.0
564 #define FXAA_QUALITY__P7 2.0
565 #define FXAA_QUALITY__P8 2.0
566 #define FXAA_QUALITY__P9 4.0
567 #define FXAA_QUALITY__P10 8.0
569 /*--------------------------------------------------------------------------*/
570 #if (FXAA_QUALITY__PRESET == 29)
571 #define FXAA_QUALITY__PS 12
572 #define FXAA_QUALITY__P0 1.0
573 #define FXAA_QUALITY__P1 1.5
574 #define FXAA_QUALITY__P2 2.0
575 #define FXAA_QUALITY__P3 2.0
576 #define FXAA_QUALITY__P4 2.0
577 #define FXAA_QUALITY__P5 2.0
578 #define FXAA_QUALITY__P6 2.0
579 #define FXAA_QUALITY__P7 2.0
580 #define FXAA_QUALITY__P8 2.0
581 #define FXAA_QUALITY__P9 2.0
582 #define FXAA_QUALITY__P10 4.0
583 #define FXAA_QUALITY__P11 8.0
586 /*============================================================================
587 FXAA QUALITY - EXTREME QUALITY
588 ============================================================================*/
589 #if (FXAA_QUALITY__PRESET == 39)
590 #define FXAA_QUALITY__PS 12
591 #define FXAA_QUALITY__P0 1.0
592 #define FXAA_QUALITY__P1 1.0
593 #define FXAA_QUALITY__P2 1.0
594 #define FXAA_QUALITY__P3 1.0
595 #define FXAA_QUALITY__P4 1.0
596 #define FXAA_QUALITY__P5 1.5
597 #define FXAA_QUALITY__P6 2.0
598 #define FXAA_QUALITY__P7 2.0
599 #define FXAA_QUALITY__P8 2.0
600 #define FXAA_QUALITY__P9 2.0
601 #define FXAA_QUALITY__P10 4.0
602 #define FXAA_QUALITY__P11 8.0
607 /*============================================================================
611 ============================================================================*/
612 #if (FXAA_GLSL_120 == 1) || (FXAA_GLSL_130 == 1)
613 #define FxaaBool bool
614 #define FxaaDiscard discard
615 #define FxaaFloat float
616 #define FxaaFloat2 vec2
617 #define FxaaFloat3 vec3
618 #define FxaaFloat4 vec4
619 #define FxaaHalf float
620 #define FxaaHalf2 vec2
621 #define FxaaHalf3 vec3
622 #define FxaaHalf4 vec4
623 #define FxaaInt2 ivec2
624 #define FxaaSat(x) clamp(x, 0.0, 1.0)
625 #define FxaaTex sampler2D
627 #define FxaaBool bool
628 #define FxaaDiscard clip(-1)
629 #define FxaaFloat float
630 #define FxaaFloat2 float2
631 #define FxaaFloat3 float3
632 #define FxaaFloat4 float4
633 #define FxaaHalf half
634 #define FxaaHalf2 half2
635 #define FxaaHalf3 half3
636 #define FxaaHalf4 half4
637 #define FxaaSat(x) saturate(x)
639 /*--------------------------------------------------------------------------*/
640 #if (FXAA_GLSL_120 == 1)
644 // #extension GL_EXT_gpu_shader4 : enable
645 // (or set FXAA_FAST_PIXEL_OFFSET 1 to work like DX9)
646 #define FxaaTexTop(t, p) texture2DLod(t, p, 0.0)
647 #if (FXAA_FAST_PIXEL_OFFSET == 1)
648 #define FxaaTexOff(t, p, o, r) texture2DLodOffset(t, p, 0.0, o)
650 #define FxaaTexOff(t, p, o, r) texture2DLod(t, p + (o * r), 0.0)
652 #if (FXAA_GATHER4_ALPHA == 1)
653 // use #extension GL_ARB_gpu_shader5 : enable
654 #define FxaaTexAlpha4(t, p) textureGather(t, p, 3)
655 #define FxaaTexOffAlpha4(t, p, o) textureGatherOffset(t, p, o, 3)
656 #define FxaaTexGreen4(t, p) textureGather(t, p, 1)
657 #define FxaaTexOffGreen4(t, p, o) textureGatherOffset(t, p, o, 1)
660 /*--------------------------------------------------------------------------*/
661 #if (FXAA_GLSL_130 == 1)
662 // Requires "#version 130" or better
663 #define FxaaTexTop(t, p) textureLod(t, p, 0.0)
664 #define FxaaTexOff(t, p, o, r) textureLodOffset(t, p, 0.0, o)
665 #if (FXAA_GATHER4_ALPHA == 1)
666 // use #extension GL_ARB_gpu_shader5 : enable
667 #define FxaaTexAlpha4(t, p) textureGather(t, p, 3)
668 #define FxaaTexOffAlpha4(t, p, o) textureGatherOffset(t, p, o, 3)
669 #define FxaaTexGreen4(t, p) textureGather(t, p, 1)
670 #define FxaaTexOffGreen4(t, p, o) textureGatherOffset(t, p, o, 1)
673 /*--------------------------------------------------------------------------*/
674 #if (FXAA_HLSL_3 == 1) || (FXAA_360 == 1) || (FXAA_PS3 == 1)
675 #define FxaaInt2 float2
676 #define FxaaTex sampler2D
677 #define FxaaTexTop(t, p) tex2Dlod(t, float4(p, 0.0, 0.0))
678 #define FxaaTexOff(t, p, o, r) tex2Dlod(t, float4(p + (o * r), 0, 0))
680 /*--------------------------------------------------------------------------*/
681 #if (FXAA_HLSL_4 == 1)
682 #define FxaaInt2 int2
// FxaaTex for the FXAA_HLSL_4 path: bundles a SamplerState with the Texture2D
// it is used on, so the FxaaTexTop/FxaaTexOff macros that follow can expand to
// t.tex.SampleLevel(t.smpl, ...).
683 struct FxaaTex
{ SamplerState smpl   // sampler state handed to SampleLevel as its first argument
; Texture2D tex       // texture object the sampling method is invoked on
; };
684 #define FxaaTexTop(t, p) t.tex.SampleLevel(t.smpl, p, 0.0)
685 #define FxaaTexOff(t, p, o, r) t.tex.SampleLevel(t.smpl, p, 0.0, o)
687 /*--------------------------------------------------------------------------*/
688 #if (FXAA_HLSL_5 == 1)
689 #define FxaaInt2 int2
// FxaaTex for the FXAA_HLSL_5 path: bundles a SamplerState with the Texture2D
// it is used on. The macros that follow use it for t.tex.SampleLevel(t.smpl, ...)
// as well as the t.tex.GatherAlpha / t.tex.GatherGreen fetches.
690 struct FxaaTex
{ SamplerState smpl   // sampler state passed as the first argument of each fetch
; Texture2D tex       // texture object the fetch methods are invoked on
; };
691 #define FxaaTexTop(t, p) t.tex.SampleLevel(t.smpl, p, 0.0)
692 #define FxaaTexOff(t, p, o, r) t.tex.SampleLevel(t.smpl, p, 0.0, o)
693 #define FxaaTexAlpha4(t, p) t.tex.GatherAlpha(t.smpl, p)
694 #define FxaaTexOffAlpha4(t, p, o) t.tex.GatherAlpha(t.smpl, p, o)
695 #define FxaaTexGreen4(t, p) t.tex.GatherGreen(t.smpl, p)
696 #define FxaaTexOffGreen4(t, p, o) t.tex.GatherGreen(t.smpl, p, o)
700 #define FxaaTexTop(t, p) tex2D(t, p)
702 #define FxaaTexOff(t, p, o, r) tex2D(t, p + (o * r))
704 /*============================================================================
705 GREEN AS LUMA OPTION SUPPORT FUNCTION
706 ============================================================================*/
707 #if (FXAA_GREEN_AS_LUMA == 0)
// FxaaLuma, variant compiled when FXAA_GREEN_AS_LUMA == 0.
// Takes an RGBA sample and returns its .w (alpha) component, which is where
// the pass prior to FXAA is required to have stored luma in this configuration.
708 FxaaFloat
FxaaLuma(FxaaFloat4 rgba
) { return rgba
.w
; }
// FxaaLuma, green-as-luma variant.
// Takes an RGBA sample and returns its .y (green) component as the luma proxy,
// so no luma needs to be packed into alpha.
// NOTE(review): presumably the #else branch of the FXAA_GREEN_AS_LUMA check
// above; the #else/#endif directives are not visible in this view - confirm.
710 FxaaFloat
FxaaLuma(FxaaFloat4 rgba
) { return rgba
.y
; }
716 /*============================================================================
720 ============================================================================*/
722 /*--------------------------------------------------------------------------*/
723 FxaaFloat4
FxaaPixelShader(
725 // Use noperspective interpolation here (turn off perspective interpolation).
726 // {xy} = center of pixel
729 // Used only for FXAA Console, and not used on the 360 version.
730 // Use noperspective interpolation here (turn off perspective interpolation).
731 // {xy__} = upper left of pixel
732 // {__zw} = lower right of pixel
733 //FxaaFloat4 fxaaConsolePosPos,
735 // Input color texture.
736 // {rgb_} = color in linear or perceptual color space
737 // if (FXAA_GREEN_AS_LUMA == 0)
738 // {___a} = luma in perceptual color space (not linear)
741 // Only used on the optimized 360 version of FXAA Console.
742 // For everything but 360, just use the same input here as for "tex".
743 // For 360, same texture, just alias with a 2nd sampler.
744 // This sampler needs to have an exponent bias of -1.
745 //FxaaTex fxaaConsole360TexExpBiasNegOne,
747 // Only used on the optimized 360 version of FXAA Console.
748 // For everything but 360, just use the same input here as for "tex".
749 // For 360, same texture, just alias with a 3nd sampler.
750 // This sampler needs to have an exponent bias of -2.
751 //FxaaTex fxaaConsole360TexExpBiasNegTwo,
753 // Only used on FXAA Quality.
754 // This must be from a constant/uniform.
755 // {x_} = 1.0/screenWidthInPixels
756 // {_y} = 1.0/screenHeightInPixels
757 FxaaFloat2 fxaaQualityRcpFrame
,
759 // Only used on FXAA Console.
760 // This must be from a constant/uniform.
761 // This effects sub-pixel AA quality and inversely sharpness.
762 // Where N ranges between,
763 // N = 0.50 (default)
764 // N = 0.33 (sharper)
765 // {x___} = -N/screenWidthInPixels
766 // {_y__} = -N/screenHeightInPixels
767 // {__z_} = N/screenWidthInPixels
768 // {___w} = N/screenHeightInPixels
769 //FxaaFloat4 fxaaConsoleRcpFrameOpt,
771 // Only used on FXAA Console.
772 // Not used on 360, but used on PS3 and PC.
773 // This must be from a constant/uniform.
774 // {x___} = -2.0/screenWidthInPixels
775 // {_y__} = -2.0/screenHeightInPixels
776 // {__z_} = 2.0/screenWidthInPixels
777 // {___w} = 2.0/screenHeightInPixels
778 //FxaaFloat4 fxaaConsoleRcpFrameOpt2,
780 // Only used on FXAA Console.
781 // Only used on 360 in place of fxaaConsoleRcpFrameOpt2.
782 // This must be from a constant/uniform.
783 // {x___} = 8.0/screenWidthInPixels
784 // {_y__} = 8.0/screenHeightInPixels
785 // {__z_} = -4.0/screenWidthInPixels
786 // {___w} = -4.0/screenHeightInPixels
787 //FxaaFloat4 fxaaConsole360RcpFrameOpt2,
789 // Only used on FXAA Quality.
790 // This used to be the FXAA_QUALITY__SUBPIX define.
791 // It is here now to allow easier tuning.
792 // Choose the amount of sub-pixel aliasing removal.
793 // This can effect sharpness.
794 // 1.00 - upper limit (softer)
795 // 0.75 - default amount of filtering
796 // 0.50 - lower limit (sharper, less sub-pixel aliasing removal)
798 // 0.00 - completely off
799 FxaaFloat fxaaQualitySubpix
,
801 // Only used on FXAA Quality.
802 // This used to be the FXAA_QUALITY__EDGE_THRESHOLD define.
803 // It is here now to allow easier tuning.
804 // The minimum amount of local contrast required to apply algorithm.
805 // 0.333 - too little (faster)
806 // 0.250 - low quality
808 // 0.125 - high quality
809 // 0.063 - overkill (slower)
810 FxaaFloat fxaaQualityEdgeThreshold
,
812 // Only used on FXAA Quality.
813 // This used to be the FXAA_QUALITY__EDGE_THRESHOLD_MIN define.
814 // It is here now to allow easier tuning.
815 // Trims the algorithm from processing darks.
816 // 0.0833 - upper limit (default, the start of visible unfiltered edges)
817 // 0.0625 - high quality (faster)
818 // 0.0312 - visible limit (slower)
819 // Special notes when using FXAA_GREEN_AS_LUMA,
820 // Likely want to set this to zero.
821 // As colors that are mostly not-green
822 // will appear very dark in the green channel!
823 // Tune by looking at mostly non-green content,
824 // then start at zero and increase until aliasing is a problem.
825 FxaaFloat fxaaQualityEdgeThresholdMin
827 // Only used on FXAA Console.
828 // This used to be the FXAA_CONSOLE__EDGE_SHARPNESS define.
829 // It is here now to allow easier tuning.
830 // This does not effect PS3, as this needs to be compiled in.
831 // Use FXAA_CONSOLE__PS3_EDGE_SHARPNESS for PS3.
832 // Due to the PS3 being ALU bound,
833 // there are only three safe values here: 2 and 4 and 8.
834 // These options use the shaders ability to a free *|/ by 2|4|8.
835 // For all other platforms can be a non-power of two.
836 // 8.0 is sharper (default!!!)
838 // 2.0 is really soft (good only for vector graphics inputs)
839 //FxaaFloat fxaaConsoleEdgeSharpness,
841 // Only used on FXAA Console.
842 // This used to be the FXAA_CONSOLE__EDGE_THRESHOLD define.
843 // It is here now to allow easier tuning.
844 // This does not effect PS3, as this needs to be compiled in.
845 // Use FXAA_CONSOLE__PS3_EDGE_THRESHOLD for PS3.
846 // Due to the PS3 being ALU bound,
847 // there are only two safe values here: 1/4 and 1/8.
848 // These options use the shaders ability to a free *|/ by 2|4|8.
849 // The console setting has a different mapping than the quality setting.
850 // Other platforms can use other values.
851 // 0.125 leaves less aliasing, but is softer (default!!!)
852 // 0.25 leaves more aliasing, and is sharper
853 //FxaaFloat fxaaConsoleEdgeThreshold,
855 // Only used on FXAA Console.
856 // This used to be the FXAA_CONSOLE__EDGE_THRESHOLD_MIN define.
857 // It is here now to allow easier tuning.
858 // Trims the algorithm from processing darks.
859 // The console setting has a different mapping than the quality setting.
860 // This only applies when FXAA_EARLY_EXIT is 1.
861 // This does not apply to PS3,
862 // PS3 was simplified to avoid more shader instructions.
863 // 0.06 - faster but more aliasing in darks
865 // 0.04 - slower and less aliasing in darks
866 // Special notes when using FXAA_GREEN_AS_LUMA,
867 // Likely want to set this to zero.
868 // As colors that are mostly not-green
869 // will appear very dark in the green channel!
870 // Tune by looking at mostly non-green content,
871 // then start at zero and increase until aliasing is a problem.
872 //FxaaFloat fxaaConsoleEdgeThresholdMin,
874 // Extra constants for 360 FXAA Console only.
875 // Use zeros or anything else for other platforms.
876 // These must be in physical constant registers and NOT immedates.
877 // Immedates will result in compiler un-optimizing.
878 // {xyzw} = float4(1.0, -1.0, 0.25, -0.25)
879 //FxaaFloat4 fxaaConsole360ConstDir
881 /*--------------------------------------------------------------------------*/
885 #if (FXAA_GATHER4_ALPHA == 1)
886 #if (FXAA_DISCARD == 0)
887 FxaaFloat4 rgbyM
= FxaaTexTop(tex
, posM
);
888 #if (FXAA_GREEN_AS_LUMA == 0)
889 #define lumaM rgbyM.w
891 #define lumaM rgbyM.y
894 #if (FXAA_GREEN_AS_LUMA == 0)
895 FxaaFloat4 luma4A
= FxaaTexAlpha4(tex
, posM
);
896 FxaaFloat4 luma4B
= FxaaTexOffAlpha4(tex
, posM
, FxaaInt2(-1, -1));
898 FxaaFloat4 luma4A
= FxaaTexGreen4(tex
, posM
);
899 FxaaFloat4 luma4B
= FxaaTexOffGreen4(tex
, posM
, FxaaInt2(-1, -1));
901 #if (FXAA_DISCARD == 1)
902 #define lumaM luma4A.w
904 #define lumaE luma4A.z
905 #define lumaS luma4A.x
906 #define lumaSE luma4A.y
907 #define lumaNW luma4B.w
908 #define lumaN luma4B.z
909 #define lumaW luma4B.x
911 FxaaFloat4 rgbyM
= FxaaTexTop(tex
, posM
);
912 #if (FXAA_GREEN_AS_LUMA == 0)
913 #define lumaM rgbyM.w
915 #define lumaM rgbyM.y
917 FxaaFloat lumaS
= FxaaLuma(FxaaTexOff(tex
, posM
, FxaaInt2( 0, 1), fxaaQualityRcpFrame
.xy
));
918 FxaaFloat lumaE
= FxaaLuma(FxaaTexOff(tex
, posM
, FxaaInt2( 1, 0), fxaaQualityRcpFrame
.xy
));
919 FxaaFloat lumaN
= FxaaLuma(FxaaTexOff(tex
, posM
, FxaaInt2( 0,-1), fxaaQualityRcpFrame
.xy
));
920 FxaaFloat lumaW
= FxaaLuma(FxaaTexOff(tex
, posM
, FxaaInt2(-1, 0), fxaaQualityRcpFrame
.xy
));
922 /*--------------------------------------------------------------------------*/
923 FxaaFloat maxSM
= max(lumaS
, lumaM
);
924 FxaaFloat minSM
= min(lumaS
, lumaM
);
925 FxaaFloat maxESM
= max(lumaE
, maxSM
);
926 FxaaFloat minESM
= min(lumaE
, minSM
);
927 FxaaFloat maxWN
= max(lumaN
, lumaW
);
928 FxaaFloat minWN
= min(lumaN
, lumaW
);
929 FxaaFloat rangeMax
= max(maxWN
, maxESM
);
930 FxaaFloat rangeMin
= min(minWN
, minESM
);
931 FxaaFloat rangeMaxScaled
= rangeMax
* fxaaQualityEdgeThreshold
;
932 FxaaFloat range
= rangeMax
- rangeMin
;
933 FxaaFloat rangeMaxClamped
= max(fxaaQualityEdgeThresholdMin
, rangeMaxScaled
);
934 FxaaBool earlyExit
= range
< rangeMaxClamped
;
935 /*--------------------------------------------------------------------------*/
937 #if (FXAA_DISCARD == 1)
942 /*--------------------------------------------------------------------------*/
943 #if (FXAA_GATHER4_ALPHA == 0)
944 FxaaFloat lumaNW
= FxaaLuma(FxaaTexOff(tex
, posM
, FxaaInt2(-1,-1), fxaaQualityRcpFrame
.xy
));
945 FxaaFloat lumaSE
= FxaaLuma(FxaaTexOff(tex
, posM
, FxaaInt2( 1, 1), fxaaQualityRcpFrame
.xy
));
946 FxaaFloat lumaNE
= FxaaLuma(FxaaTexOff(tex
, posM
, FxaaInt2( 1,-1), fxaaQualityRcpFrame
.xy
));
947 FxaaFloat lumaSW
= FxaaLuma(FxaaTexOff(tex
, posM
, FxaaInt2(-1, 1), fxaaQualityRcpFrame
.xy
));
949 FxaaFloat lumaNE
= FxaaLuma(FxaaTexOff(tex
, posM
, FxaaInt2(1, -1), fxaaQualityRcpFrame
.xy
));
950 FxaaFloat lumaSW
= FxaaLuma(FxaaTexOff(tex
, posM
, FxaaInt2(-1, 1), fxaaQualityRcpFrame
.xy
));
952 /*--------------------------------------------------------------------------*/
953 FxaaFloat lumaNS
= lumaN
+ lumaS
;
954 FxaaFloat lumaWE
= lumaW
+ lumaE
;
955 FxaaFloat subpixRcpRange
= 1.0/range
;
956 FxaaFloat subpixNSWE
= lumaNS
+ lumaWE
;
957 FxaaFloat edgeHorz1
= (-2.0 * lumaM
) + lumaNS
;
958 FxaaFloat edgeVert1
= (-2.0 * lumaM
) + lumaWE
;
959 /*--------------------------------------------------------------------------*/
960 FxaaFloat lumaNESE
= lumaNE
+ lumaSE
;
961 FxaaFloat lumaNWNE
= lumaNW
+ lumaNE
;
962 FxaaFloat edgeHorz2
= (-2.0 * lumaE
) + lumaNESE
;
963 FxaaFloat edgeVert2
= (-2.0 * lumaN
) + lumaNWNE
;
964 /*--------------------------------------------------------------------------*/
965 FxaaFloat lumaNWSW
= lumaNW
+ lumaSW
;
966 FxaaFloat lumaSWSE
= lumaSW
+ lumaSE
;
967 FxaaFloat edgeHorz4
= (abs(edgeHorz1
) * 2.0) + abs(edgeHorz2
);
968 FxaaFloat edgeVert4
= (abs(edgeVert1
) * 2.0) + abs(edgeVert2
);
969 FxaaFloat edgeHorz3
= (-2.0 * lumaW
) + lumaNWSW
;
970 FxaaFloat edgeVert3
= (-2.0 * lumaS
) + lumaSWSE
;
971 FxaaFloat edgeHorz
= abs(edgeHorz3
) + edgeHorz4
;
972 FxaaFloat edgeVert
= abs(edgeVert3
) + edgeVert4
;
973 /*--------------------------------------------------------------------------*/
974 FxaaFloat subpixNWSWNESE
= lumaNWSW
+ lumaNESE
;
975 FxaaFloat lengthSign
= fxaaQualityRcpFrame
.x
;
976 FxaaBool horzSpan
= edgeHorz
>= edgeVert
;
977 FxaaFloat subpixA
= subpixNSWE
* 2.0 + subpixNWSWNESE
;
978 /*--------------------------------------------------------------------------*/
979 if(!horzSpan
) lumaN
= lumaW
;
980 if(!horzSpan
) lumaS
= lumaE
;
981 if(horzSpan
) lengthSign
= fxaaQualityRcpFrame
.y
;
982 FxaaFloat subpixB
= (subpixA
* (1.0/12.0)) - lumaM
;
983 /*--------------------------------------------------------------------------*/
984 FxaaFloat gradientN
= lumaN
- lumaM
;
985 FxaaFloat gradientS
= lumaS
- lumaM
;
986 FxaaFloat lumaNN
= lumaN
+ lumaM
;
987 FxaaFloat lumaSS
= lumaS
+ lumaM
;
988 FxaaBool pairN
= abs(gradientN
) >= abs(gradientS
);
989 FxaaFloat gradient
= max(abs(gradientN
), abs(gradientS
));
990 if(pairN
) lengthSign
= -lengthSign
;
991 FxaaFloat subpixC
= FxaaSat(abs(subpixB
) * subpixRcpRange
);
992 /*--------------------------------------------------------------------------*/
997 offNP
.x
= (!horzSpan
) ? 0.0 : fxaaQualityRcpFrame
.x
;
998 offNP
.y
= ( horzSpan
) ? 0.0 : fxaaQualityRcpFrame
.y
;
999 if(!horzSpan
) posB
.x
+= lengthSign
* 0.5;
1000 if( horzSpan
) posB
.y
+= lengthSign
* 0.5;
1001 /*--------------------------------------------------------------------------*/
1003 posN
.x
= posB
.x
- offNP
.x
* FXAA_QUALITY__P0
;
1004 posN
.y
= posB
.y
- offNP
.y
* FXAA_QUALITY__P0
;
1006 posP
.x
= posB
.x
+ offNP
.x
* FXAA_QUALITY__P0
;
1007 posP
.y
= posB
.y
+ offNP
.y
* FXAA_QUALITY__P0
;
1008 FxaaFloat subpixD
= ((-2.0)*subpixC
) + 3.0;
1009 FxaaFloat lumaEndN
= FxaaLuma(FxaaTexTop(tex
, posN
));
1010 FxaaFloat subpixE
= subpixC
* subpixC
;
1011 FxaaFloat lumaEndP
= FxaaLuma(FxaaTexTop(tex
, posP
));
1012 /*--------------------------------------------------------------------------*/
1013 if(!pairN
) lumaNN
= lumaSS
;
1014 FxaaFloat gradientScaled
= gradient
* 1.0/4.0;
1015 FxaaFloat lumaMM
= lumaM
- lumaNN
* 0.5;
1016 FxaaFloat subpixF
= subpixD
* subpixE
;
1017 FxaaBool lumaMLTZero
= lumaMM
< 0.0;
1018 /*--------------------------------------------------------------------------*/
1019 lumaEndN
-= lumaNN
* 0.5;
1020 lumaEndP
-= lumaNN
* 0.5;
1021 FxaaBool doneN
= abs(lumaEndN
) >= gradientScaled
;
1022 FxaaBool doneP
= abs(lumaEndP
) >= gradientScaled
;
1023 if(!doneN
) posN
.x
-= offNP
.x
* FXAA_QUALITY__P1
;
1024 if(!doneN
) posN
.y
-= offNP
.y
* FXAA_QUALITY__P1
;
1025 FxaaBool doneNP
= (!doneN
) || (!doneP
);
1026 if(!doneP
) posP
.x
+= offNP
.x
* FXAA_QUALITY__P1
;
1027 if(!doneP
) posP
.y
+= offNP
.y
* FXAA_QUALITY__P1
;
1028 /*--------------------------------------------------------------------------*/
1030 if(!doneN
) lumaEndN
= FxaaLuma(FxaaTexTop(tex
, posN
.xy
));
1031 if(!doneP
) lumaEndP
= FxaaLuma(FxaaTexTop(tex
, posP
.xy
));
1032 if(!doneN
) lumaEndN
= lumaEndN
- lumaNN
* 0.5;
1033 if(!doneP
) lumaEndP
= lumaEndP
- lumaNN
* 0.5;
1034 doneN
= abs(lumaEndN
) >= gradientScaled
;
1035 doneP
= abs(lumaEndP
) >= gradientScaled
;
1036 if(!doneN
) posN
.x
-= offNP
.x
* FXAA_QUALITY__P2
;
1037 if(!doneN
) posN
.y
-= offNP
.y
* FXAA_QUALITY__P2
;
1038 doneNP
= (!doneN
) || (!doneP
);
1039 if(!doneP
) posP
.x
+= offNP
.x
* FXAA_QUALITY__P2
;
1040 if(!doneP
) posP
.y
+= offNP
.y
* FXAA_QUALITY__P2
;
1041 /*--------------------------------------------------------------------------*/
1042 #if (FXAA_QUALITY__PS > 3)
1044 if(!doneN
) lumaEndN
= FxaaLuma(FxaaTexTop(tex
, posN
.xy
));
1045 if(!doneP
) lumaEndP
= FxaaLuma(FxaaTexTop(tex
, posP
.xy
));
1046 if(!doneN
) lumaEndN
= lumaEndN
- lumaNN
* 0.5;
1047 if(!doneP
) lumaEndP
= lumaEndP
- lumaNN
* 0.5;
1048 doneN
= abs(lumaEndN
) >= gradientScaled
;
1049 doneP
= abs(lumaEndP
) >= gradientScaled
;
1050 if(!doneN
) posN
.x
-= offNP
.x
* FXAA_QUALITY__P3
;
1051 if(!doneN
) posN
.y
-= offNP
.y
* FXAA_QUALITY__P3
;
1052 doneNP
= (!doneN
) || (!doneP
);
1053 if(!doneP
) posP
.x
+= offNP
.x
* FXAA_QUALITY__P3
;
1054 if(!doneP
) posP
.y
+= offNP
.y
* FXAA_QUALITY__P3
;
1055 /*--------------------------------------------------------------------------*/
1056 #if (FXAA_QUALITY__PS > 4)
1058 if(!doneN
) lumaEndN
= FxaaLuma(FxaaTexTop(tex
, posN
.xy
));
1059 if(!doneP
) lumaEndP
= FxaaLuma(FxaaTexTop(tex
, posP
.xy
));
1060 if(!doneN
) lumaEndN
= lumaEndN
- lumaNN
* 0.5;
1061 if(!doneP
) lumaEndP
= lumaEndP
- lumaNN
* 0.5;
1062 doneN
= abs(lumaEndN
) >= gradientScaled
;
1063 doneP
= abs(lumaEndP
) >= gradientScaled
;
1064 if(!doneN
) posN
.x
-= offNP
.x
* FXAA_QUALITY__P4
;
1065 if(!doneN
) posN
.y
-= offNP
.y
* FXAA_QUALITY__P4
;
1066 doneNP
= (!doneN
) || (!doneP
);
1067 if(!doneP
) posP
.x
+= offNP
.x
* FXAA_QUALITY__P4
;
1068 if(!doneP
) posP
.y
+= offNP
.y
* FXAA_QUALITY__P4
;
1069 /*--------------------------------------------------------------------------*/
1070 #if (FXAA_QUALITY__PS > 5)
1072 if(!doneN
) lumaEndN
= FxaaLuma(FxaaTexTop(tex
, posN
.xy
));
1073 if(!doneP
) lumaEndP
= FxaaLuma(FxaaTexTop(tex
, posP
.xy
));
1074 if(!doneN
) lumaEndN
= lumaEndN
- lumaNN
* 0.5;
1075 if(!doneP
) lumaEndP
= lumaEndP
- lumaNN
* 0.5;
1076 doneN
= abs(lumaEndN
) >= gradientScaled
;
1077 doneP
= abs(lumaEndP
) >= gradientScaled
;
1078 if(!doneN
) posN
.x
-= offNP
.x
* FXAA_QUALITY__P5
;
1079 if(!doneN
) posN
.y
-= offNP
.y
* FXAA_QUALITY__P5
;
1080 doneNP
= (!doneN
) || (!doneP
);
1081 if(!doneP
) posP
.x
+= offNP
.x
* FXAA_QUALITY__P5
;
1082 if(!doneP
) posP
.y
+= offNP
.y
* FXAA_QUALITY__P5
;
1083 /*--------------------------------------------------------------------------*/
1084 #if (FXAA_QUALITY__PS > 6)
1086 if(!doneN
) lumaEndN
= FxaaLuma(FxaaTexTop(tex
, posN
.xy
));
1087 if(!doneP
) lumaEndP
= FxaaLuma(FxaaTexTop(tex
, posP
.xy
));
1088 if(!doneN
) lumaEndN
= lumaEndN
- lumaNN
* 0.5;
1089 if(!doneP
) lumaEndP
= lumaEndP
- lumaNN
* 0.5;
1090 doneN
= abs(lumaEndN
) >= gradientScaled
;
1091 doneP
= abs(lumaEndP
) >= gradientScaled
;
1092 if(!doneN
) posN
.x
-= offNP
.x
* FXAA_QUALITY__P6
;
1093 if(!doneN
) posN
.y
-= offNP
.y
* FXAA_QUALITY__P6
;
1094 doneNP
= (!doneN
) || (!doneP
);
1095 if(!doneP
) posP
.x
+= offNP
.x
* FXAA_QUALITY__P6
;
1096 if(!doneP
) posP
.y
+= offNP
.y
* FXAA_QUALITY__P6
;
1097 /*--------------------------------------------------------------------------*/
1098 #if (FXAA_QUALITY__PS > 7)
1100 if(!doneN
) lumaEndN
= FxaaLuma(FxaaTexTop(tex
, posN
.xy
));
1101 if(!doneP
) lumaEndP
= FxaaLuma(FxaaTexTop(tex
, posP
.xy
));
1102 if(!doneN
) lumaEndN
= lumaEndN
- lumaNN
* 0.5;
1103 if(!doneP
) lumaEndP
= lumaEndP
- lumaNN
* 0.5;
1104 doneN
= abs(lumaEndN
) >= gradientScaled
;
1105 doneP
= abs(lumaEndP
) >= gradientScaled
;
1106 if(!doneN
) posN
.x
-= offNP
.x
* FXAA_QUALITY__P7
;
1107 if(!doneN
) posN
.y
-= offNP
.y
* FXAA_QUALITY__P7
;
1108 doneNP
= (!doneN
) || (!doneP
);
1109 if(!doneP
) posP
.x
+= offNP
.x
* FXAA_QUALITY__P7
;
1110 if(!doneP
) posP
.y
+= offNP
.y
* FXAA_QUALITY__P7
;
1111 /*--------------------------------------------------------------------------*/
1112 #if (FXAA_QUALITY__PS > 8)
1114 if(!doneN
) lumaEndN
= FxaaLuma(FxaaTexTop(tex
, posN
.xy
));
1115 if(!doneP
) lumaEndP
= FxaaLuma(FxaaTexTop(tex
, posP
.xy
));
1116 if(!doneN
) lumaEndN
= lumaEndN
- lumaNN
* 0.5;
1117 if(!doneP
) lumaEndP
= lumaEndP
- lumaNN
* 0.5;
1118 doneN
= abs(lumaEndN
) >= gradientScaled
;
1119 doneP
= abs(lumaEndP
) >= gradientScaled
;
1120 if(!doneN
) posN
.x
-= offNP
.x
* FXAA_QUALITY__P8
;
1121 if(!doneN
) posN
.y
-= offNP
.y
* FXAA_QUALITY__P8
;
1122 doneNP
= (!doneN
) || (!doneP
);
1123 if(!doneP
) posP
.x
+= offNP
.x
* FXAA_QUALITY__P8
;
1124 if(!doneP
) posP
.y
+= offNP
.y
* FXAA_QUALITY__P8
;
1125 /*--------------------------------------------------------------------------*/
1126 #if (FXAA_QUALITY__PS > 9)
1128 if(!doneN
) lumaEndN
= FxaaLuma(FxaaTexTop(tex
, posN
.xy
));
1129 if(!doneP
) lumaEndP
= FxaaLuma(FxaaTexTop(tex
, posP
.xy
));
1130 if(!doneN
) lumaEndN
= lumaEndN
- lumaNN
* 0.5;
1131 if(!doneP
) lumaEndP
= lumaEndP
- lumaNN
* 0.5;
1132 doneN
= abs(lumaEndN
) >= gradientScaled
;
1133 doneP
= abs(lumaEndP
) >= gradientScaled
;
1134 if(!doneN
) posN
.x
-= offNP
.x
* FXAA_QUALITY__P9
;
1135 if(!doneN
) posN
.y
-= offNP
.y
* FXAA_QUALITY__P9
;
1136 doneNP
= (!doneN
) || (!doneP
);
1137 if(!doneP
) posP
.x
+= offNP
.x
* FXAA_QUALITY__P9
;
1138 if(!doneP
) posP
.y
+= offNP
.y
* FXAA_QUALITY__P9
;
1139 /*--------------------------------------------------------------------------*/
1140 #if (FXAA_QUALITY__PS > 10)
1142 if(!doneN
) lumaEndN
= FxaaLuma(FxaaTexTop(tex
, posN
.xy
));
1143 if(!doneP
) lumaEndP
= FxaaLuma(FxaaTexTop(tex
, posP
.xy
));
1144 if(!doneN
) lumaEndN
= lumaEndN
- lumaNN
* 0.5;
1145 if(!doneP
) lumaEndP
= lumaEndP
- lumaNN
* 0.5;
1146 doneN
= abs(lumaEndN
) >= gradientScaled
;
1147 doneP
= abs(lumaEndP
) >= gradientScaled
;
1148 if(!doneN
) posN
.x
-= offNP
.x
* FXAA_QUALITY__P10
;
1149 if(!doneN
) posN
.y
-= offNP
.y
* FXAA_QUALITY__P10
;
1150 doneNP
= (!doneN
) || (!doneP
);
1151 if(!doneP
) posP
.x
+= offNP
.x
* FXAA_QUALITY__P10
;
1152 if(!doneP
) posP
.y
+= offNP
.y
* FXAA_QUALITY__P10
;
1153 /*--------------------------------------------------------------------------*/
1154 #if (FXAA_QUALITY__PS > 11)
1156 if(!doneN
) lumaEndN
= FxaaLuma(FxaaTexTop(tex
, posN
.xy
));
1157 if(!doneP
) lumaEndP
= FxaaLuma(FxaaTexTop(tex
, posP
.xy
));
1158 if(!doneN
) lumaEndN
= lumaEndN
- lumaNN
* 0.5;
1159 if(!doneP
) lumaEndP
= lumaEndP
- lumaNN
* 0.5;
1160 doneN
= abs(lumaEndN
) >= gradientScaled
;
1161 doneP
= abs(lumaEndP
) >= gradientScaled
;
1162 if(!doneN
) posN
.x
-= offNP
.x
* FXAA_QUALITY__P11
;
1163 if(!doneN
) posN
.y
-= offNP
.y
* FXAA_QUALITY__P11
;
1164 doneNP
= (!doneN
) || (!doneP
);
1165 if(!doneP
) posP
.x
+= offNP
.x
* FXAA_QUALITY__P11
;
1166 if(!doneP
) posP
.y
+= offNP
.y
* FXAA_QUALITY__P11
;
1167 /*--------------------------------------------------------------------------*/
1168 #if (FXAA_QUALITY__PS > 12)
1170 if(!doneN
) lumaEndN
= FxaaLuma(FxaaTexTop(tex
, posN
.xy
));
1171 if(!doneP
) lumaEndP
= FxaaLuma(FxaaTexTop(tex
, posP
.xy
));
1172 if(!doneN
) lumaEndN
= lumaEndN
- lumaNN
* 0.5;
1173 if(!doneP
) lumaEndP
= lumaEndP
- lumaNN
* 0.5;
1174 doneN
= abs(lumaEndN
) >= gradientScaled
;
1175 doneP
= abs(lumaEndP
) >= gradientScaled
;
1176 if(!doneN
) posN
.x
-= offNP
.x
* FXAA_QUALITY__P12
;
1177 if(!doneN
) posN
.y
-= offNP
.y
* FXAA_QUALITY__P12
;
1178 doneNP
= (!doneN
) || (!doneP
);
1179 if(!doneP
) posP
.x
+= offNP
.x
* FXAA_QUALITY__P12
;
1180 if(!doneP
) posP
.y
+= offNP
.y
* FXAA_QUALITY__P12
;
1181 /*--------------------------------------------------------------------------*/
1184 /*--------------------------------------------------------------------------*/
1187 /*--------------------------------------------------------------------------*/
1190 /*--------------------------------------------------------------------------*/
1193 /*--------------------------------------------------------------------------*/
1196 /*--------------------------------------------------------------------------*/
1199 /*--------------------------------------------------------------------------*/
1202 /*--------------------------------------------------------------------------*/
1205 /*--------------------------------------------------------------------------*/
1208 /*--------------------------------------------------------------------------*/
1211 /*--------------------------------------------------------------------------*/
1213 /*--------------------------------------------------------------------------*/
1214 FxaaFloat dstN
= posM
.x
- posN
.x
;
1215 FxaaFloat dstP
= posP
.x
- posM
.x
;
1216 if(!horzSpan
) dstN
= posM
.y
- posN
.y
;
1217 if(!horzSpan
) dstP
= posP
.y
- posM
.y
;
1218 /*--------------------------------------------------------------------------*/
1219 FxaaBool goodSpanN
= (lumaEndN
< 0.0) != lumaMLTZero
;
1220 FxaaFloat spanLength
= (dstP
+ dstN
);
1221 FxaaBool goodSpanP
= (lumaEndP
< 0.0) != lumaMLTZero
;
1222 FxaaFloat spanLengthRcp
= 1.0/spanLength
;
1223 /*--------------------------------------------------------------------------*/
1224 FxaaBool directionN
= dstN
< dstP
;
1225 FxaaFloat dst
= min(dstN
, dstP
);
1226 FxaaBool goodSpan
= directionN
? goodSpanN
: goodSpanP
;
1227 FxaaFloat subpixG
= subpixF
* subpixF
;
1228 FxaaFloat pixelOffset
= (dst
* (-spanLengthRcp
)) + 0.5;
1229 FxaaFloat subpixH
= subpixG
* fxaaQualitySubpix
;
1230 /*--------------------------------------------------------------------------*/
1231 FxaaFloat pixelOffsetGood
= goodSpan
? pixelOffset
: 0.0;
1232 FxaaFloat pixelOffsetSubpix
= max(pixelOffsetGood
, subpixH
);
1233 if(!horzSpan
) posM
.x
+= pixelOffsetSubpix
* lengthSign
;
1234 if( horzSpan
) posM
.y
+= pixelOffsetSubpix
* lengthSign
;
1235 #if (FXAA_DISCARD == 1)
1236 return FxaaTexTop(tex
, posM
);
1238 return FxaaFloat4(FxaaTexTop(tex
, posM
).xyz
, lumaM
);
1241 /*==========================================================================*/
1247 /*============================================================================
1249 FXAA3 CONSOLE - PC VERSION
1250 ------------------------------------------------------------------------------
1251 Instead of using this on PC, I'd suggest just using FXAA Quality with
1252 #define FXAA_QUALITY__PRESET 10
1254 #define FXAA_QUALITY__PRESET 20
1255 Either is higher quality and almost as fast as this on modern PC GPUs.
1256 ============================================================================*/
1257 #if (FXAA_PC_CONSOLE == 1)
1258 /*--------------------------------------------------------------------------*/
// FXAA3 Console (PC) pixel shader entry point.
// Reads four corner luma samples addressed by fxaaConsolePosPos plus the
// center texel, returns the center texel unchanged when the local luma range
// is below the edge threshold, and otherwise builds a blend of taps taken
// along the estimated edge direction.
// The fxaaQuality* and fxaaConsole360* parameters are not referenced in the
// visible body.
// NOTE(review): `pos` and `tex` are used in the body, but their parameter
// declarations, the opening brace, the `dir` declaration, the #else/#endif
// lines and the final return are not visible in this excerpt -- confirm
// against the full file.
1259 FxaaFloat4
FxaaPixelShader(
1260 // See FXAA Quality FxaaPixelShader() source for docs on Inputs!
1262 FxaaFloat4 fxaaConsolePosPos
,
1264 FxaaTex fxaaConsole360TexExpBiasNegOne
,
1265 FxaaTex fxaaConsole360TexExpBiasNegTwo
,
1266 FxaaFloat2 fxaaQualityRcpFrame
,
1267 FxaaFloat4 fxaaConsoleRcpFrameOpt
,
1268 FxaaFloat4 fxaaConsoleRcpFrameOpt2
,
1269 FxaaFloat4 fxaaConsole360RcpFrameOpt2
,
1270 FxaaFloat fxaaQualitySubpix
,
1271 FxaaFloat fxaaQualityEdgeThreshold
,
1272 FxaaFloat fxaaQualityEdgeThresholdMin
,
1273 FxaaFloat fxaaConsoleEdgeSharpness
,
1274 FxaaFloat fxaaConsoleEdgeThreshold
,
1275 FxaaFloat fxaaConsoleEdgeThresholdMin
,
1276 FxaaFloat4 fxaaConsole360ConstDir
1278 /*--------------------------------------------------------------------------*/
// Luma at the four corners: fxaaConsolePosPos.xy and .zw are the two opposite
// corner positions, and the mixed swizzles (.xw, .zy) address the other two.
1279 FxaaFloat lumaNw
= FxaaLuma(FxaaTexTop(tex
, fxaaConsolePosPos
.xy
));
1280 FxaaFloat lumaSw
= FxaaLuma(FxaaTexTop(tex
, fxaaConsolePosPos
.xw
));
1281 FxaaFloat lumaNe
= FxaaLuma(FxaaTexTop(tex
, fxaaConsolePosPos
.zy
));
1282 FxaaFloat lumaSe
= FxaaLuma(FxaaTexTop(tex
, fxaaConsolePosPos
.zw
));
1283 /*--------------------------------------------------------------------------*/
// Center texel. Its luma is taken from .w, or from .y when
// FXAA_GREEN_AS_LUMA is non-zero (the two lumaM declarations below are the two
// preprocessor branches).
1284 FxaaFloat4 rgbyM
= FxaaTexTop(tex
, pos
.xy
);
1285 #if (FXAA_GREEN_AS_LUMA == 0)
1286 FxaaFloat lumaM
= rgbyM
.w
;
1288 FxaaFloat lumaM
= rgbyM
.y
;
1290 /*--------------------------------------------------------------------------*/
// Min/max luma over the four corners, computed pairwise.
1291 FxaaFloat lumaMaxNwSw
= max(lumaNw
, lumaSw
);
// NOTE(review): small constant bias on the NE sample; presumably it keeps the
// direction vector below from being exactly zero before normalize() -- confirm.
1292 lumaNe
+= 1.0/384.0;
1293 FxaaFloat lumaMinNwSw
= min(lumaNw
, lumaSw
);
1294 /*--------------------------------------------------------------------------*/
1295 FxaaFloat lumaMaxNeSe
= max(lumaNe
, lumaSe
);
1296 FxaaFloat lumaMinNeSe
= min(lumaNe
, lumaSe
);
1297 /*--------------------------------------------------------------------------*/
1298 FxaaFloat lumaMax
= max(lumaMaxNeSe
, lumaMaxNwSw
);
1299 FxaaFloat lumaMin
= min(lumaMinNeSe
, lumaMinNwSw
);
1300 /*--------------------------------------------------------------------------*/
// Edge threshold: the corner maximum scaled by fxaaConsoleEdgeThreshold, but
// never less than fxaaConsoleEdgeThresholdMin (see lumaMaxScaledClamped).
1301 FxaaFloat lumaMaxScaled
= lumaMax
* fxaaConsoleEdgeThreshold
;
1302 /*--------------------------------------------------------------------------*/
// Extend the min/max to include the center luma, and form the two diagonal
// differences used for the edge direction.
1303 FxaaFloat lumaMinM
= min(lumaMin
, lumaM
);
1304 FxaaFloat lumaMaxScaledClamped
= max(fxaaConsoleEdgeThresholdMin
, lumaMaxScaled
);
1305 FxaaFloat lumaMaxM
= max(lumaMax
, lumaM
);
1306 FxaaFloat dirSwMinusNe
= lumaSw
- lumaNe
;
1307 FxaaFloat lumaMaxSubMinM
= lumaMaxM
- lumaMinM
;
1308 FxaaFloat dirSeMinusNw
= lumaSe
- lumaNw
;
// Early exit: local luma range below the threshold, return the center texel.
1309 if(lumaMaxSubMinM
< lumaMaxScaledClamped
) return rgbyM
;
1310 /*--------------------------------------------------------------------------*/
// dir.x = (Sw - Ne) + (Se - Nw), dir.y = (Sw - Ne) - (Se - Nw).
1312 dir
.x
= dirSwMinusNe
+ dirSeMinusNw
;
1313 dir
.y
= dirSwMinusNe
- dirSeMinusNw
;
1314 /*--------------------------------------------------------------------------*/
// First tap pair: one step either side of the center along the normalized
// direction, scaled by fxaaConsoleRcpFrameOpt.zw.
1315 FxaaFloat2 dir1
= normalize(dir
.xy
);
1316 FxaaFloat4 rgbyN1
= FxaaTexTop(tex
, pos
.xy
- dir1
* fxaaConsoleRcpFrameOpt
.zw
);
1317 FxaaFloat4 rgbyP1
= FxaaTexTop(tex
, pos
.xy
+ dir1
* fxaaConsoleRcpFrameOpt
.zw
);
1318 /*--------------------------------------------------------------------------*/
// Second direction: dir1 divided by its smaller absolute component (times
// fxaaConsoleEdgeSharpness), clamped per component to [-2, 2].
1319 FxaaFloat dirAbsMinTimesC
= min(abs(dir1
.x
), abs(dir1
.y
)) * fxaaConsoleEdgeSharpness
;
1320 FxaaFloat2 dir2
= clamp(dir1
.xy
/ dirAbsMinTimesC
, -2.0, 2.0);
1321 /*--------------------------------------------------------------------------*/
// Second tap pair along dir2, scaled by fxaaConsoleRcpFrameOpt2.zw.
1322 FxaaFloat4 rgbyN2
= FxaaTexTop(tex
, pos
.xy
- dir2
* fxaaConsoleRcpFrameOpt2
.zw
);
1323 FxaaFloat4 rgbyP2
= FxaaTexTop(tex
, pos
.xy
+ dir2
* fxaaConsoleRcpFrameOpt2
.zw
);
1324 /*--------------------------------------------------------------------------*/
// rgbyA is the (unnormalized) sum of the first tap pair; rgbyB is the average
// of all four taps, i.e. 0.25 * (N1 + P1 + N2 + P2).
1325 FxaaFloat4 rgbyA
= rgbyN1
+ rgbyP1
;
1326 FxaaFloat4 rgbyB
= ((rgbyN2
+ rgbyP2
) * 0.25) + (rgbyA
* 0.25);
1327 /*--------------------------------------------------------------------------*/
// If the four-tap luma (.w, or .y for green-as-luma) falls outside the corner
// [lumaMin, lumaMax] range, fall back to the two-tap average for the color.
1328 #if (FXAA_GREEN_AS_LUMA == 0)
1329 FxaaBool twoTap
= (rgbyB
.w
< lumaMin
) || (rgbyB
.w
> lumaMax
);
1331 FxaaBool twoTap
= (rgbyB
.y
< lumaMin
) || (rgbyB
.y
> lumaMax
);
1333 if(twoTap
) rgbyB
.xyz
= rgbyA
.xyz
* 0.5;
1335 /*==========================================================================*/
1340 /*============================================================================
1342 FXAA3 CONSOLE - 360 PIXEL SHADER
1344 ------------------------------------------------------------------------------
1345 This optimized version thanks to suggestions from Andy Luedke.
1346 Should be fully tex bound in all cases.
1347 As of the FXAA 3.11 release, I have still not tested this code,
1348 however I fixed a bug which was in both FXAA 3.9 and FXAA 3.10.
1349 And note this is replacing the old unoptimized version.
1350 If it does not work, please let me know so I can fix it.
1351 ============================================================================*/
1353 /*--------------------------------------------------------------------------*/
// FXAA3 Console (Xbox 360) pixel shader entry point.
// Fetches four luma samples at half-texel offsets around pos.xy, returns the
// center texel unchanged when the local luma range is below the edge
// threshold, and otherwise selects between a two-tap and a four-tap blend
// sampled from the two exponent-biased textures.
// fxaaConsolePosPos, fxaaQualityRcpFrame, fxaaConsoleRcpFrameOpt2 and the
// fxaaQuality* scalars are not referenced in the visible body.
// NOTE(review): `pos` and `tex` are used in the body, but their parameter
// declarations, the opening brace, the asm block delimiters, the `dir`/`dir2`
// declarations, the #else/#endif lines and the final return are not visible in
// this excerpt -- confirm against the full file.
1354 [reduceTempRegUsage(4)]
1355 float4
FxaaPixelShader(
1356 // See FXAA Quality FxaaPixelShader() source for docs on Inputs!
1358 FxaaFloat4 fxaaConsolePosPos
,
1360 FxaaTex fxaaConsole360TexExpBiasNegOne
,
1361 FxaaTex fxaaConsole360TexExpBiasNegTwo
,
1362 FxaaFloat2 fxaaQualityRcpFrame
,
1363 FxaaFloat4 fxaaConsoleRcpFrameOpt
,
1364 FxaaFloat4 fxaaConsoleRcpFrameOpt2
,
1365 FxaaFloat4 fxaaConsole360RcpFrameOpt2
,
1366 FxaaFloat fxaaQualitySubpix
,
1367 FxaaFloat fxaaQualityEdgeThreshold
,
1368 FxaaFloat fxaaQualityEdgeThresholdMin
,
1369 FxaaFloat fxaaConsoleEdgeSharpness
,
1370 FxaaFloat fxaaConsoleEdgeThreshold
,
1371 FxaaFloat fxaaConsoleEdgeThresholdMin
,
1372 FxaaFloat4 fxaaConsole360ConstDir
1374 /*--------------------------------------------------------------------------*/
1375 float4 lumaNwNeSwSe
;
// Four tfetch2D fetches at (OffsetX, OffsetY) = (-0.5,-0.5), (0.5,-0.5),
// (-0.5,0.5), (0.5,0.5) around pos.xy, with UseComputedLOD disabled. Each one
// writes a different component of lumaNwNeSwSe (x, y, z, w in that order).
// The first group reads the .w channel and the second group the .y channel;
// they are the two FXAA_GREEN_AS_LUMA preprocessor branches.
1376 #if (FXAA_GREEN_AS_LUMA == 0)
1378 tfetch2D lumaNwNeSwSe
.w___
, tex
, pos
.xy
, OffsetX
= -0.5, OffsetY
= -0.5, UseComputedLOD
=false
1379 tfetch2D lumaNwNeSwSe
._w__
, tex
, pos
.xy
, OffsetX
= 0.5, OffsetY
= -0.5, UseComputedLOD
=false
1380 tfetch2D lumaNwNeSwSe
.__w_
, tex
, pos
.xy
, OffsetX
= -0.5, OffsetY
= 0.5, UseComputedLOD
=false
1381 tfetch2D lumaNwNeSwSe
.___w
, tex
, pos
.xy
, OffsetX
= 0.5, OffsetY
= 0.5, UseComputedLOD
=false
1385 tfetch2D lumaNwNeSwSe
.y___
, tex
, pos
.xy
, OffsetX
= -0.5, OffsetY
= -0.5, UseComputedLOD
=false
1386 tfetch2D lumaNwNeSwSe
._y__
, tex
, pos
.xy
, OffsetX
= 0.5, OffsetY
= -0.5, UseComputedLOD
=false
1387 tfetch2D lumaNwNeSwSe
.__y_
, tex
, pos
.xy
, OffsetX
= -0.5, OffsetY
= 0.5, UseComputedLOD
=false
1388 tfetch2D lumaNwNeSwSe
.___y
, tex
, pos
.xy
, OffsetX
= 0.5, OffsetY
= 0.5, UseComputedLOD
=false
1391 /*--------------------------------------------------------------------------*/
// NOTE(review): small constant bias on the second component (the (0.5,-0.5)
// sample); presumably it keeps the direction vector from being exactly zero
// before normalize() -- confirm.
1392 lumaNwNeSwSe
.y
+= 1.0/384.0;
// Min/max over the four samples: component-wise between the .xy and .zw pairs,
// then across the two remaining components.
1393 float2 lumaMinTemp
= min(lumaNwNeSwSe
.xy
, lumaNwNeSwSe
.zw
);
1394 float2 lumaMaxTemp
= max(lumaNwNeSwSe
.xy
, lumaNwNeSwSe
.zw
);
1395 float lumaMin
= min(lumaMinTemp
.x
, lumaMinTemp
.y
);
1396 float lumaMax
= max(lumaMaxTemp
.x
, lumaMaxTemp
.y
);
1397 /*--------------------------------------------------------------------------*/
// Center texel, sampled with explicit LOD 0 (the w component passed to
// tex2Dlod). Its luma (.w, or .y for green-as-luma) extends the min/max range.
1398 float4 rgbyM
= tex2Dlod(tex
, float4(pos
.xy
, 0.0, 0.0));
1399 #if (FXAA_GREEN_AS_LUMA == 0)
1400 float lumaMinM
= min(lumaMin
, rgbyM
.w
);
1401 float lumaMaxM
= max(lumaMax
, rgbyM
.w
);
1403 float lumaMinM
= min(lumaMin
, rgbyM
.y
);
1404 float lumaMaxM
= max(lumaMax
, rgbyM
.y
);
// Early exit: range (including the center) below
// max(fxaaConsoleEdgeThresholdMin, lumaMax * fxaaConsoleEdgeThreshold).
1406 if((lumaMaxM
- lumaMinM
) < max(fxaaConsoleEdgeThresholdMin
, lumaMax
* fxaaConsoleEdgeThreshold
)) return rgbyM
;
1407 /*--------------------------------------------------------------------------*/
// Edge direction via dot products against swizzles of fxaaConsole360ConstDir.
// With the documented constant {1.0, -1.0, 0.25, -0.25}, .yyxx = (-1,-1,1,1)
// and .xyxy = (1,-1,1,-1), so
//   dir.x = -luma.x - luma.y + luma.z + luma.w
//   dir.y =  luma.x - luma.y + luma.z - luma.w
1409 dir
.x
= dot(lumaNwNeSwSe
, fxaaConsole360ConstDir
.yyxx
);
1410 dir
.y
= dot(lumaNwNeSwSe
, fxaaConsole360ConstDir
.xyxy
);
1411 dir
= normalize(dir
);
1412 /*--------------------------------------------------------------------------*/
// First tap-pair offsets: .xy and .zw are both ADDED to pos below.
// NOTE(review): presumably the opposite signs of the two taps are baked into
// fxaaConsoleRcpFrameOpt -- confirm.
1413 float4 dir1
= dir
.xyxy
* fxaaConsoleRcpFrameOpt
.xyzw
;
1414 /*--------------------------------------------------------------------------*/
// Second tap-pair offsets: dir divided by its smaller absolute component
// (times fxaaConsoleEdgeSharpness), scaled by fxaaConsole360ConstDir.zzww,
// biased by 0.5 and saturated to [0, 1], then remapped with the scale (.xyxy)
// and bias (.zwzw) held in fxaaConsole360RcpFrameOpt2.
1416 float dirAbsMinTimesC
= min(abs(dir
.x
), abs(dir
.y
)) * fxaaConsoleEdgeSharpness
;
1417 dir2
= saturate(fxaaConsole360ConstDir
.zzww
* dir
.xyxy
/ dirAbsMinTimesC
+ 0.5);
1418 dir2
= dir2
* fxaaConsole360RcpFrameOpt2
.xyxy
+ fxaaConsole360RcpFrameOpt2
.zwzw
;
1419 /*--------------------------------------------------------------------------*/
// First pair is read from fxaaConsole360TexExpBiasNegOne and second pair from
// fxaaConsole360TexExpBiasNegTwo, all with explicit LOD 0.
1420 float4 rgbyN1
= tex2Dlod(fxaaConsole360TexExpBiasNegOne
, float4(pos
.xy
+ dir1
.xy
, 0.0, 0.0));
1421 float4 rgbyP1
= tex2Dlod(fxaaConsole360TexExpBiasNegOne
, float4(pos
.xy
+ dir1
.zw
, 0.0, 0.0));
1422 float4 rgbyN2
= tex2Dlod(fxaaConsole360TexExpBiasNegTwo
, float4(pos
.xy
+ dir2
.xy
, 0.0, 0.0));
1423 float4 rgbyP2
= tex2Dlod(fxaaConsole360TexExpBiasNegTwo
, float4(pos
.xy
+ dir2
.zw
, 0.0, 0.0));
1424 /*--------------------------------------------------------------------------*/
// rgbyA = two-tap sum; rgbyB = second-pair sum plus half of rgbyA.
// NOTE(review): no explicit 0.5/0.25 averaging factors appear here; presumably
// the exponent-biased textures pre-scale the samples -- confirm.
1425 float4 rgbyA
= rgbyN1
+ rgbyP1
;
1426 float4 rgbyB
= rgbyN2
+ rgbyP2
+ rgbyA
* 0.5;
1427 /*--------------------------------------------------------------------------*/
// Result selection: rgbyR is rgbyB only when lumaMin < luma(rgbyB) <= lumaMax;
// otherwise it falls back to rgbyA.
1428 float4 rgbyR
= ((FxaaLuma(rgbyB
) - lumaMax
) > 0.0) ? rgbyA
: rgbyB
;
1429 rgbyR
= ((FxaaLuma(rgbyB
) - lumaMin
) > 0.0) ? rgbyR
: rgbyA
;
1431 /*==========================================================================*/
1436 /*============================================================================
1438 FXAA3 CONSOLE - OPTIMIZED PS3 PIXEL SHADER (NO EARLY EXIT)
1440 ==============================================================================
1441 The code below does not exactly match the assembly.
1442 I have a feeling that 12 cycles is possible, but was not able to get there.
1443 Might have to increase register count to get full performance.
1444 Note this shader does not use perspective interpolation.
1446 Use the following cgc options,
1448 --fenable-bx2 --fastmath --fastprecision --nofloatbindings
1450 ------------------------------------------------------------------------------
1452 ------------------------------------------------------------------------------
1453 For reference and to aid in debug, output of NVShaderPerf should match this,
1456 0: texpkb h0.w(TRUE), v5.zyxx, #0
1457 2: addh h2.z(TRUE), h0.w, constant(0.001953, 0.000000, 0.000000, 0.000000).x
1458 4: texpkb h0.w(TRUE), v5.xwxx, #0
1459 6: addh h0.z(TRUE), -h2, h0.w
1460 7: texpkb h1.w(TRUE), v5, #0
1461 9: addh h0.x(TRUE), h0.z, -h1.w
1462 10: addh h3.w(TRUE), h0.z, h1
1463 11: texpkb h2.w(TRUE), v5.zwzz, #0
1464 13: addh h0.z(TRUE), h3.w, -h2.w
1465 14: addh h0.x(TRUE), h2.w, h0
1466 15: nrmh h1.xz(TRUE), h0_n
1467 16: minh_m8 h0.x(TRUE), |h1|, |h1.z|
1468 17: maxh h4.w(TRUE), h0, h1
1469 18: divx h2.xy(TRUE), h1_n.xzzw, h0_n
1470 19: movr r1.zw(TRUE), v4.xxxy
1471 20: madr r2.xz(TRUE), -h1, constant(cConst5.x, cConst5.y, cConst5.z, cConst5.w).zzww, r1.zzww
1472 22: minh h5.w(TRUE), h0, h1
1473 23: texpkb h0(TRUE), r2.xzxx, #0
1474 25: madr r0.zw(TRUE), h1.xzxz, constant(cConst5.x, cConst5.y, cConst5.z, cConst5.w), r1
1475 27: maxh h4.x(TRUE), h2.z, h2.w
1476 28: texpkb h1(TRUE), r0.zwzz, #0
1477 30: addh_d2 h1(TRUE), h0, h1
1478 31: madr r0.xy(TRUE), -h2, constant(cConst5.x, cConst5.y, cConst5.z, cConst5.w).xyxx, r1.zwzz
1479 33: texpkb h0(TRUE), r0, #0
1480 35: minh h4.z(TRUE), h2, h2.w
1482 37: madr r1.xy(TRUE), h2, constant(cConst5.x, cConst5.y, cConst5.z, cConst5.w).xyxx, r1.zwzz
1483 39: texpkb h2(TRUE), r1, #0
1484 41: addh_d2 h0(TRUE), h0, h2
1485 42: maxh h2.w(TRUE), h4, h4.x
1486 43: minh h2.x(TRUE), h5.w, h4.z
1487 44: addh_d2 h0(TRUE), h0, h1
1488 45: slth h2.x(TRUE), h0.w, h2
1489 46: sgth h2.w(TRUE), h0, h2
1490 47: movh h0(TRUE), h0
1491 48: addx.c0 rc(TRUE), h2, h2.w
1492 49: movh h0(c0.NE.x), h1
1494 IPU0 ------ Simplified schedule: --------
1495 Pass | Unit | uOp | PC: Op
1496 -----+--------+------+-------------------------
1497 1 | SCT0/1 | mov | 0: TXLr h0.w, g[TEX1].zyxx, const.xxxx, TEX0;
1498 | TEX | txl | 0: TXLr h0.w, g[TEX1].zyxx, const.xxxx, TEX0;
1499 | SCB1 | add | 2: ADDh h2.z, h0.--w-, const.--x-;
1501 2 | SCT0/1 | mov | 4: TXLr h0.w, g[TEX1].xwxx, const.xxxx, TEX0;
1502 | TEX | txl | 4: TXLr h0.w, g[TEX1].xwxx, const.xxxx, TEX0;
1503 | SCB1 | add | 6: ADDh h0.z,-h2, h0.--w-;
1505 3 | SCT0/1 | mov | 7: TXLr h1.w, g[TEX1], const.xxxx, TEX0;
1506 | TEX | txl | 7: TXLr h1.w, g[TEX1], const.xxxx, TEX0;
1507 | SCB0 | add | 9: ADDh h0.x, h0.z---,-h1.w---;
1508 | SCB1 | add | 10: ADDh h3.w, h0.---z, h1;
1510 4 | SCT0/1 | mov | 11: TXLr h2.w, g[TEX1].zwzz, const.xxxx, TEX0;
1511 | TEX | txl | 11: TXLr h2.w, g[TEX1].zwzz, const.xxxx, TEX0;
1512 | SCB0 | add | 14: ADDh h0.x, h2.w---, h0;
1513 | SCB1 | add | 13: ADDh h0.z, h3.--w-,-h2.--w-;
1515 5 | SCT1 | mov | 15: NRMh h1.xz, h0;
1516 | SRB | nrm | 15: NRMh h1.xz, h0;
1517 | SCB0 | min | 16: MINh*8 h0.x, |h1|, |h1.z---|;
1518 | SCB1 | max | 17: MAXh h4.w, h0, h1;
1520 6 | SCT0 | div | 18: DIVx h2.xy, h1.xz--, h0;
1521 | SCT1 | mov | 19: MOVr r1.zw, g[TEX0].--xy;
1522 | SCB0 | mad | 20: MADr r2.xz,-h1, const.z-w-, r1.z-w-;
1523 | SCB1 | min | 22: MINh h5.w, h0, h1;
1525 7 | SCT0/1 | mov | 23: TXLr h0, r2.xzxx, const.xxxx, TEX0;
1526 | TEX | txl | 23: TXLr h0, r2.xzxx, const.xxxx, TEX0;
1527 | SCB0 | max | 27: MAXh h4.x, h2.z---, h2.w---;
1528 | SCB1 | mad | 25: MADr r0.zw, h1.--xz, const, r1;
1530 8 | SCT0/1 | mov | 28: TXLr h1, r0.zwzz, const.xxxx, TEX0;
1531 | TEX | txl | 28: TXLr h1, r0.zwzz, const.xxxx, TEX0;
1532 | SCB0/1 | add | 30: ADDh/2 h1, h0, h1;
1534 9 | SCT0 | mad | 31: MADr r0.xy,-h2, const.xy--, r1.zw--;
1535 | SCT1 | mov | 33: TXLr h0, r0, const.zzzz, TEX0;
1536 | TEX | txl | 33: TXLr h0, r0, const.zzzz, TEX0;
1537 | SCB1 | min | 35: MINh h4.z, h2, h2.--w-;
1539 10 | SCT0 | mad | 37: MADr r1.xy, h2, const.xy--, r1.zw--;
1540 | SCT1 | mov | 39: TXLr h2, r1, const.zzzz, TEX0;
1541 | TEX | txl | 39: TXLr h2, r1, const.zzzz, TEX0;
1542 | SCB0/1 | add | 41: ADDh/2 h0, h0, h2;
1544 11 | SCT0 | min | 43: MINh h2.x, h5.w---, h4.z---;
1545 | SCT1 | max | 42: MAXh h2.w, h4, h4.---x;
1546 | SCB0/1 | add | 44: ADDh/2 h0, h0, h1;
1548 12 | SCT0 | set | 45: SLTh h2.x, h0.w---, h2;
1549 | SCT1 | set | 46: SGTh h2.w, h0, h2;
1550 | SCB0/1 | mul | 47: MOVh h0, h0;
1552 13 | SCT0 | mad | 48: ADDxc0_s rc, h2, h2.w---;
1553 | SCB0/1 | mul | 49: MOVh h0(NE0.xxxx), h1;
1571 Pass SCT0 SCT1 TEX SCB0 SCB1
1572 1: 0% 0% 100% 0% 100%
1573 2: 0% 0% 100% 0% 100%
1574 3: 0% 0% 100% 100% 100%
1575 4: 0% 0% 100% 100% 100%
1576 5: 0% 0% 0% 100% 100%
1577 6: 100% 100% 0% 100% 100%
1578 7: 0% 0% 100% 100% 100%
1579 8: 0% 0% 100% 100% 100%
1580 9: 0% 0% 100% 0% 100%
1581 10: 0% 0% 100% 100% 100%
1582 11: 100% 100% 0% 100% 100%
1583 12: 100% 100% 0% 100% 100%
1584 13: 100% 0% 0% 100% 100%
1586 MEAN: 30% 23% 61% 76% 100%
1587 Fragment Performance Setup: Driver RSX Compiler, GPU RSX, Flags 0x5
1588 Results 13 cycles, 3 r regs, 923,076,923 pixels/s
1589 ============================================================================*/
1590 #if (FXAA_PS3 == 1) && (FXAA_EARLY_EXIT == 0)
1591 /*--------------------------------------------------------------------------*/
// FxaaPixelShader() -- PS3 console path without early exit.
// Compiled only when FXAA_PS3 == 1 and FXAA_EARLY_EXIT == 0; declared half4.
// Outline of the visible code: sample four corner texels, build two tap
// directions, take two taps along each direction, and fall back to the
// first-direction average when the four-tap luma leaves the corner luma range.
// Each luma statement appears twice, once on .w and once on .y; the .y copies
// presumably form the FXAA_GREEN_AS_LUMA != 0 branch.
// NOTE(review): `pos`, `tex`, `dir`, the declarations of dir1_pos/dir2_pos/
// temp1N/rgby1/temp2N/rgby2, the #else/#endif partners of the
// FXAA_GREEN_AS_LUMA tests, the opening brace and the return statement are
// not visible in this excerpt -- confirm against the full file.
1593 #pragma disablepc all
1595 //#pragma option OutColorPrec=fp16
1596 #pragma texformat default RGBA8
1597 /*==========================================================================*/
1598 half4
FxaaPixelShader(
1599 // See FXAA Quality FxaaPixelShader() source for docs on Inputs!
// Corner sample positions; read below through the .zy, .xw, .xy and .zw swizzles.
1601 FxaaFloat4 fxaaConsolePosPos
,
1603 //FxaaTex fxaaConsole360TexExpBiasNegOne,
1604 //FxaaTex fxaaConsole360TexExpBiasNegTwo,
1605 //FxaaFloat2 fxaaQualityRcpFrame,
// .zw scales the first-direction (dir1_pos) tap offset.
1606 FxaaFloat4 fxaaConsoleRcpFrameOpt
,
// .zw scales the second-direction (dir2_pos) tap offset.
1607 FxaaFloat4 fxaaConsoleRcpFrameOpt2
1608 //FxaaFloat4 fxaaConsole360RcpFrameOpt2,
1609 //FxaaFloat fxaaQualitySubpix,
1610 //FxaaFloat fxaaQualityEdgeThreshold,
1611 //FxaaFloat fxaaQualityEdgeThresholdMin,
1612 //FxaaFloat fxaaConsoleEdgeSharpness,
1613 //FxaaFloat fxaaConsoleEdgeThreshold,
1614 //FxaaFloat fxaaConsoleEdgeThresholdMin,
1615 //FxaaFloat4 fxaaConsole360ConstDir
1617 /*--------------------------------------------------------------------------*/
// Corner tap "Ne": texture sample at fxaaConsolePosPos.zy.
1620 half4 lumaNe
= tex2D(tex
, fxaaConsolePosPos
.zy
); // h4tex2Dlod(tex, half4(fxaaConsolePosPos.zy, 0, 0));
// Add a constant 1/512 to the Ne luma channel (.w form, then .y form).
1621 #if (FXAA_GREEN_AS_LUMA == 0)
1622 lumaNe
.w
+= half(1.0/512.0);
1626 lumaNe
.y
+= half(1.0/512.0);
1630 /*--------------------------------------------------------------------------*/
// Corner tap "Sw": texture sample at fxaaConsolePosPos.xw.
1632 half4 lumaSw
= tex2D(tex
, fxaaConsolePosPos
.xw
); // h4tex2Dlod(tex, half4(fxaaConsolePosPos.xw, 0, 0));
1633 #if (FXAA_GREEN_AS_LUMA == 0)
1640 /*--------------------------------------------------------------------------*/
// Corner tap "Nw": texture sample at fxaaConsolePosPos.xy.
1642 half4 lumaNw
= tex2D(tex
, fxaaConsolePosPos
.xy
); // h4tex2Dlod(tex, half4(fxaaConsolePosPos.xy, 0, 0));
1643 #if (FXAA_GREEN_AS_LUMA == 0)
1650 /*--------------------------------------------------------------------------*/
// Corner tap "Se": texture sample at fxaaConsolePosPos.zw.
1652 half4 lumaSe
= tex2D(tex
, fxaaConsolePosPos
.zw
); // h4tex2Dlod(tex, half4(fxaaConsolePosPos.zw, 0, 0));
1653 #if (FXAA_GREEN_AS_LUMA == 0)
1660 /*--------------------------------------------------------------------------*/
// First tap direction: unit vector built from dir.xz.
// NOTE(review): the statements that fill `dir` are not visible here.
1663 dir1_pos
.xy
= normalize(dir
.xz
);
// Smaller absolute component of the unit direction, scaled by the
// FXAA_CONSOLE__PS3_EDGE_SHARPNESS constant; used as the divisor below.
1664 half dirAbsMinTimesC
= min(abs(dir1_pos
.x
), abs(dir1_pos
.y
)) * half(FXAA_CONSOLE__PS3_EDGE_SHARPNESS
);
1665 /*--------------------------------------------------------------------------*/
// Second tap direction: first direction divided by the scaled minimum,
// clamped per component to [-2, 2].
1668 dir2_pos
.xy
= clamp(dir1_pos
.xy
/ dirAbsMinTimesC
, half(-2.0), half(2.0));
// Both direction vectors carry pos.xy in their .zw lanes.
1669 dir1_pos
.zw
= pos
.xy
;
1670 dir2_pos
.zw
= pos
.xy
;
// Coordinates of the negative first-direction tap:
// pos.xy - dir1 * fxaaConsoleRcpFrameOpt.zw.
1672 temp1N
.xy
= dir1_pos
.zw
- dir1_pos
.xy
* fxaaConsoleRcpFrameOpt
.zw
;
1673 /*--------------------------------------------------------------------------*/
// temp1N is reused: its .xy coordinates are replaced by the sampled colour.
1675 temp1N
= tex2D(tex
, temp1N
.xy
); // h4tex2Dlod(tex, half4(temp1N.xy, 0.0, 0.0));
// Coordinates of the positive first-direction tap:
// pos.xy + dir1 * fxaaConsoleRcpFrameOpt.zw.
1677 rgby1
.xy
= dir1_pos
.zw
+ dir1_pos
.xy
* fxaaConsoleRcpFrameOpt
.zw
;
1678 /*--------------------------------------------------------------------------*/
// Sample the positive tap, then average the two first-direction taps.
1680 rgby1
= tex2D(tex
, rgby1
.xy
); // h4tex2Dlod(tex, half4(rgby1.xy, 0.0, 0.0));
1681 rgby1
= (temp1N
+ rgby1
) * 0.5;
1682 /*--------------------------------------------------------------------------*/
// Negative second-direction tap: pos.xy - dir2 * fxaaConsoleRcpFrameOpt2.zw.
1685 temp2N
.xy
= dir2_pos
.zw
- dir2_pos
.xy
* fxaaConsoleRcpFrameOpt2
.zw
;
1686 temp2N
= tex2D(tex
, temp2N
.xy
); // h4tex2Dlod(tex, half4(temp2N.xy, 0.0, 0.0));
1687 /*--------------------------------------------------------------------------*/
// Positive second-direction tap: pos.xy + dir2 * fxaaConsoleRcpFrameOpt2.zw,
// then average the two second-direction taps.
1690 rgby2
.xy
= dir2_pos
.zw
+ dir2_pos
.xy
* fxaaConsoleRcpFrameOpt2
.zw
;
1691 rgby2
= tex2D(tex
, rgby2
.xy
); // h4tex2Dlod(tex, half4(rgby2.xy, 0.0, 0.0));
1692 rgby2
= (temp2N
+ rgby2
) * 0.5;
1693 /*--------------------------------------------------------------------------*/
1695 // compiler moves these scalar ops up to other cycles
// Minimum and maximum luma over the four corner taps (.w form, then .y form).
1696 #if (FXAA_GREEN_AS_LUMA == 0)
1697 half lumaMin
= min(min(lumaNw
.w
, lumaSw
.w
), min(lumaNe
.w
, lumaSe
.w
));
1698 half lumaMax
= max(max(lumaNw
.w
, lumaSw
.w
), max(lumaNe
.w
, lumaSe
.w
));
1700 half lumaMin
= min(min(lumaNw
.y
, lumaSw
.y
), min(lumaNe
.y
, lumaSe
.y
));
1701 half lumaMax
= max(max(lumaNw
.y
, lumaSw
.y
), max(lumaNe
.y
, lumaSe
.y
));
// rgby2 becomes the mean of the second-direction and first-direction
// averages, i.e. the mean of all four directional taps.
1703 rgby2
= (rgby2
+ rgby1
) * 0.5;
1704 /*--------------------------------------------------------------------------*/
// Test whether the four-tap luma lies below lumaMin or above lumaMax.
1706 #if (FXAA_GREEN_AS_LUMA == 0)
1707 bool twoTapLt
= rgby2
.w
< lumaMin
;
1708 bool twoTapGt
= rgby2
.w
> lumaMax
;
1710 bool twoTapLt
= rgby2
.y
< lumaMin
;
1711 bool twoTapGt
= rgby2
.y
> lumaMax
;
1713 /*--------------------------------------------------------------------------*/
// Out of range: use the first-direction (two-tap) average instead.
1715 if(twoTapLt
|| twoTapGt
) rgby2
= rgby1
;
1716 /*--------------------------------------------------------------------------*/
1718 /*==========================================================================*/
1723 /*============================================================================
1725 FXAA3 CONSOLE - OPTIMIZED PS3 PIXEL SHADER (WITH EARLY EXIT)
1727 ==============================================================================
1728 The code mostly matches the assembly.
1729 I have a feeling that 14 cycles is possible, but I was not able to get there.
1730 Might have to increase register count to get full performance.
1731 Note this shader does not use perspective interpolation.
1733 Use the following cgc options,
1735 --fenable-bx2 --fastmath --fastprecision --nofloatbindings
1737 Use of FXAA_GREEN_AS_LUMA currently adds a cycle (16 clks).
1738 Will look at fixing this for FXAA 3.12.
1739 ------------------------------------------------------------------------------
1741 ------------------------------------------------------------------------------
1742 For reference and to aid in debug, output of NVShaderPerf should match this,
1745 0: texpkb h0.w(TRUE), v5.zyxx, #0
1746 2: addh h2.y(TRUE), h0.w, constant(0.001953, 0.000000, 0.000000, 0.000000).x
1747 4: texpkb h1.w(TRUE), v5.xwxx, #0
1748 6: addh h0.x(TRUE), h1.w, -h2.y
1749 7: texpkb h2.w(TRUE), v5.zwzz, #0
1750 9: minh h4.w(TRUE), h2.y, h2
1751 10: maxh h5.x(TRUE), h2.y, h2.w
1752 11: texpkb h0.w(TRUE), v5, #0
1753 13: addh h3.w(TRUE), -h0, h0.x
1754 14: addh h0.x(TRUE), h0.w, h0
1755 15: addh h0.z(TRUE), -h2.w, h0.x
1756 16: addh h0.x(TRUE), h2.w, h3.w
1757 17: minh h5.y(TRUE), h0.w, h1.w
1758 18: nrmh h2.xz(TRUE), h0_n
1759 19: minh_m8 h2.w(TRUE), |h2.x|, |h2.z|
1760 20: divx h4.xy(TRUE), h2_n.xzzw, h2_n.w
1761 21: movr r1.zw(TRUE), v4.xxxy
1762 22: maxh h2.w(TRUE), h0, h1
1764 24: madr r0.xy(TRUE), -h2.xzzw, constant(cConst5.x, cConst5.y, cConst5.z, cConst5.w).zwzz, r1.zwzz
1765 26: texpkb h0(TRUE), r0, #0
1766 28: maxh h5.x(TRUE), h2.w, h5
1767 29: minh h5.w(TRUE), h5.y, h4
1768 30: madr r1.xy(TRUE), h2.xzzw, constant(cConst5.x, cConst5.y, cConst5.z, cConst5.w).zwzz, r1.zwzz
1769 32: texpkb h2(TRUE), r1, #0
1770 34: addh_d2 h2(TRUE), h0, h2
1771 35: texpkb h1(TRUE), v4, #0
1772 37: maxh h5.y(TRUE), h5.x, h1.w
1773 38: minh h4.w(TRUE), h1, h5
1774 39: madr r0.xy(TRUE), -h4, constant(cConst5.x, cConst5.y, cConst5.z, cConst5.w).xyxx, r1.zwzz
1775 41: texpkb h0(TRUE), r0, #0
1776 43: addh_m8 h5.z(TRUE), h5.y, -h4.w
1777 44: madr r2.xy(TRUE), h4, constant(cConst5.x, cConst5.y, cConst5.z, cConst5.w).xyxx, r1.zwzz
1778 46: texpkb h3(TRUE), r2, #0
1779 48: addh_d2 h0(TRUE), h0, h3
1780 49: addh_d2 h3(TRUE), h0, h2
1781 50: movh h0(TRUE), h3
1782 51: slth h3.x(TRUE), h3.w, h5.w
1783 52: sgth h3.w(TRUE), h3, h5.x
1784 53: addx.c0 rc(TRUE), h3.x, h3
1785 54: slth.c0 rc(TRUE), h5.z, h5
1786 55: movh h0(c0.NE.w), h2
1787 56: movh h0(c0.NE.x), h1
1789 IPU0 ------ Simplified schedule: --------
1790 Pass | Unit | uOp | PC: Op
1791 -----+--------+------+-------------------------
1792 1 | SCT0/1 | mov | 0: TXLr h0.w, g[TEX1].zyxx, const.xxxx, TEX0;
1793 | TEX | txl | 0: TXLr h0.w, g[TEX1].zyxx, const.xxxx, TEX0;
1794 | SCB0 | add | 2: ADDh h2.y, h0.-w--, const.-x--;
1796 2 | SCT0/1 | mov | 4: TXLr h1.w, g[TEX1].xwxx, const.xxxx, TEX0;
1797 | TEX | txl | 4: TXLr h1.w, g[TEX1].xwxx, const.xxxx, TEX0;
1798 | SCB0 | add | 6: ADDh h0.x, h1.w---,-h2.y---;
1800 3 | SCT0/1 | mov | 7: TXLr h2.w, g[TEX1].zwzz, const.xxxx, TEX0;
1801 | TEX | txl | 7: TXLr h2.w, g[TEX1].zwzz, const.xxxx, TEX0;
1802 | SCB0 | max | 10: MAXh h5.x, h2.y---, h2.w---;
1803 | SCB1 | min | 9: MINh h4.w, h2.---y, h2;
1805 4 | SCT0/1 | mov | 11: TXLr h0.w, g[TEX1], const.xxxx, TEX0;
1806 | TEX | txl | 11: TXLr h0.w, g[TEX1], const.xxxx, TEX0;
1807 | SCB0 | add | 14: ADDh h0.x, h0.w---, h0;
1808 | SCB1 | add | 13: ADDh h3.w,-h0, h0.---x;
1810 5 | SCT0 | mad | 16: ADDh h0.x, h2.w---, h3.w---;
1811 | SCT1 | mad | 15: ADDh h0.z,-h2.--w-, h0.--x-;
1812 | SCB0 | min | 17: MINh h5.y, h0.-w--, h1.-w--;
1814 6 | SCT1 | mov | 18: NRMh h2.xz, h0;
1815 | SRB | nrm | 18: NRMh h2.xz, h0;
1816 | SCB1 | min | 19: MINh*8 h2.w, |h2.---x|, |h2.---z|;
1818 7 | SCT0 | div | 20: DIVx h4.xy, h2.xz--, h2.ww--;
1819 | SCT1 | mov | 21: MOVr r1.zw, g[TEX0].--xy;
1820 | SCB1 | max | 22: MAXh h2.w, h0, h1;
1822 8 | SCT0 | mad | 24: MADr r0.xy,-h2.xz--, const.zw--, r1.zw--;
1823 | SCT1 | mov | 26: TXLr h0, r0, const.xxxx, TEX0;
1824 | TEX | txl | 26: TXLr h0, r0, const.xxxx, TEX0;
1825 | SCB0 | max | 28: MAXh h5.x, h2.w---, h5;
1826 | SCB1 | min | 29: MINh h5.w, h5.---y, h4;
1828 9 | SCT0 | mad | 30: MADr r1.xy, h2.xz--, const.zw--, r1.zw--;
1829 | SCT1 | mov | 32: TXLr h2, r1, const.xxxx, TEX0;
1830 | TEX | txl | 32: TXLr h2, r1, const.xxxx, TEX0;
1831 | SCB0/1 | add | 34: ADDh/2 h2, h0, h2;
1833 10 | SCT0/1 | mov | 35: TXLr h1, g[TEX0], const.xxxx, TEX0;
1834 | TEX | txl | 35: TXLr h1, g[TEX0], const.xxxx, TEX0;
1835 | SCB0 | max | 37: MAXh h5.y, h5.-x--, h1.-w--;
1836 | SCB1 | min | 38: MINh h4.w, h1, h5;
1838 11 | SCT0 | mad | 39: MADr r0.xy,-h4, const.xy--, r1.zw--;
1839 | SCT1 | mov | 41: TXLr h0, r0, const.zzzz, TEX0;
1840 | TEX | txl | 41: TXLr h0, r0, const.zzzz, TEX0;
1841 | SCB0 | mad | 44: MADr r2.xy, h4, const.xy--, r1.zw--;
1842 | SCB1 | add | 43: ADDh*8 h5.z, h5.--y-,-h4.--w-;
1844 12 | SCT0/1 | mov | 46: TXLr h3, r2, const.xxxx, TEX0;
1845 | TEX | txl | 46: TXLr h3, r2, const.xxxx, TEX0;
1846 | SCB0/1 | add | 48: ADDh/2 h0, h0, h3;
1848 13 | SCT0/1 | mad | 49: ADDh/2 h3, h0, h2;
1849 | SCB0/1 | mul | 50: MOVh h0, h3;
1851 14 | SCT0 | set | 51: SLTh h3.x, h3.w---, h5.w---;
1852 | SCT1 | set | 52: SGTh h3.w, h3, h5.---x;
1853 | SCB0 | set | 54: SLThc0 rc, h5.z---, h5;
1854 | SCB1 | add | 53: ADDxc0_s rc, h3.---x, h3;
1856 15 | SCT0/1 | mul | 55: MOVh h0(NE0.wwww), h2;
1857 | SCB0/1 | mul | 56: MOVh h0(NE0.xxxx), h1;
1877 Pass SCT0 SCT1 TEX SCB0 SCB1
1878 1: 0% 0% 100% 100% 0%
1879 2: 0% 0% 100% 100% 0%
1880 3: 0% 0% 100% 100% 100%
1881 4: 0% 0% 100% 100% 100%
1882 5: 100% 100% 0% 100% 0%
1884 7: 100% 100% 0% 0% 100%
1885 8: 0% 0% 100% 100% 100%
1886 9: 0% 0% 100% 100% 100%
1887 10: 0% 0% 100% 100% 100%
1888 11: 0% 0% 100% 100% 100%
1889 12: 0% 0% 100% 100% 100%
1890 13: 100% 100% 0% 100% 100%
1891 14: 100% 100% 0% 100% 100%
1892 15: 100% 100% 0% 100% 100%
1894 MEAN: 33% 33% 60% 86% 80%
1895 Fragment Performance Setup: Driver RSX Compiler, GPU RSX, Flags 0x5
1896 Results 15 cycles, 3 r regs, 800,000,000 pixels/s
1897 ============================================================================*/
1898 #if (FXAA_PS3 == 1) && (FXAA_EARLY_EXIT == 1)
1899 /*--------------------------------------------------------------------------*/
// FxaaPixelShader() -- PS3 console path with early exit.
// Compiled only when FXAA_PS3 == 1 and FXAA_EARLY_EXIT == 1; declared half4.
// Outline of the visible code: sample four corner texels and the centre
// texel, build two tap directions, take two taps along each direction, then
// select between the four-tap mean, the first-direction mean and the
// unfiltered centre texel.
// Each luma statement appears twice, once on .w and once on .y; the .y copies
// presumably form the FXAA_GREEN_AS_LUMA != 0 branch.
// NOTE(review): `pos`, `tex`, the declarations of dir/dir1_pos/dir2_pos/
// temp1N/rgby1/temp2N/rgby2, the #else/#endif partners of the
// FXAA_GREEN_AS_LUMA tests, the opening brace and the return statement are
// not visible in this excerpt -- confirm against the full file.
1901 #pragma disablepc all
1903 //#pragma option OutColorPrec=fp16
1904 #pragma texformat default RGBA8
1905 /*==========================================================================*/
1906 half4
FxaaPixelShader(
1907 // See FXAA Quality FxaaPixelShader() source for docs on Inputs!
// Corner sample positions; read below through the .zy, .xw, .xy and .zw swizzles.
1909 FxaaFloat4 fxaaConsolePosPos
,
1911 //FxaaTex fxaaConsole360TexExpBiasNegOne,
1912 //FxaaTex fxaaConsole360TexExpBiasNegTwo,
1913 //FxaaFloat2 fxaaQualityRcpFrame,
// .zw scales the first-direction (dir1_pos) tap offset.
1914 FxaaFloat4 fxaaConsoleRcpFrameOpt
,
// .zw scales the second-direction (dir2_pos) tap offset.
1915 FxaaFloat4 fxaaConsoleRcpFrameOpt2
1916 //FxaaFloat4 fxaaConsole360RcpFrameOpt,
1917 //FxaaFloat fxaaQualitySubpix,
1918 //FxaaFloat fxaaQualityEdgeThreshold,
1919 //FxaaFloat fxaaQualityEdgeThresholdMin,
1920 //FxaaFloat fxaaConsoleEdgeSharpness,
1921 //FxaaFloat fxaaConsoleEdgeThreshold,
1922 //FxaaFloat fxaaConsoleEdgeThresholdMin,
1923 //FxaaFloat4 fxaaConsole360ConstDir
1925 /*--------------------------------------------------------------------------*/
// Corner tap "Ne": texture sample at fxaaConsolePosPos.zy.
1927 half4 rgbyNe
= tex2D(tex
, fxaaConsolePosPos
.zy
); // h4tex2Dlod(tex, half4(fxaaConsolePosPos.zy, 0, 0));
// Scalar Ne luma = sampled channel plus a constant 1/512.
1928 #if (FXAA_GREEN_AS_LUMA == 0)
1929 half lumaNe
= rgbyNe
.w
+ half(1.0/512.0);
1931 half lumaNe
= rgbyNe
.y
+ half(1.0/512.0);
1933 /*--------------------------------------------------------------------------*/
// Corner tap "Sw": texture sample at fxaaConsolePosPos.xw.
1935 half4 lumaSw
= tex2D(tex
, fxaaConsolePosPos
.xw
); // h4tex2Dlod(tex, half4(fxaaConsolePosPos.xw, 0, 0));
// Partial sum shared by both direction components: Sw - Ne.
1936 #if (FXAA_GREEN_AS_LUMA == 0)
1937 half lumaSwNegNe
= lumaSw
.w
- lumaNe
;
1939 half lumaSwNegNe
= lumaSw
.y
- lumaNe
;
1941 /*--------------------------------------------------------------------------*/
// Corner tap "Nw": texture sample at fxaaConsolePosPos.xy.
1943 half4 lumaNw
= tex2D(tex
, fxaaConsolePosPos
.xy
); // h4tex2Dlod(tex, half4(fxaaConsolePosPos.xy, 0, 0));
// Running max/min of luma over the Nw and Sw taps.
1944 #if (FXAA_GREEN_AS_LUMA == 0)
1945 half lumaMaxNwSw
= max(lumaNw
.w
, lumaSw
.w
);
1946 half lumaMinNwSw
= min(lumaNw
.w
, lumaSw
.w
);
1948 half lumaMaxNwSw
= max(lumaNw
.y
, lumaSw
.y
);
1949 half lumaMinNwSw
= min(lumaNw
.y
, lumaSw
.y
);
1951 /*--------------------------------------------------------------------------*/
// Corner tap "Se": texture sample at fxaaConsolePosPos.zw.
1953 half4 lumaSe
= tex2D(tex
, fxaaConsolePosPos
.zw
); // h4tex2Dlod(tex, half4(fxaaConsolePosPos.zw, 0, 0));
// dirZ = Nw + (Sw - Ne); dirX = -Nw + (Sw - Ne).
1954 #if (FXAA_GREEN_AS_LUMA == 0)
1955 half dirZ
= lumaNw
.w
+ lumaSwNegNe
;
1956 half dirX
= -lumaNw
.w
+ lumaSwNegNe
;
1958 half dirZ
= lumaNw
.y
+ lumaSwNegNe
;
1959 half dirX
= -lumaNw
.y
+ lumaSwNegNe
;
1961 /*--------------------------------------------------------------------------*/
// Finish the direction with the Se tap: dir.x = Se + dirX, dir.z = -Se + dirZ.
// Also start the Ne/Se half of the luma minimum.
1965 #if (FXAA_GREEN_AS_LUMA == 0)
1966 dir
.x
= lumaSe
.w
+ dirX
;
1967 dir
.z
= -lumaSe
.w
+ dirZ
;
1968 half lumaMinNeSe
= min(lumaNe
, lumaSe
.w
);
1970 dir
.x
= lumaSe
.y
+ dirX
;
1971 dir
.z
= -lumaSe
.y
+ dirZ
;
1972 half lumaMinNeSe
= min(lumaNe
, lumaSe
.y
);
1974 /*--------------------------------------------------------------------------*/
// First tap direction: .xz of the normalized `dir`.
// NOTE(review): normalize() is applied to the whole `dir` vector, so any
// component other than .x/.z affects the result; dir's declaration and the
// initialisation of its other components are not visible here -- confirm.
1977 dir1_pos
.xy
= normalize(dir
).xz
;
// Smaller absolute component of the direction, scaled by the
// FXAA_CONSOLE__PS3_EDGE_SHARPNESS constant; used as the divisor below.
1978 half dirAbsMinTimes8
= min(abs(dir1_pos
.x
), abs(dir1_pos
.y
)) * half(FXAA_CONSOLE__PS3_EDGE_SHARPNESS
);
1979 /*--------------------------------------------------------------------------*/
// Second tap direction: first direction divided by the scaled minimum,
// clamped per component to [-2, 2].
1982 dir2_pos
.xy
= clamp(dir1_pos
.xy
/ dirAbsMinTimes8
, half(-2.0), half(2.0));
// Both direction vectors carry pos.xy in their .zw lanes.
1983 dir1_pos
.zw
= pos
.xy
;
1984 dir2_pos
.zw
= pos
.xy
;
// Ne/Se half of the luma maximum.
1985 #if (FXAA_GREEN_AS_LUMA == 0)
1986 half lumaMaxNeSe
= max(lumaNe
, lumaSe
.w
);
1988 half lumaMaxNeSe
= max(lumaNe
, lumaSe
.y
);
1990 /*--------------------------------------------------------------------------*/
// Negative first-direction tap: pos.xy - dir1 * fxaaConsoleRcpFrameOpt.zw.
// temp1N is reused: its .xy coordinates are replaced by the sampled colour.
1993 temp1N
.xy
= dir1_pos
.zw
- dir1_pos
.xy
* fxaaConsoleRcpFrameOpt
.zw
;
1994 temp1N
= tex2D(tex
, temp1N
.xy
); // h4tex2Dlod(tex, half4(temp1N.xy, 0.0, 0.0));
// Luma range over the four corner taps (centre texel not yet included).
1995 half lumaMax
= max(lumaMaxNwSw
, lumaMaxNeSe
);
1996 half lumaMin
= min(lumaMinNwSw
, lumaMinNeSe
);
1997 /*--------------------------------------------------------------------------*/
// Positive first-direction tap: pos.xy + dir1 * fxaaConsoleRcpFrameOpt.zw,
// then average the two first-direction taps.
2000 rgby1
.xy
= dir1_pos
.zw
+ dir1_pos
.xy
* fxaaConsoleRcpFrameOpt
.zw
;
2001 rgby1
= tex2D(tex
, rgby1
.xy
); // h4tex2Dlod(tex, half4(rgby1.xy, 0.0, 0.0));
2002 rgby1
= (temp1N
+ rgby1
) * 0.5;
2003 /*--------------------------------------------------------------------------*/
// Centre texel at pos.xy; it is the value selected on early exit.
2005 half4 rgbyM
= tex2D(tex
, pos
.xy
); // h4tex2Dlod(tex, half4(pos.xy, 0.0, 0.0));
// Extend the corner luma range with the centre texel's luma.
2006 #if (FXAA_GREEN_AS_LUMA == 0)
2007 half lumaMaxM
= max(lumaMax
, rgbyM
.w
);
2008 half lumaMinM
= min(lumaMin
, rgbyM
.w
);
2010 half lumaMaxM
= max(lumaMax
, rgbyM
.y
);
2011 half lumaMinM
= min(lumaMin
, rgbyM
.y
);
2013 /*--------------------------------------------------------------------------*/
// Negative second-direction tap: pos.xy - dir2 * fxaaConsoleRcpFrameOpt2.zw.
2016 temp2N
.xy
= dir2_pos
.zw
- dir2_pos
.xy
* fxaaConsoleRcpFrameOpt2
.zw
;
2017 temp2N
= tex2D(tex
, temp2N
.xy
); // h4tex2Dlod(tex, half4(temp2N.xy, 0.0, 0.0));
// Coordinates of the positive second-direction tap:
// pos.xy + dir2 * fxaaConsoleRcpFrameOpt2.zw.
2019 rgby2
.xy
= dir2_pos
.zw
+ dir2_pos
.xy
* fxaaConsoleRcpFrameOpt2
.zw
;
// Five-tap luma range divided by FXAA_CONSOLE__PS3_EDGE_THRESHOLD; compared
// against lumaMax below to decide the early exit.
2020 half lumaRangeM
= (lumaMaxM
- lumaMinM
) / FXAA_CONSOLE__PS3_EDGE_THRESHOLD
;
2021 /*--------------------------------------------------------------------------*/
// Sample the positive tap, then average the two second-direction taps.
2023 rgby2
= tex2D(tex
, rgby2
.xy
); // h4tex2Dlod(tex, half4(rgby2.xy, 0.0, 0.0));
2024 rgby2
= (temp2N
+ rgby2
) * 0.5;
2025 /*--------------------------------------------------------------------------*/
// rgby2 becomes the mean of the second-direction and first-direction
// averages, i.e. the mean of all four directional taps.
2027 rgby2
= (rgby2
+ rgby1
) * 0.5;
2028 /*--------------------------------------------------------------------------*/
// Test whether the four-tap luma lies below lumaMin or above lumaMax; these
// are the corner-only bounds, not lumaMinM/lumaMaxM.
2030 #if (FXAA_GREEN_AS_LUMA == 0)
2031 bool twoTapLt
= rgby2
.w
< lumaMin
;
2032 bool twoTapGt
= rgby2
.w
> lumaMax
;
2034 bool twoTapLt
= rgby2
.y
< lumaMin
;
2035 bool twoTapGt
= rgby2
.y
> lumaMax
;
// Early exit when the threshold-scaled range is smaller than the corner
// luma maximum.
2037 bool earlyExit
= lumaRangeM
< lumaMax
;
2038 bool twoTap
= twoTapLt
|| twoTapGt
;
2039 /*--------------------------------------------------------------------------*/
// Selection order matters: the early-exit assignment runs last, so the
// centre texel overrides the two-tap fallback when both conditions hold.
2041 if(twoTap
) rgby2
= rgby1
;
2042 if(earlyExit
) rgby2
= rgbyM
;
2043 /*--------------------------------------------------------------------------*/
2045 /*==========================================================================*/