From fdd8e34afbe739c326f50756fc3bc8923bd3e45c Mon Sep 17 00:00:00 2001 From: Peter Thoman Date: Sun, 21 Oct 2012 21:39:47 +0200 Subject: [PATCH] Version 1.8 --- Actions.def | 6 +- DATA/DSfix.ini | 8 +- DATA/DSfixKeys.ini | 5 +- DATA/VERSIONS.txt | 7 + DATA/dsfix/FXAA.fx | 116 ++ DATA/dsfix/FXAA.h | 2048 ++++++++++++++++++++++++++++ DATA/dsfix/VSSAO.fx | 60 +- DATA/dsfix/vssaothicknessmodel.png | Bin 3726 -> 0 bytes DSfix.v11.suo | Bin 91136 -> 94720 bytes DSfix.vcxproj | 2 + FPS.cpp | 9 +- FXAA.cpp | 92 ++ FXAA.h | 31 + RenderstateManager.cpp | 33 +- RenderstateManager.h | 11 +- Settings.cpp | 13 + Settings.def | 3 +- Settings.h | 5 + VSSAO.cpp | 7 - VSSAO.h | 4 +- main.h | 2 +- 21 files changed, 2382 insertions(+), 80 deletions(-) create mode 100644 DATA/dsfix/FXAA.fx create mode 100644 DATA/dsfix/FXAA.h delete mode 100644 DATA/dsfix/vssaothicknessmodel.png create mode 100644 FXAA.cpp create mode 100644 FXAA.h diff --git a/Actions.def b/Actions.def index e08c3a6..2c7e764 100644 --- a/Actions.def +++ b/Actions.def @@ -14,17 +14,21 @@ ACTION(takeHudlessScreenshot, RSManager::get().enableTakeScreenshot()) // Graphics Actions -ACTION(toggleSMAA, RSManager::get().toggleSmaa()) +ACTION(toggleAA, RSManager::get().toggleAA()) ACTION(toggleVSSAO, RSManager::get().toggleVssao()) ACTION(toggleDofGauss, RSManager::get().toggleDofGauss()) ACTION(toggleHUD, RSManager::get().toggleHideHud()) ACTION(toggleHUDChanges, RSManager::get().toggleChangeHud()) +ACTION(toggle30FPSLimit, Settings::get().toggle30FPSLimit()) + // Development Actions ACTION(reloadSSAOEffect, RSManager::get().reloadVssao()) ACTION(reloadGAUSSEffect, RSManager::get().reloadGauss()) +ACTION(reloadAAEffect, RSManager::get().reloadAA()) + ACTION(singleFrameFullCapture, RSManager::get().enableSingleFrameCapture()) ACTION(userTrigger, SDLOG(0, "================================================================= USER TRIGGER ===\n")) ACTION(reloadHUDVertices, RSManager::get().reloadHudVertices()); diff --git a/DATA/DSfix.ini b/DATA/DSfix.ini index 0192484..584b3e5 100644 --- a/DATA/DSfix.ini +++ b/DATA/DSfix.ini @@ -7,13 +7,17 @@ renderWidth 1920 renderHeight 1080 -# SMAA1x toggle and quality setting +# AA toggle and quality setting # 0 = off (best performance, worst IQ) # 1 = low # 2 = medium # 3 = high # 4 = ultra (worst performance, best IQ) -smaaQuality 0 +aaQuality 0 + +# AA type +# either "SMAA" or "FXAA" +aaType SMAA # Enable and set the strength of the SSAO effect (all 3 settings have the same performance impact!) # 0 = off diff --git a/DATA/DSfixKeys.ini b/DATA/DSfixKeys.ini index 7249ce3..2d793a5 100644 --- a/DATA/DSfixKeys.ini +++ b/DATA/DSfixKeys.ini @@ -8,12 +8,15 @@ toggleBorderlessFullscreen VK_F8 #takeHudlessScreenshot VK_NEXT toggleHUD VK_RCONTROL -#toggleHUDChanges VK_RSHIFT +toggleHUDChanges VK_RSHIFT #toggleSMAA VK_NUMPAD1 #toggleVSSAO VK_NUMPAD2 #toggleDofGauss VK_NUMPAD3 +toggle30FPSLimit VK_BACK + +#reloadAAEffect VK_NUMPAD4 #reloadSSAOEffect VK_NUMPAD5 #reloadGAUSSEffect VK_NUMPAD6 #reloadHUDVertices VK_NUMPAD9 diff --git a/DATA/VERSIONS.txt b/DATA/VERSIONS.txt index 0ea9fce..84ad798 100644 --- a/DATA/VERSIONS.txt +++ b/DATA/VERSIONS.txt @@ -1,3 +1,10 @@ +21-10-1012 -- version 1.8 beta +============================== +- Added FXAA option for AA +- Added 30 FPS limit toggle +- Greatly improved SSAO depth usage, removed thickness model hack + + 29-09-1012 -- version 1.7 beta ============================== - Re-implemented the FPS limiter. It's quite exact now and also performed at a better point in time diff --git a/DATA/dsfix/FXAA.fx b/DATA/dsfix/FXAA.fx new file mode 100644 index 0000000..3899c92 --- /dev/null +++ b/DATA/dsfix/FXAA.fx @@ -0,0 +1,116 @@ +texture2D frameTex2D; + +#define FXAA_PC 1 +#define FXAA_HLSL_3 1 + +#ifndef FXAA_QUALITY__PRESET +#define FXAA_QUALITY__PRESET 26 +#endif + +#include "FXAA.h" + +sampler frameSampler = sampler_state +{ + texture = ; + AddressU = CLAMP; + AddressV = CLAMP; + MINFILTER = LINEAR; + MAGFILTER = LINEAR; +}; + +struct VSOUT +{ + float4 vertPos : POSITION; + float2 UVCoord : TEXCOORD0; +}; + +struct VSIN +{ + float4 vertPos : POSITION0; + float2 UVCoord : TEXCOORD0; +}; + +VSOUT FrameVS(VSIN IN) +{ + VSOUT OUT; + OUT.vertPos = IN.vertPos; + OUT.UVCoord = IN.UVCoord; + return OUT; +} + +float4 calcLuma(VSOUT IN) : COLOR0 +{ + float4 color = tex2D(frameSampler, IN.UVCoord); + color.a = dot(color.rgb, float3(0.299, 0.587, 0.114)); + //return float4(color.a,color.a,color.a,color.a); + return color; +} + +float4 applyFXAA(VSOUT IN) : COLOR0 +{ + return FxaaPixelShader(IN.UVCoord, float4(0,0,0,0), frameSampler, frameSampler, + frameSampler, PIXEL_SIZE, float4(0,0,0,0), float4(0,0,0,0), float4(0,0,0,0), + // + // Only used on FXAA Quality. + // This used to be the FXAA_QUALITY__SUBPIX define. + // It is here now to allow easier tuning. + // Choose the amount of sub-pixel aliasing removal. + // This can effect sharpness. + // 1.00 - upper limit (softer) + // 0.75 - default amount of filtering + // 0.50 - lower limit (sharper, less sub-pixel aliasing removal) + // 0.25 - almost off + // 0.00 - completely off + 0.75, + // + // Only used on FXAA Quality. + // This used to be the FXAA_QUALITY__EDGE_THRESHOLD define. + // It is here now to allow easier tuning. + // The minimum amount of local contrast required to apply algorithm. + // 0.333 - too little (faster) + // 0.250 - low quality + // 0.166 - default + // 0.125 - high quality + // 0.063 - overkill (slower) + 0.125, + // + // Only used on FXAA Quality. + // This used to be the FXAA_QUALITY__EDGE_THRESHOLD_MIN define. + // It is here now to allow easier tuning. + // Trims the algorithm from processing darks. + // 0.0833 - upper limit (default, the start of visible unfiltered edges) + // 0.0625 - high quality (faster) + // 0.0312 - visible limit (slower) + // Special notes when using FXAA_GREEN_AS_LUMA, + // Likely want to set this to zero. + // As colors that are mostly not-green + // will appear very dark in the green channel! + // Tune by looking at mostly non-green content, + // then start at zero and increase until aliasing is a problem. + 0.0312, + 8.0, 0.125, 0.05, float4(0,0,0,0) ); +} + +technique t0 +{ + pass P0 + { + VertexShader = compile vs_3_0 FrameVS(); + PixelShader = compile ps_3_0 calcLuma(); + ZEnable = false; + SRGBWriteEnable = false; + AlphaBlendEnable = false; + AlphaTestEnable = false; + ColorWriteEnable = RED|GREEN|BLUE|ALPHA; + } + + pass P1 + { + VertexShader = compile vs_3_0 FrameVS(); + PixelShader = compile ps_3_0 applyFXAA(); + ZEnable = false; + SRGBWriteEnable = false; + AlphaBlendEnable = false; + AlphaTestEnable = false; + } +} diff --git a/DATA/dsfix/FXAA.h b/DATA/dsfix/FXAA.h new file mode 100644 index 0000000..37c5581 --- /dev/null +++ b/DATA/dsfix/FXAA.h @@ -0,0 +1,2048 @@ +/*============================================================================ + + + NVIDIA FXAA 3.11 by TIMOTHY LOTTES + + +------------------------------------------------------------------------------ +COPYRIGHT (C) 2010, 2011 NVIDIA CORPORATION. ALL RIGHTS RESERVED. +------------------------------------------------------------------------------ +TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THIS SOFTWARE IS PROVIDED +*AS IS* AND NVIDIA AND ITS SUPPLIERS DISCLAIM ALL WARRANTIES, EITHER EXPRESS +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT SHALL NVIDIA +OR ITS SUPPLIERS BE LIABLE FOR ANY SPECIAL, INCIDENTAL, INDIRECT, OR +CONSEQUENTIAL DAMAGES WHATSOEVER (INCLUDING, WITHOUT LIMITATION, DAMAGES FOR +LOSS OF BUSINESS PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, +OR ANY OTHER PECUNIARY LOSS) ARISING OUT OF THE USE OF OR INABILITY TO USE +THIS SOFTWARE, EVEN IF NVIDIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH +DAMAGES. + +------------------------------------------------------------------------------ + INTEGRATION CHECKLIST +------------------------------------------------------------------------------ +(1.) +In the shader source, setup defines for the desired configuration. +When providing multiple shaders (for different presets), +simply setup the defines differently in multiple files. +Example, + + #define FXAA_PC 1 + #define FXAA_HLSL_5 1 + #define FXAA_QUALITY__PRESET 12 + +Or, + + #define FXAA_360 1 + +Or, + + #define FXAA_PS3 1 + +Etc. + +(2.) +Then include this file, + + #include "Fxaa3_11.h" + +(3.) +Then call the FXAA pixel shader from within your desired shader. +Look at the FXAA Quality FxaaPixelShader() for docs on inputs. +As for FXAA 3.11 all inputs for all shaders are the same +to enable easy porting between platforms. + + return FxaaPixelShader(...); + +(4.) +Insure pass prior to FXAA outputs RGBL (see next section). +Or use, + + #define FXAA_GREEN_AS_LUMA 1 + +(5.) +Setup engine to provide the following constants +which are used in the FxaaPixelShader() inputs, + + FxaaFloat2 fxaaQualityRcpFrame, + FxaaFloat4 fxaaConsoleRcpFrameOpt, + FxaaFloat4 fxaaConsoleRcpFrameOpt2, + FxaaFloat4 fxaaConsole360RcpFrameOpt2, + FxaaFloat fxaaQualitySubpix, + FxaaFloat fxaaQualityEdgeThreshold, + FxaaFloat fxaaQualityEdgeThresholdMin, + FxaaFloat fxaaConsoleEdgeSharpness, + FxaaFloat fxaaConsoleEdgeThreshold, + FxaaFloat fxaaConsoleEdgeThresholdMin, + FxaaFloat4 fxaaConsole360ConstDir + +Look at the FXAA Quality FxaaPixelShader() for docs on inputs. + +(6.) +Have FXAA vertex shader run as a full screen triangle, +and output "pos" and "fxaaConsolePosPos" +such that inputs in the pixel shader provide, + + // {xy} = center of pixel + FxaaFloat2 pos, + + // {xy__} = upper left of pixel + // {__zw} = lower right of pixel + FxaaFloat4 fxaaConsolePosPos, + +(7.) +Insure the texture sampler(s) used by FXAA are set to bilinear filtering. + + +------------------------------------------------------------------------------ + INTEGRATION - RGBL AND COLORSPACE +------------------------------------------------------------------------------ +FXAA3 requires RGBL as input unless the following is set, + + #define FXAA_GREEN_AS_LUMA 1 + +In which case the engine uses green in place of luma, +and requires RGB input is in a non-linear colorspace. + +RGB should be LDR (low dynamic range). +Specifically do FXAA after tonemapping. + +RGB data as returned by a texture fetch can be non-linear, +or linear when FXAA_GREEN_AS_LUMA is not set. +Note an "sRGB format" texture counts as linear, +because the result of a texture fetch is linear data. +Regular "RGBA8" textures in the sRGB colorspace are non-linear. + +If FXAA_GREEN_AS_LUMA is not set, +luma must be stored in the alpha channel prior to running FXAA. +This luma should be in a perceptual space (could be gamma 2.0). +Example pass before FXAA where output is gamma 2.0 encoded, + + color.rgb = ToneMap(color.rgb); // linear color output + color.rgb = sqrt(color.rgb); // gamma 2.0 color output + return color; + +To use FXAA, + + color.rgb = ToneMap(color.rgb); // linear color output + color.rgb = sqrt(color.rgb); // gamma 2.0 color output + color.a = dot(color.rgb, FxaaFloat3(0.299, 0.587, 0.114)); // compute luma + return color; + +Another example where output is linear encoded, +say for instance writing to an sRGB formated render target, +where the render target does the conversion back to sRGB after blending, + + color.rgb = ToneMap(color.rgb); // linear color output + return color; + +To use FXAA, + + color.rgb = ToneMap(color.rgb); // linear color output + color.a = sqrt(dot(color.rgb, FxaaFloat3(0.299, 0.587, 0.114))); // compute luma + return color; + +Getting luma correct is required for the algorithm to work correctly. + + +------------------------------------------------------------------------------ + BEING LINEARLY CORRECT? +------------------------------------------------------------------------------ +Applying FXAA to a framebuffer with linear RGB color will look worse. +This is very counter intuitive, but happends to be true in this case. +The reason is because dithering artifacts will be more visiable +in a linear colorspace. + + +------------------------------------------------------------------------------ + COMPLEX INTEGRATION +------------------------------------------------------------------------------ +Q. What if the engine is blending into RGB before wanting to run FXAA? + +A. In the last opaque pass prior to FXAA, + have the pass write out luma into alpha. + Then blend into RGB only. + FXAA should be able to run ok + assuming the blending pass did not any add aliasing. + This should be the common case for particles and common blending passes. + +A. Or use FXAA_GREEN_AS_LUMA. + +============================================================================*/ + +/*============================================================================ + + INTEGRATION KNOBS + +============================================================================*/ +// +// FXAA_PS3 and FXAA_360 choose the console algorithm (FXAA3 CONSOLE). +// FXAA_360_OPT is a prototype for the new optimized 360 version. +// +// 1 = Use API. +// 0 = Don't use API. +// +/*--------------------------------------------------------------------------*/ +#ifndef FXAA_PS3 + #define FXAA_PS3 0 +#endif +/*--------------------------------------------------------------------------*/ +#ifndef FXAA_360 + #define FXAA_360 0 +#endif +/*--------------------------------------------------------------------------*/ +#ifndef FXAA_360_OPT + #define FXAA_360_OPT 0 +#endif +/*==========================================================================*/ +#ifndef FXAA_PC + // + // FXAA Quality + // The high quality PC algorithm. + // + #define FXAA_PC 0 +#endif +/*--------------------------------------------------------------------------*/ +#ifndef FXAA_PC_CONSOLE + // + // The console algorithm for PC is included + // for developers targeting really low spec machines. + // Likely better to just run FXAA_PC, and use a really low preset. + // + #define FXAA_PC_CONSOLE 0 +#endif +/*--------------------------------------------------------------------------*/ +#ifndef FXAA_GLSL_120 + #define FXAA_GLSL_120 0 +#endif +/*--------------------------------------------------------------------------*/ +#ifndef FXAA_GLSL_130 + #define FXAA_GLSL_130 0 +#endif +/*--------------------------------------------------------------------------*/ +#ifndef FXAA_HLSL_3 + #define FXAA_HLSL_3 0 +#endif +/*--------------------------------------------------------------------------*/ +#ifndef FXAA_HLSL_4 + #define FXAA_HLSL_4 0 +#endif +/*--------------------------------------------------------------------------*/ +#ifndef FXAA_HLSL_5 + #define FXAA_HLSL_5 0 +#endif +/*==========================================================================*/ +#ifndef FXAA_GREEN_AS_LUMA + // + // For those using non-linear color, + // and either not able to get luma in alpha, or not wanting to, + // this enables FXAA to run using green as a proxy for luma. + // So with this enabled, no need to pack luma in alpha. + // + // This will turn off AA on anything which lacks some amount of green. + // Pure red and blue or combination of only R and B, will get no AA. + // + // Might want to lower the settings for both, + // fxaaConsoleEdgeThresholdMin + // fxaaQualityEdgeThresholdMin + // In order to insure AA does not get turned off on colors + // which contain a minor amount of green. + // + // 1 = On. + // 0 = Off. + // + #define FXAA_GREEN_AS_LUMA 0 +#endif +/*--------------------------------------------------------------------------*/ +#ifndef FXAA_EARLY_EXIT + // + // Controls algorithm's early exit path. + // On PS3 turning this ON adds 2 cycles to the shader. + // On 360 turning this OFF adds 10ths of a millisecond to the shader. + // Turning this off on console will result in a more blurry image. + // So this defaults to on. + // + // 1 = On. + // 0 = Off. + // + #define FXAA_EARLY_EXIT 1 +#endif +/*--------------------------------------------------------------------------*/ +#ifndef FXAA_DISCARD + // + // Only valid for PC OpenGL currently. + // Probably will not work when FXAA_GREEN_AS_LUMA = 1. + // + // 1 = Use discard on pixels which don't need AA. + // For APIs which enable concurrent TEX+ROP from same surface. + // 0 = Return unchanged color on pixels which don't need AA. + // + #define FXAA_DISCARD 0 +#endif +/*--------------------------------------------------------------------------*/ +#ifndef FXAA_FAST_PIXEL_OFFSET + // + // Used for GLSL 120 only. + // + // 1 = GL API supports fast pixel offsets + // 0 = do not use fast pixel offsets + // + #ifdef GL_EXT_gpu_shader4 + #define FXAA_FAST_PIXEL_OFFSET 1 + #endif + #ifdef GL_NV_gpu_shader5 + #define FXAA_FAST_PIXEL_OFFSET 1 + #endif + #ifdef GL_ARB_gpu_shader5 + #define FXAA_FAST_PIXEL_OFFSET 1 + #endif + #ifndef FXAA_FAST_PIXEL_OFFSET + #define FXAA_FAST_PIXEL_OFFSET 0 + #endif +#endif +/*--------------------------------------------------------------------------*/ +#ifndef FXAA_GATHER4_ALPHA + // + // 1 = API supports gather4 on alpha channel. + // 0 = API does not support gather4 on alpha channel. + // + #if (FXAA_HLSL_5 == 1) + #define FXAA_GATHER4_ALPHA 1 + #endif + #ifdef GL_ARB_gpu_shader5 + #define FXAA_GATHER4_ALPHA 1 + #endif + #ifdef GL_NV_gpu_shader5 + #define FXAA_GATHER4_ALPHA 1 + #endif + #ifndef FXAA_GATHER4_ALPHA + #define FXAA_GATHER4_ALPHA 0 + #endif +#endif + +/*============================================================================ + FXAA CONSOLE PS3 - TUNING KNOBS +============================================================================*/ +#ifndef FXAA_CONSOLE__PS3_EDGE_SHARPNESS + // + // Consoles the sharpness of edges on PS3 only. + // Non-PS3 tuning is done with shader input. + // + // Due to the PS3 being ALU bound, + // there are only two safe values here: 4 and 8. + // These options use the shaders ability to a free *|/ by 2|4|8. + // + // 8.0 is sharper + // 4.0 is softer + // 2.0 is really soft (good for vector graphics inputs) + // + #if 1 + #define FXAA_CONSOLE__PS3_EDGE_SHARPNESS 8.0 + #endif + #if 0 + #define FXAA_CONSOLE__PS3_EDGE_SHARPNESS 4.0 + #endif + #if 0 + #define FXAA_CONSOLE__PS3_EDGE_SHARPNESS 2.0 + #endif +#endif +/*--------------------------------------------------------------------------*/ +#ifndef FXAA_CONSOLE__PS3_EDGE_THRESHOLD + // + // Only effects PS3. + // Non-PS3 tuning is done with shader input. + // + // The minimum amount of local contrast required to apply algorithm. + // The console setting has a different mapping than the quality setting. + // + // This only applies when FXAA_EARLY_EXIT is 1. + // + // Due to the PS3 being ALU bound, + // there are only two safe values here: 0.25 and 0.125. + // These options use the shaders ability to a free *|/ by 2|4|8. + // + // 0.125 leaves less aliasing, but is softer + // 0.25 leaves more aliasing, and is sharper + // + #if 1 + #define FXAA_CONSOLE__PS3_EDGE_THRESHOLD 0.125 + #else + #define FXAA_CONSOLE__PS3_EDGE_THRESHOLD 0.25 + #endif +#endif + +/*============================================================================ + FXAA QUALITY - TUNING KNOBS +------------------------------------------------------------------------------ +NOTE the other tuning knobs are now in the shader function inputs! +============================================================================*/ +#ifndef FXAA_QUALITY__PRESET + // + // Choose the quality preset. + // This needs to be compiled into the shader as it effects code. + // Best option to include multiple presets is to + // in each shader define the preset, then include this file. + // + // OPTIONS + // ----------------------------------------------------------------------- + // 10 to 15 - default medium dither (10=fastest, 15=highest quality) + // 20 to 29 - less dither, more expensive (20=fastest, 29=highest quality) + // 39 - no dither, very expensive + // + // NOTES + // ----------------------------------------------------------------------- + // 12 = slightly faster then FXAA 3.9 and higher edge quality (default) + // 13 = about same speed as FXAA 3.9 and better than 12 + // 23 = closest to FXAA 3.9 visually and performance wise + // _ = the lowest digit is directly related to performance + // _ = the highest digit is directly related to style + // + #define FXAA_QUALITY__PRESET 12 +#endif + + +/*============================================================================ + + FXAA QUALITY - PRESETS + +============================================================================*/ + +/*============================================================================ + FXAA QUALITY - MEDIUM DITHER PRESETS +============================================================================*/ +#if (FXAA_QUALITY__PRESET == 10) + #define FXAA_QUALITY__PS 3 + #define FXAA_QUALITY__P0 1.5 + #define FXAA_QUALITY__P1 3.0 + #define FXAA_QUALITY__P2 12.0 +#endif +/*--------------------------------------------------------------------------*/ +#if (FXAA_QUALITY__PRESET == 11) + #define FXAA_QUALITY__PS 4 + #define FXAA_QUALITY__P0 1.0 + #define FXAA_QUALITY__P1 1.5 + #define FXAA_QUALITY__P2 3.0 + #define FXAA_QUALITY__P3 12.0 +#endif +/*--------------------------------------------------------------------------*/ +#if (FXAA_QUALITY__PRESET == 12) + #define FXAA_QUALITY__PS 5 + #define FXAA_QUALITY__P0 1.0 + #define FXAA_QUALITY__P1 1.5 + #define FXAA_QUALITY__P2 2.0 + #define FXAA_QUALITY__P3 4.0 + #define FXAA_QUALITY__P4 12.0 +#endif +/*--------------------------------------------------------------------------*/ +#if (FXAA_QUALITY__PRESET == 13) + #define FXAA_QUALITY__PS 6 + #define FXAA_QUALITY__P0 1.0 + #define FXAA_QUALITY__P1 1.5 + #define FXAA_QUALITY__P2 2.0 + #define FXAA_QUALITY__P3 2.0 + #define FXAA_QUALITY__P4 4.0 + #define FXAA_QUALITY__P5 12.0 +#endif +/*--------------------------------------------------------------------------*/ +#if (FXAA_QUALITY__PRESET == 14) + #define FXAA_QUALITY__PS 7 + #define FXAA_QUALITY__P0 1.0 + #define FXAA_QUALITY__P1 1.5 + #define FXAA_QUALITY__P2 2.0 + #define FXAA_QUALITY__P3 2.0 + #define FXAA_QUALITY__P4 2.0 + #define FXAA_QUALITY__P5 4.0 + #define FXAA_QUALITY__P6 12.0 +#endif +/*--------------------------------------------------------------------------*/ +#if (FXAA_QUALITY__PRESET == 15) + #define FXAA_QUALITY__PS 8 + #define FXAA_QUALITY__P0 1.0 + #define FXAA_QUALITY__P1 1.5 + #define FXAA_QUALITY__P2 2.0 + #define FXAA_QUALITY__P3 2.0 + #define FXAA_QUALITY__P4 2.0 + #define FXAA_QUALITY__P5 2.0 + #define FXAA_QUALITY__P6 4.0 + #define FXAA_QUALITY__P7 12.0 +#endif + +/*============================================================================ + FXAA QUALITY - LOW DITHER PRESETS +============================================================================*/ +#if (FXAA_QUALITY__PRESET == 20) + #define FXAA_QUALITY__PS 3 + #define FXAA_QUALITY__P0 1.5 + #define FXAA_QUALITY__P1 2.0 + #define FXAA_QUALITY__P2 8.0 +#endif +/*--------------------------------------------------------------------------*/ +#if (FXAA_QUALITY__PRESET == 21) + #define FXAA_QUALITY__PS 4 + #define FXAA_QUALITY__P0 1.0 + #define FXAA_QUALITY__P1 1.5 + #define FXAA_QUALITY__P2 2.0 + #define FXAA_QUALITY__P3 8.0 +#endif +/*--------------------------------------------------------------------------*/ +#if (FXAA_QUALITY__PRESET == 22) + #define FXAA_QUALITY__PS 5 + #define FXAA_QUALITY__P0 1.0 + #define FXAA_QUALITY__P1 1.5 + #define FXAA_QUALITY__P2 2.0 + #define FXAA_QUALITY__P3 2.0 + #define FXAA_QUALITY__P4 8.0 +#endif +/*--------------------------------------------------------------------------*/ +#if (FXAA_QUALITY__PRESET == 23) + #define FXAA_QUALITY__PS 6 + #define FXAA_QUALITY__P0 1.0 + #define FXAA_QUALITY__P1 1.5 + #define FXAA_QUALITY__P2 2.0 + #define FXAA_QUALITY__P3 2.0 + #define FXAA_QUALITY__P4 2.0 + #define FXAA_QUALITY__P5 8.0 +#endif +/*--------------------------------------------------------------------------*/ +#if (FXAA_QUALITY__PRESET == 24) + #define FXAA_QUALITY__PS 7 + #define FXAA_QUALITY__P0 1.0 + #define FXAA_QUALITY__P1 1.5 + #define FXAA_QUALITY__P2 2.0 + #define FXAA_QUALITY__P3 2.0 + #define FXAA_QUALITY__P4 2.0 + #define FXAA_QUALITY__P5 3.0 + #define FXAA_QUALITY__P6 8.0 +#endif +/*--------------------------------------------------------------------------*/ +#if (FXAA_QUALITY__PRESET == 25) + #define FXAA_QUALITY__PS 8 + #define FXAA_QUALITY__P0 1.0 + #define FXAA_QUALITY__P1 1.5 + #define FXAA_QUALITY__P2 2.0 + #define FXAA_QUALITY__P3 2.0 + #define FXAA_QUALITY__P4 2.0 + #define FXAA_QUALITY__P5 2.0 + #define FXAA_QUALITY__P6 4.0 + #define FXAA_QUALITY__P7 8.0 +#endif +/*--------------------------------------------------------------------------*/ +#if (FXAA_QUALITY__PRESET == 26) + #define FXAA_QUALITY__PS 9 + #define FXAA_QUALITY__P0 1.0 + #define FXAA_QUALITY__P1 1.5 + #define FXAA_QUALITY__P2 2.0 + #define FXAA_QUALITY__P3 2.0 + #define FXAA_QUALITY__P4 2.0 + #define FXAA_QUALITY__P5 2.0 + #define FXAA_QUALITY__P6 2.0 + #define FXAA_QUALITY__P7 4.0 + #define FXAA_QUALITY__P8 8.0 +#endif +/*--------------------------------------------------------------------------*/ +#if (FXAA_QUALITY__PRESET == 27) + #define FXAA_QUALITY__PS 10 + #define FXAA_QUALITY__P0 1.0 + #define FXAA_QUALITY__P1 1.5 + #define FXAA_QUALITY__P2 2.0 + #define FXAA_QUALITY__P3 2.0 + #define FXAA_QUALITY__P4 2.0 + #define FXAA_QUALITY__P5 2.0 + #define FXAA_QUALITY__P6 2.0 + #define FXAA_QUALITY__P7 2.0 + #define FXAA_QUALITY__P8 4.0 + #define FXAA_QUALITY__P9 8.0 +#endif +/*--------------------------------------------------------------------------*/ +#if (FXAA_QUALITY__PRESET == 28) + #define FXAA_QUALITY__PS 11 + #define FXAA_QUALITY__P0 1.0 + #define FXAA_QUALITY__P1 1.5 + #define FXAA_QUALITY__P2 2.0 + #define FXAA_QUALITY__P3 2.0 + #define FXAA_QUALITY__P4 2.0 + #define FXAA_QUALITY__P5 2.0 + #define FXAA_QUALITY__P6 2.0 + #define FXAA_QUALITY__P7 2.0 + #define FXAA_QUALITY__P8 2.0 + #define FXAA_QUALITY__P9 4.0 + #define FXAA_QUALITY__P10 8.0 +#endif +/*--------------------------------------------------------------------------*/ +#if (FXAA_QUALITY__PRESET == 29) + #define FXAA_QUALITY__PS 12 + #define FXAA_QUALITY__P0 1.0 + #define FXAA_QUALITY__P1 1.5 + #define FXAA_QUALITY__P2 2.0 + #define FXAA_QUALITY__P3 2.0 + #define FXAA_QUALITY__P4 2.0 + #define FXAA_QUALITY__P5 2.0 + #define FXAA_QUALITY__P6 2.0 + #define FXAA_QUALITY__P7 2.0 + #define FXAA_QUALITY__P8 2.0 + #define FXAA_QUALITY__P9 2.0 + #define FXAA_QUALITY__P10 4.0 + #define FXAA_QUALITY__P11 8.0 +#endif + +/*============================================================================ + FXAA QUALITY - EXTREME QUALITY +============================================================================*/ +#if (FXAA_QUALITY__PRESET == 39) + #define FXAA_QUALITY__PS 12 + #define FXAA_QUALITY__P0 1.0 + #define FXAA_QUALITY__P1 1.0 + #define FXAA_QUALITY__P2 1.0 + #define FXAA_QUALITY__P3 1.0 + #define FXAA_QUALITY__P4 1.0 + #define FXAA_QUALITY__P5 1.5 + #define FXAA_QUALITY__P6 2.0 + #define FXAA_QUALITY__P7 2.0 + #define FXAA_QUALITY__P8 2.0 + #define FXAA_QUALITY__P9 2.0 + #define FXAA_QUALITY__P10 4.0 + #define FXAA_QUALITY__P11 8.0 +#endif + + + +/*============================================================================ + + API PORTING + +============================================================================*/ +#if (FXAA_GLSL_120 == 1) || (FXAA_GLSL_130 == 1) + #define FxaaBool bool + #define FxaaDiscard discard + #define FxaaFloat float + #define FxaaFloat2 vec2 + #define FxaaFloat3 vec3 + #define FxaaFloat4 vec4 + #define FxaaHalf float + #define FxaaHalf2 vec2 + #define FxaaHalf3 vec3 + #define FxaaHalf4 vec4 + #define FxaaInt2 ivec2 + #define FxaaSat(x) clamp(x, 0.0, 1.0) + #define FxaaTex sampler2D +#else + #define FxaaBool bool + #define FxaaDiscard clip(-1) + #define FxaaFloat float + #define FxaaFloat2 float2 + #define FxaaFloat3 float3 + #define FxaaFloat4 float4 + #define FxaaHalf half + #define FxaaHalf2 half2 + #define FxaaHalf3 half3 + #define FxaaHalf4 half4 + #define FxaaSat(x) saturate(x) +#endif +/*--------------------------------------------------------------------------*/ +#if (FXAA_GLSL_120 == 1) + // Requires, + // #version 120 + // And at least, + // #extension GL_EXT_gpu_shader4 : enable + // (or set FXAA_FAST_PIXEL_OFFSET 1 to work like DX9) + #define FxaaTexTop(t, p) texture2DLod(t, p, 0.0) + #if (FXAA_FAST_PIXEL_OFFSET == 1) + #define FxaaTexOff(t, p, o, r) texture2DLodOffset(t, p, 0.0, o) + #else + #define FxaaTexOff(t, p, o, r) texture2DLod(t, p + (o * r), 0.0) + #endif + #if (FXAA_GATHER4_ALPHA == 1) + // use #extension GL_ARB_gpu_shader5 : enable + #define FxaaTexAlpha4(t, p) textureGather(t, p, 3) + #define FxaaTexOffAlpha4(t, p, o) textureGatherOffset(t, p, o, 3) + #define FxaaTexGreen4(t, p) textureGather(t, p, 1) + #define FxaaTexOffGreen4(t, p, o) textureGatherOffset(t, p, o, 1) + #endif +#endif +/*--------------------------------------------------------------------------*/ +#if (FXAA_GLSL_130 == 1) + // Requires "#version 130" or better + #define FxaaTexTop(t, p) textureLod(t, p, 0.0) + #define FxaaTexOff(t, p, o, r) textureLodOffset(t, p, 0.0, o) + #if (FXAA_GATHER4_ALPHA == 1) + // use #extension GL_ARB_gpu_shader5 : enable + #define FxaaTexAlpha4(t, p) textureGather(t, p, 3) + #define FxaaTexOffAlpha4(t, p, o) textureGatherOffset(t, p, o, 3) + #define FxaaTexGreen4(t, p) textureGather(t, p, 1) + #define FxaaTexOffGreen4(t, p, o) textureGatherOffset(t, p, o, 1) + #endif +#endif +/*--------------------------------------------------------------------------*/ +#if (FXAA_HLSL_3 == 1) || (FXAA_360 == 1) || (FXAA_PS3 == 1) + #define FxaaInt2 float2 + #define FxaaTex sampler2D + #define FxaaTexTop(t, p) tex2Dlod(t, float4(p, 0.0, 0.0)) + #define FxaaTexOff(t, p, o, r) tex2Dlod(t, float4(p + (o * r), 0, 0)) +#endif +/*--------------------------------------------------------------------------*/ +#if (FXAA_HLSL_4 == 1) + #define FxaaInt2 int2 + struct FxaaTex { SamplerState smpl; Texture2D tex; }; + #define FxaaTexTop(t, p) t.tex.SampleLevel(t.smpl, p, 0.0) + #define FxaaTexOff(t, p, o, r) t.tex.SampleLevel(t.smpl, p, 0.0, o) +#endif +/*--------------------------------------------------------------------------*/ +#if (FXAA_HLSL_5 == 1) + #define FxaaInt2 int2 + struct FxaaTex { SamplerState smpl; Texture2D tex; }; + #define FxaaTexTop(t, p) t.tex.SampleLevel(t.smpl, p, 0.0) + #define FxaaTexOff(t, p, o, r) t.tex.SampleLevel(t.smpl, p, 0.0, o) + #define FxaaTexAlpha4(t, p) t.tex.GatherAlpha(t.smpl, p) + #define FxaaTexOffAlpha4(t, p, o) t.tex.GatherAlpha(t.smpl, p, o) + #define FxaaTexGreen4(t, p) t.tex.GatherGreen(t.smpl, p) + #define FxaaTexOffGreen4(t, p, o) t.tex.GatherGreen(t.smpl, p, o) +#endif + + +/*============================================================================ + GREEN AS LUMA OPTION SUPPORT FUNCTION +============================================================================*/ +#if (FXAA_GREEN_AS_LUMA == 0) + FxaaFloat FxaaLuma(FxaaFloat4 rgba) { return rgba.w; } +#else + FxaaFloat FxaaLuma(FxaaFloat4 rgba) { return rgba.y; } +#endif + + + + +/*============================================================================ + + FXAA3 QUALITY - PC + +============================================================================*/ +#if (FXAA_PC == 1) +/*--------------------------------------------------------------------------*/ +FxaaFloat4 FxaaPixelShader( + // + // Use noperspective interpolation here (turn off perspective interpolation). + // {xy} = center of pixel + FxaaFloat2 pos, + // + // Used only for FXAA Console, and not used on the 360 version. + // Use noperspective interpolation here (turn off perspective interpolation). + // {xy__} = upper left of pixel + // {__zw} = lower right of pixel + FxaaFloat4 fxaaConsolePosPos, + // + // Input color texture. + // {rgb_} = color in linear or perceptual color space + // if (FXAA_GREEN_AS_LUMA == 0) + // {___a} = luma in perceptual color space (not linear) + FxaaTex tex, + // + // Only used on the optimized 360 version of FXAA Console. + // For everything but 360, just use the same input here as for "tex". + // For 360, same texture, just alias with a 2nd sampler. + // This sampler needs to have an exponent bias of -1. + FxaaTex fxaaConsole360TexExpBiasNegOne, + // + // Only used on the optimized 360 version of FXAA Console. + // For everything but 360, just use the same input here as for "tex". + // For 360, same texture, just alias with a 3nd sampler. + // This sampler needs to have an exponent bias of -2. + FxaaTex fxaaConsole360TexExpBiasNegTwo, + // + // Only used on FXAA Quality. + // This must be from a constant/uniform. + // {x_} = 1.0/screenWidthInPixels + // {_y} = 1.0/screenHeightInPixels + FxaaFloat2 fxaaQualityRcpFrame, + // + // Only used on FXAA Console. + // This must be from a constant/uniform. + // This effects sub-pixel AA quality and inversely sharpness. + // Where N ranges between, + // N = 0.50 (default) + // N = 0.33 (sharper) + // {x___} = -N/screenWidthInPixels + // {_y__} = -N/screenHeightInPixels + // {__z_} = N/screenWidthInPixels + // {___w} = N/screenHeightInPixels + FxaaFloat4 fxaaConsoleRcpFrameOpt, + // + // Only used on FXAA Console. + // Not used on 360, but used on PS3 and PC. + // This must be from a constant/uniform. + // {x___} = -2.0/screenWidthInPixels + // {_y__} = -2.0/screenHeightInPixels + // {__z_} = 2.0/screenWidthInPixels + // {___w} = 2.0/screenHeightInPixels + FxaaFloat4 fxaaConsoleRcpFrameOpt2, + // + // Only used on FXAA Console. + // Only used on 360 in place of fxaaConsoleRcpFrameOpt2. + // This must be from a constant/uniform. + // {x___} = 8.0/screenWidthInPixels + // {_y__} = 8.0/screenHeightInPixels + // {__z_} = -4.0/screenWidthInPixels + // {___w} = -4.0/screenHeightInPixels + FxaaFloat4 fxaaConsole360RcpFrameOpt2, + // + // Only used on FXAA Quality. + // This used to be the FXAA_QUALITY__SUBPIX define. + // It is here now to allow easier tuning. + // Choose the amount of sub-pixel aliasing removal. + // This can effect sharpness. + // 1.00 - upper limit (softer) + // 0.75 - default amount of filtering + // 0.50 - lower limit (sharper, less sub-pixel aliasing removal) + // 0.25 - almost off + // 0.00 - completely off + FxaaFloat fxaaQualitySubpix, + // + // Only used on FXAA Quality. + // This used to be the FXAA_QUALITY__EDGE_THRESHOLD define. + // It is here now to allow easier tuning. + // The minimum amount of local contrast required to apply algorithm. + // 0.333 - too little (faster) + // 0.250 - low quality + // 0.166 - default + // 0.125 - high quality + // 0.063 - overkill (slower) + FxaaFloat fxaaQualityEdgeThreshold, + // + // Only used on FXAA Quality. + // This used to be the FXAA_QUALITY__EDGE_THRESHOLD_MIN define. + // It is here now to allow easier tuning. + // Trims the algorithm from processing darks. + // 0.0833 - upper limit (default, the start of visible unfiltered edges) + // 0.0625 - high quality (faster) + // 0.0312 - visible limit (slower) + // Special notes when using FXAA_GREEN_AS_LUMA, + // Likely want to set this to zero. + // As colors that are mostly not-green + // will appear very dark in the green channel! + // Tune by looking at mostly non-green content, + // then start at zero and increase until aliasing is a problem. + FxaaFloat fxaaQualityEdgeThresholdMin, + // + // Only used on FXAA Console. + // This used to be the FXAA_CONSOLE__EDGE_SHARPNESS define. + // It is here now to allow easier tuning. + // This does not effect PS3, as this needs to be compiled in. + // Use FXAA_CONSOLE__PS3_EDGE_SHARPNESS for PS3. + // Due to the PS3 being ALU bound, + // there are only three safe values here: 2 and 4 and 8. + // These options use the shaders ability to a free *|/ by 2|4|8. + // For all other platforms can be a non-power of two. + // 8.0 is sharper (default!!!) + // 4.0 is softer + // 2.0 is really soft (good only for vector graphics inputs) + FxaaFloat fxaaConsoleEdgeSharpness, + // + // Only used on FXAA Console. + // This used to be the FXAA_CONSOLE__EDGE_THRESHOLD define. + // It is here now to allow easier tuning. + // This does not effect PS3, as this needs to be compiled in. + // Use FXAA_CONSOLE__PS3_EDGE_THRESHOLD for PS3. + // Due to the PS3 being ALU bound, + // there are only two safe values here: 1/4 and 1/8. + // These options use the shaders ability to a free *|/ by 2|4|8. + // The console setting has a different mapping than the quality setting. + // Other platforms can use other values. + // 0.125 leaves less aliasing, but is softer (default!!!) + // 0.25 leaves more aliasing, and is sharper + FxaaFloat fxaaConsoleEdgeThreshold, + // + // Only used on FXAA Console. + // This used to be the FXAA_CONSOLE__EDGE_THRESHOLD_MIN define. + // It is here now to allow easier tuning. + // Trims the algorithm from processing darks. + // The console setting has a different mapping than the quality setting. + // This only applies when FXAA_EARLY_EXIT is 1. + // This does not apply to PS3, + // PS3 was simplified to avoid more shader instructions. + // 0.06 - faster but more aliasing in darks + // 0.05 - default + // 0.04 - slower and less aliasing in darks + // Special notes when using FXAA_GREEN_AS_LUMA, + // Likely want to set this to zero. + // As colors that are mostly not-green + // will appear very dark in the green channel! + // Tune by looking at mostly non-green content, + // then start at zero and increase until aliasing is a problem. + FxaaFloat fxaaConsoleEdgeThresholdMin, + // + // Extra constants for 360 FXAA Console only. + // Use zeros or anything else for other platforms. + // These must be in physical constant registers and NOT immedates. + // Immedates will result in compiler un-optimizing. + // {xyzw} = float4(1.0, -1.0, 0.25, -0.25) + FxaaFloat4 fxaaConsole360ConstDir +) { +/*--------------------------------------------------------------------------*/ + FxaaFloat2 posM; + posM.x = pos.x; + posM.y = pos.y; + #if (FXAA_GATHER4_ALPHA == 1) + #if (FXAA_DISCARD == 0) + FxaaFloat4 rgbyM = FxaaTexTop(tex, posM); + #if (FXAA_GREEN_AS_LUMA == 0) + #define lumaM rgbyM.w + #else + #define lumaM rgbyM.y + #endif + #endif + #if (FXAA_GREEN_AS_LUMA == 0) + FxaaFloat4 luma4A = FxaaTexAlpha4(tex, posM); + FxaaFloat4 luma4B = FxaaTexOffAlpha4(tex, posM, FxaaInt2(-1, -1)); + #else + FxaaFloat4 luma4A = FxaaTexGreen4(tex, posM); + FxaaFloat4 luma4B = FxaaTexOffGreen4(tex, posM, FxaaInt2(-1, -1)); + #endif + #if (FXAA_DISCARD == 1) + #define lumaM luma4A.w + #endif + #define lumaE luma4A.z + #define lumaS luma4A.x + #define lumaSE luma4A.y + #define lumaNW luma4B.w + #define lumaN luma4B.z + #define lumaW luma4B.x + #else + FxaaFloat4 rgbyM = FxaaTexTop(tex, posM); + #if (FXAA_GREEN_AS_LUMA == 0) + #define lumaM rgbyM.w + #else + #define lumaM rgbyM.y + #endif + FxaaFloat lumaS = FxaaLuma(FxaaTexOff(tex, posM, FxaaInt2( 0, 1), fxaaQualityRcpFrame.xy)); + FxaaFloat lumaE = FxaaLuma(FxaaTexOff(tex, posM, FxaaInt2( 1, 0), fxaaQualityRcpFrame.xy)); + FxaaFloat lumaN = FxaaLuma(FxaaTexOff(tex, posM, FxaaInt2( 0,-1), fxaaQualityRcpFrame.xy)); + FxaaFloat lumaW = FxaaLuma(FxaaTexOff(tex, posM, FxaaInt2(-1, 0), fxaaQualityRcpFrame.xy)); + #endif +/*--------------------------------------------------------------------------*/ + FxaaFloat maxSM = max(lumaS, lumaM); + FxaaFloat minSM = min(lumaS, lumaM); + FxaaFloat maxESM = max(lumaE, maxSM); + FxaaFloat minESM = min(lumaE, minSM); + FxaaFloat maxWN = max(lumaN, lumaW); + FxaaFloat minWN = min(lumaN, lumaW); + FxaaFloat rangeMax = max(maxWN, maxESM); + FxaaFloat rangeMin = min(minWN, minESM); + FxaaFloat rangeMaxScaled = rangeMax * fxaaQualityEdgeThreshold; + FxaaFloat range = rangeMax - rangeMin; + FxaaFloat rangeMaxClamped = max(fxaaQualityEdgeThresholdMin, rangeMaxScaled); + FxaaBool earlyExit = range < rangeMaxClamped; +/*--------------------------------------------------------------------------*/ + if(earlyExit) + #if (FXAA_DISCARD == 1) + FxaaDiscard; + #else + return rgbyM; + #endif +/*--------------------------------------------------------------------------*/ + #if (FXAA_GATHER4_ALPHA == 0) + FxaaFloat lumaNW = FxaaLuma(FxaaTexOff(tex, posM, FxaaInt2(-1,-1), fxaaQualityRcpFrame.xy)); + FxaaFloat lumaSE = FxaaLuma(FxaaTexOff(tex, posM, FxaaInt2( 1, 1), fxaaQualityRcpFrame.xy)); + FxaaFloat lumaNE = FxaaLuma(FxaaTexOff(tex, posM, FxaaInt2( 1,-1), fxaaQualityRcpFrame.xy)); + FxaaFloat lumaSW = FxaaLuma(FxaaTexOff(tex, posM, FxaaInt2(-1, 1), fxaaQualityRcpFrame.xy)); + #else + FxaaFloat lumaNE = FxaaLuma(FxaaTexOff(tex, posM, FxaaInt2(1, -1), fxaaQualityRcpFrame.xy)); + FxaaFloat lumaSW = FxaaLuma(FxaaTexOff(tex, posM, FxaaInt2(-1, 1), fxaaQualityRcpFrame.xy)); + #endif +/*--------------------------------------------------------------------------*/ + FxaaFloat lumaNS = lumaN + lumaS; + FxaaFloat lumaWE = lumaW + lumaE; + FxaaFloat subpixRcpRange = 1.0/range; + FxaaFloat subpixNSWE = lumaNS + lumaWE; + FxaaFloat edgeHorz1 = (-2.0 * lumaM) + lumaNS; + FxaaFloat edgeVert1 = (-2.0 * lumaM) + lumaWE; +/*--------------------------------------------------------------------------*/ + FxaaFloat lumaNESE = lumaNE + lumaSE; + FxaaFloat lumaNWNE = lumaNW + lumaNE; + FxaaFloat edgeHorz2 = (-2.0 * lumaE) + lumaNESE; + FxaaFloat edgeVert2 = (-2.0 * lumaN) + lumaNWNE; +/*--------------------------------------------------------------------------*/ + FxaaFloat lumaNWSW = lumaNW + lumaSW; + FxaaFloat lumaSWSE = lumaSW + lumaSE; + FxaaFloat edgeHorz4 = (abs(edgeHorz1) * 2.0) + abs(edgeHorz2); + FxaaFloat edgeVert4 = (abs(edgeVert1) * 2.0) + abs(edgeVert2); + FxaaFloat edgeHorz3 = (-2.0 * lumaW) + lumaNWSW; + FxaaFloat edgeVert3 = (-2.0 * lumaS) + lumaSWSE; + FxaaFloat edgeHorz = abs(edgeHorz3) + edgeHorz4; + FxaaFloat edgeVert = abs(edgeVert3) + edgeVert4; +/*--------------------------------------------------------------------------*/ + FxaaFloat subpixNWSWNESE = lumaNWSW + lumaNESE; + FxaaFloat lengthSign = fxaaQualityRcpFrame.x; + FxaaBool horzSpan = edgeHorz >= edgeVert; + FxaaFloat subpixA = subpixNSWE * 2.0 + subpixNWSWNESE; +/*--------------------------------------------------------------------------*/ + if(!horzSpan) lumaN = lumaW; + if(!horzSpan) lumaS = lumaE; + if(horzSpan) lengthSign = fxaaQualityRcpFrame.y; + FxaaFloat subpixB = (subpixA * (1.0/12.0)) - lumaM; +/*--------------------------------------------------------------------------*/ + FxaaFloat gradientN = lumaN - lumaM; + FxaaFloat gradientS = lumaS - lumaM; + FxaaFloat lumaNN = lumaN + lumaM; + FxaaFloat lumaSS = lumaS + lumaM; + FxaaBool pairN = abs(gradientN) >= abs(gradientS); + FxaaFloat gradient = max(abs(gradientN), abs(gradientS)); + if(pairN) lengthSign = -lengthSign; + FxaaFloat subpixC = FxaaSat(abs(subpixB) * subpixRcpRange); +/*--------------------------------------------------------------------------*/ + FxaaFloat2 posB; + posB.x = posM.x; + posB.y = posM.y; + FxaaFloat2 offNP; + offNP.x = (!horzSpan) ? 0.0 : fxaaQualityRcpFrame.x; + offNP.y = ( horzSpan) ? 0.0 : fxaaQualityRcpFrame.y; + if(!horzSpan) posB.x += lengthSign * 0.5; + if( horzSpan) posB.y += lengthSign * 0.5; +/*--------------------------------------------------------------------------*/ + FxaaFloat2 posN; + posN.x = posB.x - offNP.x * FXAA_QUALITY__P0; + posN.y = posB.y - offNP.y * FXAA_QUALITY__P0; + FxaaFloat2 posP; + posP.x = posB.x + offNP.x * FXAA_QUALITY__P0; + posP.y = posB.y + offNP.y * FXAA_QUALITY__P0; + FxaaFloat subpixD = ((-2.0)*subpixC) + 3.0; + FxaaFloat lumaEndN = FxaaLuma(FxaaTexTop(tex, posN)); + FxaaFloat subpixE = subpixC * subpixC; + FxaaFloat lumaEndP = FxaaLuma(FxaaTexTop(tex, posP)); +/*--------------------------------------------------------------------------*/ + if(!pairN) lumaNN = lumaSS; + FxaaFloat gradientScaled = gradient * 1.0/4.0; + FxaaFloat lumaMM = lumaM - lumaNN * 0.5; + FxaaFloat subpixF = subpixD * subpixE; + FxaaBool lumaMLTZero = lumaMM < 0.0; +/*--------------------------------------------------------------------------*/ + lumaEndN -= lumaNN * 0.5; + lumaEndP -= lumaNN * 0.5; + FxaaBool doneN = abs(lumaEndN) >= gradientScaled; + FxaaBool doneP = abs(lumaEndP) >= gradientScaled; + if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P1; + if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P1; + FxaaBool doneNP = (!doneN) || (!doneP); + if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P1; + if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P1; +/*--------------------------------------------------------------------------*/ + if(doneNP) { + if(!doneN) lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy)); + if(!doneP) lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy)); + if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5; + if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5; + doneN = abs(lumaEndN) >= gradientScaled; + doneP = abs(lumaEndP) >= gradientScaled; + if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P2; + if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P2; + doneNP = (!doneN) || (!doneP); + if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P2; + if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P2; +/*--------------------------------------------------------------------------*/ + #if (FXAA_QUALITY__PS > 3) + if(doneNP) { + if(!doneN) lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy)); + if(!doneP) lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy)); + if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5; + if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5; + doneN = abs(lumaEndN) >= gradientScaled; + doneP = abs(lumaEndP) >= gradientScaled; + if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P3; + if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P3; + doneNP = (!doneN) || (!doneP); + if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P3; + if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P3; +/*--------------------------------------------------------------------------*/ + #if (FXAA_QUALITY__PS > 4) + if(doneNP) { + if(!doneN) lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy)); + if(!doneP) lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy)); + if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5; + if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5; + doneN = abs(lumaEndN) >= gradientScaled; + doneP = abs(lumaEndP) >= gradientScaled; + if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P4; + if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P4; + doneNP = (!doneN) || (!doneP); + if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P4; + if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P4; +/*--------------------------------------------------------------------------*/ + #if (FXAA_QUALITY__PS > 5) + if(doneNP) { + if(!doneN) lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy)); + if(!doneP) lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy)); + if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5; + if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5; + doneN = abs(lumaEndN) >= gradientScaled; + doneP = abs(lumaEndP) >= gradientScaled; + if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P5; + if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P5; + doneNP = (!doneN) || (!doneP); + if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P5; + if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P5; +/*--------------------------------------------------------------------------*/ + #if (FXAA_QUALITY__PS > 6) + if(doneNP) { + if(!doneN) lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy)); + if(!doneP) lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy)); + if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5; + if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5; + doneN = abs(lumaEndN) >= gradientScaled; + doneP = abs(lumaEndP) >= gradientScaled; + if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P6; + if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P6; + doneNP = (!doneN) || (!doneP); + if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P6; + if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P6; +/*--------------------------------------------------------------------------*/ + #if (FXAA_QUALITY__PS > 7) + if(doneNP) { + if(!doneN) lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy)); + if(!doneP) lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy)); + if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5; + if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5; + doneN = abs(lumaEndN) >= gradientScaled; + doneP = abs(lumaEndP) >= gradientScaled; + if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P7; + if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P7; + doneNP = (!doneN) || (!doneP); + if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P7; + if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P7; +/*--------------------------------------------------------------------------*/ + #if (FXAA_QUALITY__PS > 8) + if(doneNP) { + if(!doneN) lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy)); + if(!doneP) lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy)); + if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5; + if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5; + doneN = abs(lumaEndN) >= gradientScaled; + doneP = abs(lumaEndP) >= gradientScaled; + if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P8; + if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P8; + doneNP = (!doneN) || (!doneP); + if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P8; + if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P8; +/*--------------------------------------------------------------------------*/ + #if (FXAA_QUALITY__PS > 9) + if(doneNP) { + if(!doneN) lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy)); + if(!doneP) lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy)); + if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5; + if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5; + doneN = abs(lumaEndN) >= gradientScaled; + doneP = abs(lumaEndP) >= gradientScaled; + if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P9; + if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P9; + doneNP = (!doneN) || (!doneP); + if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P9; + if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P9; +/*--------------------------------------------------------------------------*/ + #if (FXAA_QUALITY__PS > 10) + if(doneNP) { + if(!doneN) lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy)); + if(!doneP) lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy)); + if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5; + if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5; + doneN = abs(lumaEndN) >= gradientScaled; + doneP = abs(lumaEndP) >= gradientScaled; + if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P10; + if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P10; + doneNP = (!doneN) || (!doneP); + if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P10; + if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P10; +/*--------------------------------------------------------------------------*/ + #if (FXAA_QUALITY__PS > 11) + if(doneNP) { + if(!doneN) lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy)); + if(!doneP) lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy)); + if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5; + if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5; + doneN = abs(lumaEndN) >= gradientScaled; + doneP = abs(lumaEndP) >= gradientScaled; + if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P11; + if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P11; + doneNP = (!doneN) || (!doneP); + if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P11; + if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P11; +/*--------------------------------------------------------------------------*/ + #if (FXAA_QUALITY__PS > 12) + if(doneNP) { + if(!doneN) lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy)); + if(!doneP) lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy)); + if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5; + if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5; + doneN = abs(lumaEndN) >= gradientScaled; + doneP = abs(lumaEndP) >= gradientScaled; + if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P12; + if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P12; + doneNP = (!doneN) || (!doneP); + if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P12; + if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P12; +/*--------------------------------------------------------------------------*/ + } + #endif +/*--------------------------------------------------------------------------*/ + } + #endif +/*--------------------------------------------------------------------------*/ + } + #endif +/*--------------------------------------------------------------------------*/ + } + #endif +/*--------------------------------------------------------------------------*/ + } + #endif +/*--------------------------------------------------------------------------*/ + } + #endif +/*--------------------------------------------------------------------------*/ + } + #endif +/*--------------------------------------------------------------------------*/ + } + #endif +/*--------------------------------------------------------------------------*/ + } + #endif +/*--------------------------------------------------------------------------*/ + } + #endif +/*--------------------------------------------------------------------------*/ + } +/*--------------------------------------------------------------------------*/ + FxaaFloat dstN = posM.x - posN.x; + FxaaFloat dstP = posP.x - posM.x; + if(!horzSpan) dstN = posM.y - posN.y; + if(!horzSpan) dstP = posP.y - posM.y; +/*--------------------------------------------------------------------------*/ + FxaaBool goodSpanN = (lumaEndN < 0.0) != lumaMLTZero; + FxaaFloat spanLength = (dstP + dstN); + FxaaBool goodSpanP = (lumaEndP < 0.0) != lumaMLTZero; + FxaaFloat spanLengthRcp = 1.0/spanLength; +/*--------------------------------------------------------------------------*/ + FxaaBool directionN = dstN < dstP; + FxaaFloat dst = min(dstN, dstP); + FxaaBool goodSpan = directionN ? goodSpanN : goodSpanP; + FxaaFloat subpixG = subpixF * subpixF; + FxaaFloat pixelOffset = (dst * (-spanLengthRcp)) + 0.5; + FxaaFloat subpixH = subpixG * fxaaQualitySubpix; +/*--------------------------------------------------------------------------*/ + FxaaFloat pixelOffsetGood = goodSpan ? pixelOffset : 0.0; + FxaaFloat pixelOffsetSubpix = max(pixelOffsetGood, subpixH); + if(!horzSpan) posM.x += pixelOffsetSubpix * lengthSign; + if( horzSpan) posM.y += pixelOffsetSubpix * lengthSign; + #if (FXAA_DISCARD == 1) + return FxaaTexTop(tex, posM); + #else + return FxaaFloat4(FxaaTexTop(tex, posM).xyz, lumaM); + #endif +} +/*==========================================================================*/ +#endif + + + + +/*============================================================================ + + FXAA3 CONSOLE - PC VERSION + +------------------------------------------------------------------------------ +Instead of using this on PC, I'd suggest just using FXAA Quality with + #define FXAA_QUALITY__PRESET 10 +Or + #define FXAA_QUALITY__PRESET 20 +Either are higher qualilty and almost as fast as this on modern PC GPUs. +============================================================================*/ +#if (FXAA_PC_CONSOLE == 1) +/*--------------------------------------------------------------------------*/ +FxaaFloat4 FxaaPixelShader( + // See FXAA Quality FxaaPixelShader() source for docs on Inputs! + FxaaFloat2 pos, + FxaaFloat4 fxaaConsolePosPos, + FxaaTex tex, + FxaaTex fxaaConsole360TexExpBiasNegOne, + FxaaTex fxaaConsole360TexExpBiasNegTwo, + FxaaFloat2 fxaaQualityRcpFrame, + FxaaFloat4 fxaaConsoleRcpFrameOpt, + FxaaFloat4 fxaaConsoleRcpFrameOpt2, + FxaaFloat4 fxaaConsole360RcpFrameOpt2, + FxaaFloat fxaaQualitySubpix, + FxaaFloat fxaaQualityEdgeThreshold, + FxaaFloat fxaaQualityEdgeThresholdMin, + FxaaFloat fxaaConsoleEdgeSharpness, + FxaaFloat fxaaConsoleEdgeThreshold, + FxaaFloat fxaaConsoleEdgeThresholdMin, + FxaaFloat4 fxaaConsole360ConstDir +) { +/*--------------------------------------------------------------------------*/ + FxaaFloat lumaNw = FxaaLuma(FxaaTexTop(tex, fxaaConsolePosPos.xy)); + FxaaFloat lumaSw = FxaaLuma(FxaaTexTop(tex, fxaaConsolePosPos.xw)); + FxaaFloat lumaNe = FxaaLuma(FxaaTexTop(tex, fxaaConsolePosPos.zy)); + FxaaFloat lumaSe = FxaaLuma(FxaaTexTop(tex, fxaaConsolePosPos.zw)); +/*--------------------------------------------------------------------------*/ + FxaaFloat4 rgbyM = FxaaTexTop(tex, pos.xy); + #if (FXAA_GREEN_AS_LUMA == 0) + FxaaFloat lumaM = rgbyM.w; + #else + FxaaFloat lumaM = rgbyM.y; + #endif +/*--------------------------------------------------------------------------*/ + FxaaFloat lumaMaxNwSw = max(lumaNw, lumaSw); + lumaNe += 1.0/384.0; + FxaaFloat lumaMinNwSw = min(lumaNw, lumaSw); +/*--------------------------------------------------------------------------*/ + FxaaFloat lumaMaxNeSe = max(lumaNe, lumaSe); + FxaaFloat lumaMinNeSe = min(lumaNe, lumaSe); +/*--------------------------------------------------------------------------*/ + FxaaFloat lumaMax = max(lumaMaxNeSe, lumaMaxNwSw); + FxaaFloat lumaMin = min(lumaMinNeSe, lumaMinNwSw); +/*--------------------------------------------------------------------------*/ + FxaaFloat lumaMaxScaled = lumaMax * fxaaConsoleEdgeThreshold; +/*--------------------------------------------------------------------------*/ + FxaaFloat lumaMinM = min(lumaMin, lumaM); + FxaaFloat lumaMaxScaledClamped = max(fxaaConsoleEdgeThresholdMin, lumaMaxScaled); + FxaaFloat lumaMaxM = max(lumaMax, lumaM); + FxaaFloat dirSwMinusNe = lumaSw - lumaNe; + FxaaFloat lumaMaxSubMinM = lumaMaxM - lumaMinM; + FxaaFloat dirSeMinusNw = lumaSe - lumaNw; + if(lumaMaxSubMinM < lumaMaxScaledClamped) return rgbyM; +/*--------------------------------------------------------------------------*/ + FxaaFloat2 dir; + dir.x = dirSwMinusNe + dirSeMinusNw; + dir.y = dirSwMinusNe - dirSeMinusNw; +/*--------------------------------------------------------------------------*/ + FxaaFloat2 dir1 = normalize(dir.xy); + FxaaFloat4 rgbyN1 = FxaaTexTop(tex, pos.xy - dir1 * fxaaConsoleRcpFrameOpt.zw); + FxaaFloat4 rgbyP1 = FxaaTexTop(tex, pos.xy + dir1 * fxaaConsoleRcpFrameOpt.zw); +/*--------------------------------------------------------------------------*/ + FxaaFloat dirAbsMinTimesC = min(abs(dir1.x), abs(dir1.y)) * fxaaConsoleEdgeSharpness; + FxaaFloat2 dir2 = clamp(dir1.xy / dirAbsMinTimesC, -2.0, 2.0); +/*--------------------------------------------------------------------------*/ + FxaaFloat4 rgbyN2 = FxaaTexTop(tex, pos.xy - dir2 * fxaaConsoleRcpFrameOpt2.zw); + FxaaFloat4 rgbyP2 = FxaaTexTop(tex, pos.xy + dir2 * fxaaConsoleRcpFrameOpt2.zw); +/*--------------------------------------------------------------------------*/ + FxaaFloat4 rgbyA = rgbyN1 + rgbyP1; + FxaaFloat4 rgbyB = ((rgbyN2 + rgbyP2) * 0.25) + (rgbyA * 0.25); +/*--------------------------------------------------------------------------*/ + #if (FXAA_GREEN_AS_LUMA == 0) + FxaaBool twoTap = (rgbyB.w < lumaMin) || (rgbyB.w > lumaMax); + #else + FxaaBool twoTap = (rgbyB.y < lumaMin) || (rgbyB.y > lumaMax); + #endif + if(twoTap) rgbyB.xyz = rgbyA.xyz * 0.5; + return rgbyB; } +/*==========================================================================*/ +#endif + + + +/*============================================================================ + + FXAA3 CONSOLE - 360 PIXEL SHADER + +------------------------------------------------------------------------------ +This optimized version thanks to suggestions from Andy Luedke. +Should be fully tex bound in all cases. +As of the FXAA 3.11 release, I have still not tested this code, +however I fixed a bug which was in both FXAA 3.9 and FXAA 3.10. +And note this is replacing the old unoptimized version. +If it does not work, please let me know so I can fix it. +============================================================================*/ +#if (FXAA_360 == 1) +/*--------------------------------------------------------------------------*/ +[reduceTempRegUsage(4)] +float4 FxaaPixelShader( + // See FXAA Quality FxaaPixelShader() source for docs on Inputs! + FxaaFloat2 pos, + FxaaFloat4 fxaaConsolePosPos, + FxaaTex tex, + FxaaTex fxaaConsole360TexExpBiasNegOne, + FxaaTex fxaaConsole360TexExpBiasNegTwo, + FxaaFloat2 fxaaQualityRcpFrame, + FxaaFloat4 fxaaConsoleRcpFrameOpt, + FxaaFloat4 fxaaConsoleRcpFrameOpt2, + FxaaFloat4 fxaaConsole360RcpFrameOpt2, + FxaaFloat fxaaQualitySubpix, + FxaaFloat fxaaQualityEdgeThreshold, + FxaaFloat fxaaQualityEdgeThresholdMin, + FxaaFloat fxaaConsoleEdgeSharpness, + FxaaFloat fxaaConsoleEdgeThreshold, + FxaaFloat fxaaConsoleEdgeThresholdMin, + FxaaFloat4 fxaaConsole360ConstDir +) { +/*--------------------------------------------------------------------------*/ + float4 lumaNwNeSwSe; + #if (FXAA_GREEN_AS_LUMA == 0) + asm { + tfetch2D lumaNwNeSwSe.w___, tex, pos.xy, OffsetX = -0.5, OffsetY = -0.5, UseComputedLOD=false + tfetch2D lumaNwNeSwSe._w__, tex, pos.xy, OffsetX = 0.5, OffsetY = -0.5, UseComputedLOD=false + tfetch2D lumaNwNeSwSe.__w_, tex, pos.xy, OffsetX = -0.5, OffsetY = 0.5, UseComputedLOD=false + tfetch2D lumaNwNeSwSe.___w, tex, pos.xy, OffsetX = 0.5, OffsetY = 0.5, UseComputedLOD=false + }; + #else + asm { + tfetch2D lumaNwNeSwSe.y___, tex, pos.xy, OffsetX = -0.5, OffsetY = -0.5, UseComputedLOD=false + tfetch2D lumaNwNeSwSe._y__, tex, pos.xy, OffsetX = 0.5, OffsetY = -0.5, UseComputedLOD=false + tfetch2D lumaNwNeSwSe.__y_, tex, pos.xy, OffsetX = -0.5, OffsetY = 0.5, UseComputedLOD=false + tfetch2D lumaNwNeSwSe.___y, tex, pos.xy, OffsetX = 0.5, OffsetY = 0.5, UseComputedLOD=false + }; + #endif +/*--------------------------------------------------------------------------*/ + lumaNwNeSwSe.y += 1.0/384.0; + float2 lumaMinTemp = min(lumaNwNeSwSe.xy, lumaNwNeSwSe.zw); + float2 lumaMaxTemp = max(lumaNwNeSwSe.xy, lumaNwNeSwSe.zw); + float lumaMin = min(lumaMinTemp.x, lumaMinTemp.y); + float lumaMax = max(lumaMaxTemp.x, lumaMaxTemp.y); +/*--------------------------------------------------------------------------*/ + float4 rgbyM = tex2Dlod(tex, float4(pos.xy, 0.0, 0.0)); + #if (FXAA_GREEN_AS_LUMA == 0) + float lumaMinM = min(lumaMin, rgbyM.w); + float lumaMaxM = max(lumaMax, rgbyM.w); + #else + float lumaMinM = min(lumaMin, rgbyM.y); + float lumaMaxM = max(lumaMax, rgbyM.y); + #endif + if((lumaMaxM - lumaMinM) < max(fxaaConsoleEdgeThresholdMin, lumaMax * fxaaConsoleEdgeThreshold)) return rgbyM; +/*--------------------------------------------------------------------------*/ + float2 dir; + dir.x = dot(lumaNwNeSwSe, fxaaConsole360ConstDir.yyxx); + dir.y = dot(lumaNwNeSwSe, fxaaConsole360ConstDir.xyxy); + dir = normalize(dir); +/*--------------------------------------------------------------------------*/ + float4 dir1 = dir.xyxy * fxaaConsoleRcpFrameOpt.xyzw; +/*--------------------------------------------------------------------------*/ + float4 dir2; + float dirAbsMinTimesC = min(abs(dir.x), abs(dir.y)) * fxaaConsoleEdgeSharpness; + dir2 = saturate(fxaaConsole360ConstDir.zzww * dir.xyxy / dirAbsMinTimesC + 0.5); + dir2 = dir2 * fxaaConsole360RcpFrameOpt2.xyxy + fxaaConsole360RcpFrameOpt2.zwzw; +/*--------------------------------------------------------------------------*/ + float4 rgbyN1 = tex2Dlod(fxaaConsole360TexExpBiasNegOne, float4(pos.xy + dir1.xy, 0.0, 0.0)); + float4 rgbyP1 = tex2Dlod(fxaaConsole360TexExpBiasNegOne, float4(pos.xy + dir1.zw, 0.0, 0.0)); + float4 rgbyN2 = tex2Dlod(fxaaConsole360TexExpBiasNegTwo, float4(pos.xy + dir2.xy, 0.0, 0.0)); + float4 rgbyP2 = tex2Dlod(fxaaConsole360TexExpBiasNegTwo, float4(pos.xy + dir2.zw, 0.0, 0.0)); +/*--------------------------------------------------------------------------*/ + float4 rgbyA = rgbyN1 + rgbyP1; + float4 rgbyB = rgbyN2 + rgbyP2 + rgbyA * 0.5; +/*--------------------------------------------------------------------------*/ + float4 rgbyR = ((FxaaLuma(rgbyB) - lumaMax) > 0.0) ? rgbyA : rgbyB; + rgbyR = ((FxaaLuma(rgbyB) - lumaMin) > 0.0) ? rgbyR : rgbyA; + return rgbyR; } +/*==========================================================================*/ +#endif + + + +/*============================================================================ + + FXAA3 CONSOLE - OPTIMIZED PS3 PIXEL SHADER (NO EARLY EXIT) + +============================================================================== +The code below does not exactly match the assembly. +I have a feeling that 12 cycles is possible, but was not able to get there. +Might have to increase register count to get full performance. +Note this shader does not use perspective interpolation. + +Use the following cgc options, + + --fenable-bx2 --fastmath --fastprecision --nofloatbindings + +------------------------------------------------------------------------------ + NVSHADERPERF OUTPUT +------------------------------------------------------------------------------ +For reference and to aid in debug, output of NVShaderPerf should match this, + +Shader to schedule: + 0: texpkb h0.w(TRUE), v5.zyxx, #0 + 2: addh h2.z(TRUE), h0.w, constant(0.001953, 0.000000, 0.000000, 0.000000).x + 4: texpkb h0.w(TRUE), v5.xwxx, #0 + 6: addh h0.z(TRUE), -h2, h0.w + 7: texpkb h1.w(TRUE), v5, #0 + 9: addh h0.x(TRUE), h0.z, -h1.w + 10: addh h3.w(TRUE), h0.z, h1 + 11: texpkb h2.w(TRUE), v5.zwzz, #0 + 13: addh h0.z(TRUE), h3.w, -h2.w + 14: addh h0.x(TRUE), h2.w, h0 + 15: nrmh h1.xz(TRUE), h0_n + 16: minh_m8 h0.x(TRUE), |h1|, |h1.z| + 17: maxh h4.w(TRUE), h0, h1 + 18: divx h2.xy(TRUE), h1_n.xzzw, h0_n + 19: movr r1.zw(TRUE), v4.xxxy + 20: madr r2.xz(TRUE), -h1, constant(cConst5.x, cConst5.y, cConst5.z, cConst5.w).zzww, r1.zzww + 22: minh h5.w(TRUE), h0, h1 + 23: texpkb h0(TRUE), r2.xzxx, #0 + 25: madr r0.zw(TRUE), h1.xzxz, constant(cConst5.x, cConst5.y, cConst5.z, cConst5.w), r1 + 27: maxh h4.x(TRUE), h2.z, h2.w + 28: texpkb h1(TRUE), r0.zwzz, #0 + 30: addh_d2 h1(TRUE), h0, h1 + 31: madr r0.xy(TRUE), -h2, constant(cConst5.x, cConst5.y, cConst5.z, cConst5.w).xyxx, r1.zwzz + 33: texpkb h0(TRUE), r0, #0 + 35: minh h4.z(TRUE), h2, h2.w + 36: fenct TRUE + 37: madr r1.xy(TRUE), h2, constant(cConst5.x, cConst5.y, cConst5.z, cConst5.w).xyxx, r1.zwzz + 39: texpkb h2(TRUE), r1, #0 + 41: addh_d2 h0(TRUE), h0, h2 + 42: maxh h2.w(TRUE), h4, h4.x + 43: minh h2.x(TRUE), h5.w, h4.z + 44: addh_d2 h0(TRUE), h0, h1 + 45: slth h2.x(TRUE), h0.w, h2 + 46: sgth h2.w(TRUE), h0, h2 + 47: movh h0(TRUE), h0 + 48: addx.c0 rc(TRUE), h2, h2.w + 49: movh h0(c0.NE.x), h1 + +IPU0 ------ Simplified schedule: -------- +Pass | Unit | uOp | PC: Op +-----+--------+------+------------------------- + 1 | SCT0/1 | mov | 0: TXLr h0.w, g[TEX1].zyxx, const.xxxx, TEX0; + | TEX | txl | 0: TXLr h0.w, g[TEX1].zyxx, const.xxxx, TEX0; + | SCB1 | add | 2: ADDh h2.z, h0.--w-, const.--x-; + | | | + 2 | SCT0/1 | mov | 4: TXLr h0.w, g[TEX1].xwxx, const.xxxx, TEX0; + | TEX | txl | 4: TXLr h0.w, g[TEX1].xwxx, const.xxxx, TEX0; + | SCB1 | add | 6: ADDh h0.z,-h2, h0.--w-; + | | | + 3 | SCT0/1 | mov | 7: TXLr h1.w, g[TEX1], const.xxxx, TEX0; + | TEX | txl | 7: TXLr h1.w, g[TEX1], const.xxxx, TEX0; + | SCB0 | add | 9: ADDh h0.x, h0.z---,-h1.w---; + | SCB1 | add | 10: ADDh h3.w, h0.---z, h1; + | | | + 4 | SCT0/1 | mov | 11: TXLr h2.w, g[TEX1].zwzz, const.xxxx, TEX0; + | TEX | txl | 11: TXLr h2.w, g[TEX1].zwzz, const.xxxx, TEX0; + | SCB0 | add | 14: ADDh h0.x, h2.w---, h0; + | SCB1 | add | 13: ADDh h0.z, h3.--w-,-h2.--w-; + | | | + 5 | SCT1 | mov | 15: NRMh h1.xz, h0; + | SRB | nrm | 15: NRMh h1.xz, h0; + | SCB0 | min | 16: MINh*8 h0.x, |h1|, |h1.z---|; + | SCB1 | max | 17: MAXh h4.w, h0, h1; + | | | + 6 | SCT0 | div | 18: DIVx h2.xy, h1.xz--, h0; + | SCT1 | mov | 19: MOVr r1.zw, g[TEX0].--xy; + | SCB0 | mad | 20: MADr r2.xz,-h1, const.z-w-, r1.z-w-; + | SCB1 | min | 22: MINh h5.w, h0, h1; + | | | + 7 | SCT0/1 | mov | 23: TXLr h0, r2.xzxx, const.xxxx, TEX0; + | TEX | txl | 23: TXLr h0, r2.xzxx, const.xxxx, TEX0; + | SCB0 | max | 27: MAXh h4.x, h2.z---, h2.w---; + | SCB1 | mad | 25: MADr r0.zw, h1.--xz, const, r1; + | | | + 8 | SCT0/1 | mov | 28: TXLr h1, r0.zwzz, const.xxxx, TEX0; + | TEX | txl | 28: TXLr h1, r0.zwzz, const.xxxx, TEX0; + | SCB0/1 | add | 30: ADDh/2 h1, h0, h1; + | | | + 9 | SCT0 | mad | 31: MADr r0.xy,-h2, const.xy--, r1.zw--; + | SCT1 | mov | 33: TXLr h0, r0, const.zzzz, TEX0; + | TEX | txl | 33: TXLr h0, r0, const.zzzz, TEX0; + | SCB1 | min | 35: MINh h4.z, h2, h2.--w-; + | | | + 10 | SCT0 | mad | 37: MADr r1.xy, h2, const.xy--, r1.zw--; + | SCT1 | mov | 39: TXLr h2, r1, const.zzzz, TEX0; + | TEX | txl | 39: TXLr h2, r1, const.zzzz, TEX0; + | SCB0/1 | add | 41: ADDh/2 h0, h0, h2; + | | | + 11 | SCT0 | min | 43: MINh h2.x, h5.w---, h4.z---; + | SCT1 | max | 42: MAXh h2.w, h4, h4.---x; + | SCB0/1 | add | 44: ADDh/2 h0, h0, h1; + | | | + 12 | SCT0 | set | 45: SLTh h2.x, h0.w---, h2; + | SCT1 | set | 46: SGTh h2.w, h0, h2; + | SCB0/1 | mul | 47: MOVh h0, h0; + | | | + 13 | SCT0 | mad | 48: ADDxc0_s rc, h2, h2.w---; + | SCB0/1 | mul | 49: MOVh h0(NE0.xxxx), h1; + +Pass SCT TEX SCB + 1: 0% 100% 25% + 2: 0% 100% 25% + 3: 0% 100% 50% + 4: 0% 100% 50% + 5: 0% 0% 50% + 6: 100% 0% 75% + 7: 0% 100% 75% + 8: 0% 100% 100% + 9: 0% 100% 25% + 10: 0% 100% 100% + 11: 50% 0% 100% + 12: 50% 0% 100% + 13: 25% 0% 100% + +MEAN: 17% 61% 67% + +Pass SCT0 SCT1 TEX SCB0 SCB1 + 1: 0% 0% 100% 0% 100% + 2: 0% 0% 100% 0% 100% + 3: 0% 0% 100% 100% 100% + 4: 0% 0% 100% 100% 100% + 5: 0% 0% 0% 100% 100% + 6: 100% 100% 0% 100% 100% + 7: 0% 0% 100% 100% 100% + 8: 0% 0% 100% 100% 100% + 9: 0% 0% 100% 0% 100% + 10: 0% 0% 100% 100% 100% + 11: 100% 100% 0% 100% 100% + 12: 100% 100% 0% 100% 100% + 13: 100% 0% 0% 100% 100% + +MEAN: 30% 23% 61% 76% 100% +Fragment Performance Setup: Driver RSX Compiler, GPU RSX, Flags 0x5 +Results 13 cycles, 3 r regs, 923,076,923 pixels/s +============================================================================*/ +#if (FXAA_PS3 == 1) && (FXAA_EARLY_EXIT == 0) +/*--------------------------------------------------------------------------*/ +#pragma regcount 7 +#pragma disablepc all +#pragma option O3 +#pragma option OutColorPrec=fp16 +#pragma texformat default RGBA8 +/*==========================================================================*/ +half4 FxaaPixelShader( + // See FXAA Quality FxaaPixelShader() source for docs on Inputs! + FxaaFloat2 pos, + FxaaFloat4 fxaaConsolePosPos, + FxaaTex tex, + FxaaTex fxaaConsole360TexExpBiasNegOne, + FxaaTex fxaaConsole360TexExpBiasNegTwo, + FxaaFloat2 fxaaQualityRcpFrame, + FxaaFloat4 fxaaConsoleRcpFrameOpt, + FxaaFloat4 fxaaConsoleRcpFrameOpt2, + FxaaFloat4 fxaaConsole360RcpFrameOpt2, + FxaaFloat fxaaQualitySubpix, + FxaaFloat fxaaQualityEdgeThreshold, + FxaaFloat fxaaQualityEdgeThresholdMin, + FxaaFloat fxaaConsoleEdgeSharpness, + FxaaFloat fxaaConsoleEdgeThreshold, + FxaaFloat fxaaConsoleEdgeThresholdMin, + FxaaFloat4 fxaaConsole360ConstDir +) { +/*--------------------------------------------------------------------------*/ +// (1) + half4 dir; + half4 lumaNe = h4tex2Dlod(tex, half4(fxaaConsolePosPos.zy, 0, 0)); + #if (FXAA_GREEN_AS_LUMA == 0) + lumaNe.w += half(1.0/512.0); + dir.x = -lumaNe.w; + dir.z = -lumaNe.w; + #else + lumaNe.y += half(1.0/512.0); + dir.x = -lumaNe.y; + dir.z = -lumaNe.y; + #endif +/*--------------------------------------------------------------------------*/ +// (2) + half4 lumaSw = h4tex2Dlod(tex, half4(fxaaConsolePosPos.xw, 0, 0)); + #if (FXAA_GREEN_AS_LUMA == 0) + dir.x += lumaSw.w; + dir.z += lumaSw.w; + #else + dir.x += lumaSw.y; + dir.z += lumaSw.y; + #endif +/*--------------------------------------------------------------------------*/ +// (3) + half4 lumaNw = h4tex2Dlod(tex, half4(fxaaConsolePosPos.xy, 0, 0)); + #if (FXAA_GREEN_AS_LUMA == 0) + dir.x -= lumaNw.w; + dir.z += lumaNw.w; + #else + dir.x -= lumaNw.y; + dir.z += lumaNw.y; + #endif +/*--------------------------------------------------------------------------*/ +// (4) + half4 lumaSe = h4tex2Dlod(tex, half4(fxaaConsolePosPos.zw, 0, 0)); + #if (FXAA_GREEN_AS_LUMA == 0) + dir.x += lumaSe.w; + dir.z -= lumaSe.w; + #else + dir.x += lumaSe.y; + dir.z -= lumaSe.y; + #endif +/*--------------------------------------------------------------------------*/ +// (5) + half4 dir1_pos; + dir1_pos.xy = normalize(dir.xyz).xz; + half dirAbsMinTimesC = min(abs(dir1_pos.x), abs(dir1_pos.y)) * half(FXAA_CONSOLE__PS3_EDGE_SHARPNESS); +/*--------------------------------------------------------------------------*/ +// (6) + half4 dir2_pos; + dir2_pos.xy = clamp(dir1_pos.xy / dirAbsMinTimesC, half(-2.0), half(2.0)); + dir1_pos.zw = pos.xy; + dir2_pos.zw = pos.xy; + half4 temp1N; + temp1N.xy = dir1_pos.zw - dir1_pos.xy * fxaaConsoleRcpFrameOpt.zw; +/*--------------------------------------------------------------------------*/ +// (7) + temp1N = h4tex2Dlod(tex, half4(temp1N.xy, 0.0, 0.0)); + half4 rgby1; + rgby1.xy = dir1_pos.zw + dir1_pos.xy * fxaaConsoleRcpFrameOpt.zw; +/*--------------------------------------------------------------------------*/ +// (8) + rgby1 = h4tex2Dlod(tex, half4(rgby1.xy, 0.0, 0.0)); + rgby1 = (temp1N + rgby1) * 0.5; +/*--------------------------------------------------------------------------*/ +// (9) + half4 temp2N; + temp2N.xy = dir2_pos.zw - dir2_pos.xy * fxaaConsoleRcpFrameOpt2.zw; + temp2N = h4tex2Dlod(tex, half4(temp2N.xy, 0.0, 0.0)); +/*--------------------------------------------------------------------------*/ +// (10) + half4 rgby2; + rgby2.xy = dir2_pos.zw + dir2_pos.xy * fxaaConsoleRcpFrameOpt2.zw; + rgby2 = h4tex2Dlod(tex, half4(rgby2.xy, 0.0, 0.0)); + rgby2 = (temp2N + rgby2) * 0.5; +/*--------------------------------------------------------------------------*/ +// (11) + // compilier moves these scalar ops up to other cycles + #if (FXAA_GREEN_AS_LUMA == 0) + half lumaMin = min(min(lumaNw.w, lumaSw.w), min(lumaNe.w, lumaSe.w)); + half lumaMax = max(max(lumaNw.w, lumaSw.w), max(lumaNe.w, lumaSe.w)); + #else + half lumaMin = min(min(lumaNw.y, lumaSw.y), min(lumaNe.y, lumaSe.y)); + half lumaMax = max(max(lumaNw.y, lumaSw.y), max(lumaNe.y, lumaSe.y)); + #endif + rgby2 = (rgby2 + rgby1) * 0.5; +/*--------------------------------------------------------------------------*/ +// (12) + #if (FXAA_GREEN_AS_LUMA == 0) + bool twoTapLt = rgby2.w < lumaMin; + bool twoTapGt = rgby2.w > lumaMax; + #else + bool twoTapLt = rgby2.y < lumaMin; + bool twoTapGt = rgby2.y > lumaMax; + #endif +/*--------------------------------------------------------------------------*/ +// (13) + if(twoTapLt || twoTapGt) rgby2 = rgby1; +/*--------------------------------------------------------------------------*/ + return rgby2; } +/*==========================================================================*/ +#endif + + + +/*============================================================================ + + FXAA3 CONSOLE - OPTIMIZED PS3 PIXEL SHADER (WITH EARLY EXIT) + +============================================================================== +The code mostly matches the assembly. +I have a feeling that 14 cycles is possible, but was not able to get there. +Might have to increase register count to get full performance. +Note this shader does not use perspective interpolation. + +Use the following cgc options, + + --fenable-bx2 --fastmath --fastprecision --nofloatbindings + +Use of FXAA_GREEN_AS_LUMA currently adds a cycle (16 clks). +Will look at fixing this for FXAA 3.12. +------------------------------------------------------------------------------ + NVSHADERPERF OUTPUT +------------------------------------------------------------------------------ +For reference and to aid in debug, output of NVShaderPerf should match this, + +Shader to schedule: + 0: texpkb h0.w(TRUE), v5.zyxx, #0 + 2: addh h2.y(TRUE), h0.w, constant(0.001953, 0.000000, 0.000000, 0.000000).x + 4: texpkb h1.w(TRUE), v5.xwxx, #0 + 6: addh h0.x(TRUE), h1.w, -h2.y + 7: texpkb h2.w(TRUE), v5.zwzz, #0 + 9: minh h4.w(TRUE), h2.y, h2 + 10: maxh h5.x(TRUE), h2.y, h2.w + 11: texpkb h0.w(TRUE), v5, #0 + 13: addh h3.w(TRUE), -h0, h0.x + 14: addh h0.x(TRUE), h0.w, h0 + 15: addh h0.z(TRUE), -h2.w, h0.x + 16: addh h0.x(TRUE), h2.w, h3.w + 17: minh h5.y(TRUE), h0.w, h1.w + 18: nrmh h2.xz(TRUE), h0_n + 19: minh_m8 h2.w(TRUE), |h2.x|, |h2.z| + 20: divx h4.xy(TRUE), h2_n.xzzw, h2_n.w + 21: movr r1.zw(TRUE), v4.xxxy + 22: maxh h2.w(TRUE), h0, h1 + 23: fenct TRUE + 24: madr r0.xy(TRUE), -h2.xzzw, constant(cConst5.x, cConst5.y, cConst5.z, cConst5.w).zwzz, r1.zwzz + 26: texpkb h0(TRUE), r0, #0 + 28: maxh h5.x(TRUE), h2.w, h5 + 29: minh h5.w(TRUE), h5.y, h4 + 30: madr r1.xy(TRUE), h2.xzzw, constant(cConst5.x, cConst5.y, cConst5.z, cConst5.w).zwzz, r1.zwzz + 32: texpkb h2(TRUE), r1, #0 + 34: addh_d2 h2(TRUE), h0, h2 + 35: texpkb h1(TRUE), v4, #0 + 37: maxh h5.y(TRUE), h5.x, h1.w + 38: minh h4.w(TRUE), h1, h5 + 39: madr r0.xy(TRUE), -h4, constant(cConst5.x, cConst5.y, cConst5.z, cConst5.w).xyxx, r1.zwzz + 41: texpkb h0(TRUE), r0, #0 + 43: addh_m8 h5.z(TRUE), h5.y, -h4.w + 44: madr r2.xy(TRUE), h4, constant(cConst5.x, cConst5.y, cConst5.z, cConst5.w).xyxx, r1.zwzz + 46: texpkb h3(TRUE), r2, #0 + 48: addh_d2 h0(TRUE), h0, h3 + 49: addh_d2 h3(TRUE), h0, h2 + 50: movh h0(TRUE), h3 + 51: slth h3.x(TRUE), h3.w, h5.w + 52: sgth h3.w(TRUE), h3, h5.x + 53: addx.c0 rc(TRUE), h3.x, h3 + 54: slth.c0 rc(TRUE), h5.z, h5 + 55: movh h0(c0.NE.w), h2 + 56: movh h0(c0.NE.x), h1 + +IPU0 ------ Simplified schedule: -------- +Pass | Unit | uOp | PC: Op +-----+--------+------+------------------------- + 1 | SCT0/1 | mov | 0: TXLr h0.w, g[TEX1].zyxx, const.xxxx, TEX0; + | TEX | txl | 0: TXLr h0.w, g[TEX1].zyxx, const.xxxx, TEX0; + | SCB0 | add | 2: ADDh h2.y, h0.-w--, const.-x--; + | | | + 2 | SCT0/1 | mov | 4: TXLr h1.w, g[TEX1].xwxx, const.xxxx, TEX0; + | TEX | txl | 4: TXLr h1.w, g[TEX1].xwxx, const.xxxx, TEX0; + | SCB0 | add | 6: ADDh h0.x, h1.w---,-h2.y---; + | | | + 3 | SCT0/1 | mov | 7: TXLr h2.w, g[TEX1].zwzz, const.xxxx, TEX0; + | TEX | txl | 7: TXLr h2.w, g[TEX1].zwzz, const.xxxx, TEX0; + | SCB0 | max | 10: MAXh h5.x, h2.y---, h2.w---; + | SCB1 | min | 9: MINh h4.w, h2.---y, h2; + | | | + 4 | SCT0/1 | mov | 11: TXLr h0.w, g[TEX1], const.xxxx, TEX0; + | TEX | txl | 11: TXLr h0.w, g[TEX1], const.xxxx, TEX0; + | SCB0 | add | 14: ADDh h0.x, h0.w---, h0; + | SCB1 | add | 13: ADDh h3.w,-h0, h0.---x; + | | | + 5 | SCT0 | mad | 16: ADDh h0.x, h2.w---, h3.w---; + | SCT1 | mad | 15: ADDh h0.z,-h2.--w-, h0.--x-; + | SCB0 | min | 17: MINh h5.y, h0.-w--, h1.-w--; + | | | + 6 | SCT1 | mov | 18: NRMh h2.xz, h0; + | SRB | nrm | 18: NRMh h2.xz, h0; + | SCB1 | min | 19: MINh*8 h2.w, |h2.---x|, |h2.---z|; + | | | + 7 | SCT0 | div | 20: DIVx h4.xy, h2.xz--, h2.ww--; + | SCT1 | mov | 21: MOVr r1.zw, g[TEX0].--xy; + | SCB1 | max | 22: MAXh h2.w, h0, h1; + | | | + 8 | SCT0 | mad | 24: MADr r0.xy,-h2.xz--, const.zw--, r1.zw--; + | SCT1 | mov | 26: TXLr h0, r0, const.xxxx, TEX0; + | TEX | txl | 26: TXLr h0, r0, const.xxxx, TEX0; + | SCB0 | max | 28: MAXh h5.x, h2.w---, h5; + | SCB1 | min | 29: MINh h5.w, h5.---y, h4; + | | | + 9 | SCT0 | mad | 30: MADr r1.xy, h2.xz--, const.zw--, r1.zw--; + | SCT1 | mov | 32: TXLr h2, r1, const.xxxx, TEX0; + | TEX | txl | 32: TXLr h2, r1, const.xxxx, TEX0; + | SCB0/1 | add | 34: ADDh/2 h2, h0, h2; + | | | + 10 | SCT0/1 | mov | 35: TXLr h1, g[TEX0], const.xxxx, TEX0; + | TEX | txl | 35: TXLr h1, g[TEX0], const.xxxx, TEX0; + | SCB0 | max | 37: MAXh h5.y, h5.-x--, h1.-w--; + | SCB1 | min | 38: MINh h4.w, h1, h5; + | | | + 11 | SCT0 | mad | 39: MADr r0.xy,-h4, const.xy--, r1.zw--; + | SCT1 | mov | 41: TXLr h0, r0, const.zzzz, TEX0; + | TEX | txl | 41: TXLr h0, r0, const.zzzz, TEX0; + | SCB0 | mad | 44: MADr r2.xy, h4, const.xy--, r1.zw--; + | SCB1 | add | 43: ADDh*8 h5.z, h5.--y-,-h4.--w-; + | | | + 12 | SCT0/1 | mov | 46: TXLr h3, r2, const.xxxx, TEX0; + | TEX | txl | 46: TXLr h3, r2, const.xxxx, TEX0; + | SCB0/1 | add | 48: ADDh/2 h0, h0, h3; + | | | + 13 | SCT0/1 | mad | 49: ADDh/2 h3, h0, h2; + | SCB0/1 | mul | 50: MOVh h0, h3; + | | | + 14 | SCT0 | set | 51: SLTh h3.x, h3.w---, h5.w---; + | SCT1 | set | 52: SGTh h3.w, h3, h5.---x; + | SCB0 | set | 54: SLThc0 rc, h5.z---, h5; + | SCB1 | add | 53: ADDxc0_s rc, h3.---x, h3; + | | | + 15 | SCT0/1 | mul | 55: MOVh h0(NE0.wwww), h2; + | SCB0/1 | mul | 56: MOVh h0(NE0.xxxx), h1; + +Pass SCT TEX SCB + 1: 0% 100% 25% + 2: 0% 100% 25% + 3: 0% 100% 50% + 4: 0% 100% 50% + 5: 50% 0% 25% + 6: 0% 0% 25% + 7: 100% 0% 25% + 8: 0% 100% 50% + 9: 0% 100% 100% + 10: 0% 100% 50% + 11: 0% 100% 75% + 12: 0% 100% 100% + 13: 100% 0% 100% + 14: 50% 0% 50% + 15: 100% 0% 100% + +MEAN: 26% 60% 56% + +Pass SCT0 SCT1 TEX SCB0 SCB1 + 1: 0% 0% 100% 100% 0% + 2: 0% 0% 100% 100% 0% + 3: 0% 0% 100% 100% 100% + 4: 0% 0% 100% 100% 100% + 5: 100% 100% 0% 100% 0% + 6: 0% 0% 0% 0% 100% + 7: 100% 100% 0% 0% 100% + 8: 0% 0% 100% 100% 100% + 9: 0% 0% 100% 100% 100% + 10: 0% 0% 100% 100% 100% + 11: 0% 0% 100% 100% 100% + 12: 0% 0% 100% 100% 100% + 13: 100% 100% 0% 100% 100% + 14: 100% 100% 0% 100% 100% + 15: 100% 100% 0% 100% 100% + +MEAN: 33% 33% 60% 86% 80% +Fragment Performance Setup: Driver RSX Compiler, GPU RSX, Flags 0x5 +Results 15 cycles, 3 r regs, 800,000,000 pixels/s +============================================================================*/ +#if (FXAA_PS3 == 1) && (FXAA_EARLY_EXIT == 1) +/*--------------------------------------------------------------------------*/ +#pragma regcount 7 +#pragma disablepc all +#pragma option O2 +#pragma option OutColorPrec=fp16 +#pragma texformat default RGBA8 +/*==========================================================================*/ +half4 FxaaPixelShader( + // See FXAA Quality FxaaPixelShader() source for docs on Inputs! + FxaaFloat2 pos, + FxaaFloat4 fxaaConsolePosPos, + FxaaTex tex, + FxaaTex fxaaConsole360TexExpBiasNegOne, + FxaaTex fxaaConsole360TexExpBiasNegTwo, + FxaaFloat2 fxaaQualityRcpFrame, + FxaaFloat4 fxaaConsoleRcpFrameOpt, + FxaaFloat4 fxaaConsoleRcpFrameOpt2, + FxaaFloat4 fxaaConsole360RcpFrameOpt2, + FxaaFloat fxaaQualitySubpix, + FxaaFloat fxaaQualityEdgeThreshold, + FxaaFloat fxaaQualityEdgeThresholdMin, + FxaaFloat fxaaConsoleEdgeSharpness, + FxaaFloat fxaaConsoleEdgeThreshold, + FxaaFloat fxaaConsoleEdgeThresholdMin, + FxaaFloat4 fxaaConsole360ConstDir +) { +/*--------------------------------------------------------------------------*/ +// (1) + half4 rgbyNe = h4tex2Dlod(tex, half4(fxaaConsolePosPos.zy, 0, 0)); + #if (FXAA_GREEN_AS_LUMA == 0) + half lumaNe = rgbyNe.w + half(1.0/512.0); + #else + half lumaNe = rgbyNe.y + half(1.0/512.0); + #endif +/*--------------------------------------------------------------------------*/ +// (2) + half4 lumaSw = h4tex2Dlod(tex, half4(fxaaConsolePosPos.xw, 0, 0)); + #if (FXAA_GREEN_AS_LUMA == 0) + half lumaSwNegNe = lumaSw.w - lumaNe; + #else + half lumaSwNegNe = lumaSw.y - lumaNe; + #endif +/*--------------------------------------------------------------------------*/ +// (3) + half4 lumaNw = h4tex2Dlod(tex, half4(fxaaConsolePosPos.xy, 0, 0)); + #if (FXAA_GREEN_AS_LUMA == 0) + half lumaMaxNwSw = max(lumaNw.w, lumaSw.w); + half lumaMinNwSw = min(lumaNw.w, lumaSw.w); + #else + half lumaMaxNwSw = max(lumaNw.y, lumaSw.y); + half lumaMinNwSw = min(lumaNw.y, lumaSw.y); + #endif +/*--------------------------------------------------------------------------*/ +// (4) + half4 lumaSe = h4tex2Dlod(tex, half4(fxaaConsolePosPos.zw, 0, 0)); + #if (FXAA_GREEN_AS_LUMA == 0) + half dirZ = lumaNw.w + lumaSwNegNe; + half dirX = -lumaNw.w + lumaSwNegNe; + #else + half dirZ = lumaNw.y + lumaSwNegNe; + half dirX = -lumaNw.y + lumaSwNegNe; + #endif +/*--------------------------------------------------------------------------*/ +// (5) + half3 dir; + dir.y = 0.0; + #if (FXAA_GREEN_AS_LUMA == 0) + dir.x = lumaSe.w + dirX; + dir.z = -lumaSe.w + dirZ; + half lumaMinNeSe = min(lumaNe, lumaSe.w); + #else + dir.x = lumaSe.y + dirX; + dir.z = -lumaSe.y + dirZ; + half lumaMinNeSe = min(lumaNe, lumaSe.y); + #endif +/*--------------------------------------------------------------------------*/ +// (6) + half4 dir1_pos; + dir1_pos.xy = normalize(dir).xz; + half dirAbsMinTimes8 = min(abs(dir1_pos.x), abs(dir1_pos.y)) * half(FXAA_CONSOLE__PS3_EDGE_SHARPNESS); +/*--------------------------------------------------------------------------*/ +// (7) + half4 dir2_pos; + dir2_pos.xy = clamp(dir1_pos.xy / dirAbsMinTimes8, half(-2.0), half(2.0)); + dir1_pos.zw = pos.xy; + dir2_pos.zw = pos.xy; + #if (FXAA_GREEN_AS_LUMA == 0) + half lumaMaxNeSe = max(lumaNe, lumaSe.w); + #else + half lumaMaxNeSe = max(lumaNe, lumaSe.y); + #endif +/*--------------------------------------------------------------------------*/ +// (8) + half4 temp1N; + temp1N.xy = dir1_pos.zw - dir1_pos.xy * fxaaConsoleRcpFrameOpt.zw; + temp1N = h4tex2Dlod(tex, half4(temp1N.xy, 0.0, 0.0)); + half lumaMax = max(lumaMaxNwSw, lumaMaxNeSe); + half lumaMin = min(lumaMinNwSw, lumaMinNeSe); +/*--------------------------------------------------------------------------*/ +// (9) + half4 rgby1; + rgby1.xy = dir1_pos.zw + dir1_pos.xy * fxaaConsoleRcpFrameOpt.zw; + rgby1 = h4tex2Dlod(tex, half4(rgby1.xy, 0.0, 0.0)); + rgby1 = (temp1N + rgby1) * 0.5; +/*--------------------------------------------------------------------------*/ +// (10) + half4 rgbyM = h4tex2Dlod(tex, half4(pos.xy, 0.0, 0.0)); + #if (FXAA_GREEN_AS_LUMA == 0) + half lumaMaxM = max(lumaMax, rgbyM.w); + half lumaMinM = min(lumaMin, rgbyM.w); + #else + half lumaMaxM = max(lumaMax, rgbyM.y); + half lumaMinM = min(lumaMin, rgbyM.y); + #endif +/*--------------------------------------------------------------------------*/ +// (11) + half4 temp2N; + temp2N.xy = dir2_pos.zw - dir2_pos.xy * fxaaConsoleRcpFrameOpt2.zw; + temp2N = h4tex2Dlod(tex, half4(temp2N.xy, 0.0, 0.0)); + half4 rgby2; + rgby2.xy = dir2_pos.zw + dir2_pos.xy * fxaaConsoleRcpFrameOpt2.zw; + half lumaRangeM = (lumaMaxM - lumaMinM) / FXAA_CONSOLE__PS3_EDGE_THRESHOLD; +/*--------------------------------------------------------------------------*/ +// (12) + rgby2 = h4tex2Dlod(tex, half4(rgby2.xy, 0.0, 0.0)); + rgby2 = (temp2N + rgby2) * 0.5; +/*--------------------------------------------------------------------------*/ +// (13) + rgby2 = (rgby2 + rgby1) * 0.5; +/*--------------------------------------------------------------------------*/ +// (14) + #if (FXAA_GREEN_AS_LUMA == 0) + bool twoTapLt = rgby2.w < lumaMin; + bool twoTapGt = rgby2.w > lumaMax; + #else + bool twoTapLt = rgby2.y < lumaMin; + bool twoTapGt = rgby2.y > lumaMax; + #endif + bool earlyExit = lumaRangeM < lumaMax; + bool twoTap = twoTapLt || twoTapGt; +/*--------------------------------------------------------------------------*/ +// (15) + if(twoTap) rgby2 = rgby1; + if(earlyExit) rgby2 = rgbyM; +/*--------------------------------------------------------------------------*/ + return rgby2; } +/*==========================================================================*/ +#endif + diff --git a/DATA/dsfix/VSSAO.fx b/DATA/dsfix/VSSAO.fx index 36b13b5..8bf215a 100644 --- a/DATA/dsfix/VSSAO.fx +++ b/DATA/dsfix/VSSAO.fx @@ -7,8 +7,8 @@ extern float scale = 1; //downsampling scale, 1 is highest quality but slowest extern float aoRadiusMultiplier = 1.0; //Linearly multiplies the radius of the AO Sampling -extern float ThicknessModel = 40.0; //units in space the AO assumes objects' thicknesses are -extern float FOV = 75; //Field of View in Degrees +extern float ThicknessModel = 25.0; //units in space the AO assumes objects' thicknesses are +extern float FOV = 85; //Field of View in Degrees extern float luminosity_threshold = 0.3; #ifndef SSAO_STRENGTH_LOW @@ -36,12 +36,11 @@ extern float aoStrengthMultiplier = 1.2; #define LUMINANCE_CONSIDERATION //comment this line to not take pixel brightness into account -//#define RAW_SSAO //uncomment this line to show the raw ssao /***End Of User-controlled Variables***/ static float2 rcpres = PIXEL_SIZE; static float aspect = rcpres.y/rcpres.x; -static const float nearZ = 10.0; +static const float nearZ = 1.0; static const float farZ = 5000.0; static const float2 g_InvFocalLen = { tan(0.5f*radians(FOV)) / rcpres.y * rcpres.x, tan(0.5f*radians(FOV)) }; static const float depthRange = nearZ-farZ; @@ -78,14 +77,6 @@ sampler passSampler = sampler_state MAGFILTER = LINEAR; }; -sampler thicknessSampler = sampler_state -{ - texture = ; - AddressU = CLAMP; - MINFILTER = LINEAR; - MAGFILTER = LINEAR; -}; - struct VSOUT { float4 vertPos : POSITION; @@ -139,20 +130,20 @@ static float sample_radius[N_SAMPLES] = float2 rand(in float2 uv) { float noiseX = (frac(sin(dot(uv, float2(12.9898,78.233)*2.0)) * 43758.5453)); - //float noiseY = sqrt(1-noiseX*noiseX); - float noiseY = (frac(sin(dot(uv, float2(12.9898,78.233))) * 43758.5453)); + float noiseY = sqrt(1-noiseX*noiseX); + //float noiseY = (frac(sin(dot(uv, float2(12.9898,78.233))) * 43758.5453)); return float2(noiseX, noiseY); } float readDepth(in float2 coord : TEXCOORD0) { float4 col = tex2D(depthSampler, coord); - float posZ = clamp((((1.0-col.z)/(256.0) + (1.0-col.y) + (1.0-col.x)*256))/5.0, 0.0, 1.0); + //float posZ = clamp((((1.0-col.z)/(256.0) + (1.0-col.y) + (1.0-col.x)*256))/5.0, 0.0, 1.0); //float posZ = ((1.0-col.y) + 256.0*(1.0-col.x))/(5.0); - return ((2.0f * nearZ) / (nearZ + farZ - posZ * (farZ - nearZ)))*20.0; - //return log(pow(exp(((2.0f * nearZ) / (nearZ + farZ - posZ * (farZ - nearZ)))),20)); - //return posZ / (depthRange - (depthRange - 1.0)*posZ); - //float posZ = ((1.0-col.y) + 256.0*(1.0-col.x)); - //return posZ; + //return ((2.0f * nearZ) / (nearZ + farZ - posZ * (farZ - nearZ)))*20.0; + + float posZ = ((1.0-col.z) + (1.0-col.y)*256.0 + (1.0-col.x)*(257.0*256.0)); + //return ((2.0f * nearZ) / (nearZ + farZ - posZ * (farZ - nearZ))); + return (posZ-nearZ)/farZ; } float3 getPosition(in float2 uv, in float eye_z) { @@ -161,10 +152,6 @@ float3 getPosition(in float2 uv, in float eye_z) { return pos; } -float getThicknessModel(in float depth) { - return ThicknessModel * tex1D(thicknessSampler, (depth-0.086)*20).r; -} - float4 ssao_Main( VSOUT IN ) : COLOR0 { clip(1/scale-IN.UVCoord.x); clip(1/scale-IN.UVCoord.y); @@ -196,14 +183,14 @@ float4 ssao_Main( VSOUT IN ) : COLOR0 { float curr_sample_depth = depthRange*readDepth(sample_coords); ao += clamp(0,curr_sample_radius+sample_center_depth-curr_sample_depth,2*curr_sample_radius); - ao -= clamp(0,curr_sample_radius+sample_center_depth-curr_sample_depth-getThicknessModel(depth),2*curr_sample_radius); + ao -= clamp(0,curr_sample_radius+sample_center_depth-curr_sample_depth-ThicknessModel,2*curr_sample_radius); s += 2*curr_sample_radius; } ao /= s; // adjust for close and far away - //if(depth<0.08) ao = lerp(ao, 0.0, (0.08-depth)*25); + if(depth<0.1) ao = lerp(ao, 0.0, (0.1-depth)*10.0); //if(depth>0.1) ao = lerp(ao, 0.0, 1.0); ao = 1.0-ao*aoStrengthMultiplier; @@ -212,11 +199,6 @@ float4 ssao_Main( VSOUT IN ) : COLOR0 { return float4(ao,ao,ao,1); } -float pureDepth(in float2 coord : TEXCOORD0) { - float4 dcol = tex2D(depthSampler, coord); - return (((1.0-dcol.z)/(256.0) + (1.0-dcol.y) + (1.0-dcol.x)*256))/8.0; -} - float4 HBlur( VSOUT IN ) : COLOR0 { float color = tex2D(passSampler, IN.UVCoord).r; @@ -226,15 +208,7 @@ float4 HBlur( VSOUT IN ) : COLOR0 { blurred += tex2D(passSampler, IN.UVCoord + float2(rcpres.x*3.2307692308, 0)).r * 0.0702702703; blurred += tex2D(passSampler, IN.UVCoord - float2(rcpres.x*3.2307692308, 0)).r * 0.0702702703; - //return color; return blurred; - - //float centerDepth = pureDepth(IN.UVCoord); - //float factor = 0.0; - //factor += abs(centerDepth - pureDepth(IN.UVCoord + float2(rcpres.x, 0)))*5.0; - //factor += abs(centerDepth - pureDepth(IN.UVCoord - float2(rcpres.x, 0)))*5.0; - - //return lerp(blurred, color, factor); } float4 VBlur( VSOUT IN ) : COLOR0 { @@ -246,15 +220,7 @@ float4 VBlur( VSOUT IN ) : COLOR0 { blurred += tex2D(passSampler, IN.UVCoord + float2(0, rcpres.y*3.2307692308)).r * 0.0702702703; blurred += tex2D(passSampler, IN.UVCoord - float2(0, rcpres.y*3.2307692308)).r * 0.0702702703; - //return color; return blurred; - - //float centerDepth = pureDepth(IN.UVCoord); - //float factor = 0.0; - //factor += abs(centerDepth - pureDepth(IN.UVCoord + float2(0, rcpres.y)))*5.0; - //factor += abs(centerDepth - pureDepth(IN.UVCoord - float2(0, rcpres.y)))*5.0; - - //return lerp(blurred, color, factor); } float4 Combine( VSOUT IN ) : COLOR0 { diff --git a/DATA/dsfix/vssaothicknessmodel.png b/DATA/dsfix/vssaothicknessmodel.png deleted file mode 100644 index bcfb6e369499b3816aaebd925a21590dd3f11a15..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 3726 zcmV;94sr2`P)KLZ*U+IBfRsybQWXdwQbLP>6pAqfylh#{fb6;Z(vMMVS~$e@S=j*ftg6;Uhf59&ghTmgWD0l;*T zI709Y^p6lP1rIRMx#05C~cW=H_Aw*bJ-5DT&Z2n+x)QHX^p z00esgV8|mQcmRZ%02D^@S3L16t`O%c004NIvOKvYIYoh62rY33S640`D9%Y2D-rV&neh&#Q1i z007~1e$oCcFS8neI|hJl{-P!B1ZZ9hpmq0)X0i`JwE&>$+E?>%_LC6RbVIkUx0b+_+BaR3cnT7Zv!AJxW zizFb)h!jyGOOZ85F;a?DAXP{m@;!0_IfqH8(HlgRxt7s3}k3K`kFu>>-2Q$QMFfPW!La{h336o>X zu_CMttHv6zR;&ZNiS=X8v3CR#fknUxHUxJ0uoBa_M6WNWeqIg~6QE69c9o#eyhGvpiOA@W-aonk<7r1(?fC{oI5N*U!4 zfg=2N-7=cNnjjOr{yriy6mMFgG#l znCF=fnQv8CDz++o6_Lscl}eQ+l^ZHARH>?_s@|##Rr6KLRFA1%Q+=*RRWnoLsR`7U zt5vFIcfW3@?wFpwUVxrVZ>QdQz32KIeJ}k~{cZZE^+ya? z2D1z#2HOnI7(B%_ac?{wFUQ;QQA1tBKtrWrm0_3Rgps+?Jfqb{jYbcQX~taRB;#$y zZN{S}1|}gUOHJxc?wV3fxuz+mJ4`!F$IZ;mqRrNsHJd##*D~ju=bP7?-?v~|cv>vB zsJ6IeNwVZxrdjT`yl#bBIa#GxRa#xMMy;K#CDyyGyQdMSxlWT#tDe?p!?5wT$+oGt z8L;Kp2HUQ-ZMJ=3XJQv;x5ci*?vuTfeY$;({XGW_huIFR9a(?@3)XSs8O^N5RyOM=TTmp(3=8^+zpz2r)C z^>JO{deZfso3oq3?Wo(Y?l$ge?uXo;%ru`Vo>?<<(8I_>;8Eq#KMS9gFl*neeosSB zfoHYnBQIkwkyowPu(zdms`p{<7e4kra-ZWq<2*OsGTvEV%s0Td$hXT+!*8Bnh2KMe zBmZRodjHV?r+_5^X9J0WL4jKW`}lf%A-|44I@@LTvf1rHjG(ze6+w@Jt%Bvjts!X0 z?2xS?_ve_-kiKB_KiJlZ$9G`c^=E@oNG)mWWaNo-3TIW8)$Hg0Ub-~8?KhvJ>$ z3*&nim@mj(aCxE5!t{lw7O5^0EIO7zOo&c6l<+|iDySBWCGrz@C5{St!X3hAA}`T4 z(TLbXTq+(;@<=L8dXnssyft|w#WSTW<++3>sgS%(4NTpeI-VAqb|7ssJvzNHgOZVu zaYCvgO_R1~>SyL=cFU|~g|hy|Zi}}s9+d~lYqOB71z9Z$wnC=pR9Yz4DhIM>Wmjgu z&56o6maCpC&F##y%G;1PobR9i?GnNg;gYtchD%p19a!eQtZF&3JaKv33gZ<8D~47E ztUS1iwkmDaPpj=$m#%)jCVEY4fnLGNg2A-`YwHVD3gv};>)hAvT~AmqS>Lr``i7kw zJ{5_It`yrBmlc25DBO7E8;5VoznR>Ww5hAaxn$2~(q`%A-YuS64wkBy=9dm`4cXeX z4c}I@?e+FW+b@^RDBHV(wnMq2zdX3SWv9u`%{xC-q*U}&`cyXV(%rRT*Z6MH?i+i& z_B8C(+grT%{XWUQ+f@NoP1R=AW&26{v-dx)iK^-Nmiuj8txj!m?Z*Ss1N{dh4z}01 z)YTo*JycSU)+_5r4#yw9{+;i4Ee$peRgIj+;v;ZGdF1K$3E%e~4LaI(jC-u%2h$&R z9cLXcYC@Xwnns&bn)_Q~Te?roKGD|d-g^8;+aC{{G(1^(O7m37Y1-+6)01cN&y1aw zoqc{T`P^XJqPBbIW6s}d4{z_f5Om?vMgNQEJG?v2T=KYd^0M3I6IZxbny)%vZR&LD zJpPl@Psh8QyPB@KTx+@RdcC!KX7}kEo;S|j^u2lU7XQ}Oo;f|;z4Ll+_r>@1-xl3| zawq-H%e&ckC+@AhPrP6BKT#_XdT7&;F71j}Joy zkC~6lh7E@6o;W@^IpRNZ{ptLtL(gQ-CY~4mqW;US7Zxvm_|@yz&e53Bp_lTPlfP|z zrTyx_>lv@x#=^!PzR7qqF<$gm`|ZJZ+;<)Cqu&ot2z=0000WV@Og>004R=004l4008;_004mL004C`008P>0026e000+nl3&F} z000BINkl?1_{WFjKg(tv zk`R#?GqV``+r3D_%%G~9oY`{6*Hu-h>NfYAtyzxb<^xHTQXnEBXJ+x9_sZ+#Uw-~% z)8F8KV(fPMw}`~gZTI(DD@daEzR9X8?)!$B;kvGnyYCyS8ggca-g}GbWhwtYc>%BiXcgDf-a5=Z@#MZ}4De>-Wxeacg>y%Q^ zTH7!gu_!PbW2X;}aput$V-mHsEw3^keLa$>wc`DLOOb4>#&q2so@Uq0Si3yj-ECbF1+ve#={~7*Xu%SEp!5J5*}sul_C%TwKbGlP)iMbU9X5hO+oIpmH>={M*uzj z9z65!@h<=rz@86qA7X3_u$e}ZVcSa@6r9;@F_GT}qeU@ZreIiqAY{OiNz-GNNRq*V8(mJ{R zMg5oIXy%591Yh?vb#P|9F!s&!fO8ozoAnwN@%P+j;`??+j!fImYRq^6yPfCRvG*`3 zCQLZMkXb&sMsU_cC)A^*igZrvP#)*|92ofvJ8UFIcvk;0+>=$0XH8Vot(RT+OgwYF z09O7_yglzXT)-`F$d%U~cMucn$&-oKR*NRSGVM_t-m(;!EnB;DZA{1yUTHR8gQ zcPQ1?gcj}%toxRg0RR630Kml@*UWLue*gdg07*qoM6N<$f^}~g8vpfm|qoAOl3?FsBJe1PKgB5P<|HK!A`*I1(@% znF9V*lp}efF0Md+*SK$835UMjeIn7t6@AE|`1UC-uxla`&}`K|GcdBc`gVQW^S-~U ztE#KJtGcUu?0;0-FRQob1ddm!R9=Md`uh5)Qt0F(MFRvw+E<$|9659fN|-$jR|%`Y ziakOBp3F*vBCPhx*TE7JMG~+mF$U8Fb-zMTsYLETB@ICgs2fBS*;a~)=oCUQA)DY& zU_pVaj;8uSLJ(m9VK_lc2qdHvc#;uR4<%$0Mi97v2GtJ`LJ4}pFhUl=SEZ^`nTZ%e z2q%OQ5((c@r&Ow=2%`u_!f1kp@GwC~;B^fqc{ z%puGr%p=SvR1g*r9w#g$JV97QSd7PB84~^qQ9Rxns%r^r2}rOL9GXS_wk*wPcg4EX z8S8mJ73)CHShH;|1MkHAhhoic;aK-0+#BnO#Pf_bTe~;b7Sj2#ShK#FJe&{5SX4!^%NI9jp{NNJ)PX=(9IEHicuROZUyt#yFJ^+s$Htw6&=y;OCxmqDj_uM`l3=-~u-ls2W;O|EJ{+Sc z*$x-zRA$o1|DM+2E^PuaIC0#u+kM1{qJJesVYPpOmi*Pn3CAtg)KQ};27CSUMa~s& zKH3888fIMg!d1bEa&*vlfcp-K*lG&L&dFJ7E3K>^iv~@SPln`xW3ai+93en9?hlSp z2H|AnHm06R%8ht4C5gHhs~=Um)*MQ~)^IX5T*Q<8($tR;cLOpvI#S_kd1`#<+-7`d zkqYMl=8aAaluglJ0=B#piIL@hHAd#I?J&hAhbR7K&NJsL%fm<`ld`)OEwik~Q7^=H zRX(x4|AZxbeqB29U`95(gXgK?ps^s3G8nM0E3dT25_H6&W zF7Cs^f~IT1>6g-R*_JJitnngeF?T13!Ro96(TiEMdWgmzYOZ%R(KghsvrwZ^ppX3R zNH5V*n5$h%4NfR-BeJcq8PFaSEk{?Lht_eTBmRUAIPU%)T9A%C@dcu?6?mvLtWH=b z%mTT0l8=Dmm>y%s&38AXd_5qeWOZ}EYfsO7*cE#bEn_ zI$n~^xfh{5`2;8@K1~oolyA=P7BJ&Re!}>fJYQ@ok!WAm)yI z{pW-#A5Z8P!>-TyW1v{S7s?$~D^>h1LqKqv28G zgt9lrtk^l)czUg)d$w41N*UbVFva+3T)@zvh|T-!qh5=+syBYF2yQnlFqUm-Dfx5P zs%2}MtIjRBFle$O*t~9e>r%ln&jtp$`h|ml=8#xzJq^a^+&wzlQcFa;2Sc6DnZTJW z%y&~rF>yCJxk{E2wfth>5V|dCN?C=rdbLJqT_v>dIt$txBdm3v*T-@`rjxoR6MNSzGS1)8 zw%lTk?Y^?dJ2T7i+HfTn&9yeiMyv67=9XIvQ)a2F{*qMjPh5GNWF~Cfc*dwo{~~kD zGh1h@>*=>Ub!q!iMNrpR53>~eBPYB3aO_yjxVXQkl)Wc6Bz>jXeMM0oy=4@XBdmKYszUYEnzkD|AAic69rxCY zK3PXD@B$Q%I6sc~ZdYPZ0_>R>uov$$ak}8J@!wxE6@Q?g}0bnS?*3v1=+msBBl30 zO7A~-@l11GJk*WKFP(7%e=y_9zk4MC9N}hqNwxjV7nGXweW`qNuD8&-M}YYP<#||e=|egX3h$Cp$GlmD@worM0NlMO zRj&NRsDr7BLf2WX(rKn#d!?2xiBtKQS8G!GSgB7A{^QI*{CIqXeBagcUQnj=;hAlJ zUjwsczkgl>w4Bq+`mejeai$ovZiGvVMQEVK~QwI(w8Y!3?oYh0URu`dY8;yX{%m3SL!msAlT(pGu#@=Lt4>r%SV|C z8MTUYGUsGVMiZneGHHAOWJ;@mOt4_VVw-f#0LfCh243130lRQ&Uz()%hLehpbiE&> z6ZEKsVhz`d28Cu)w{I#?(%0Xr!7x6iq~=^-cxSmY`RL>CjCdpEF!>R=X1 z5&qEbSLD*;Y6(H)sV>B8eM1ym&f)m~b|NA7?#5B1e*7|bmXyvWB7&uY2jNwzG8jY+ z=Y7}or6&WS*oUtO_D1frPN&nJD-PO340qY)B3z#2{i~OT_k)Q(JmD0AYXXwa4<7W3 za`&U!Lg>#^KkRhwAZiPW*QKio;43}v1&PwPCWuzno~^i@chYpp>IX-qPx{mDY7K(p zgRWD^oaLLNzT{^9C&$k;fxvG4!13G*E1%mT4xbnoq{3rwW_NVPfko)(426$CvU@}P zfn*qtNA`wDmT)jBA9`tscUvA*Mg*jG*ux=Akhc3kyf@!0R48#|?}~&nYC0VlP4LEImpWshQ?kc^rNbTzS|D~5Ks@`7FfcT delta 4313 zcmc(ie^is#8OL*9-n<3~@QMKuBE$e16)->mQPjYwRV*Z^Q|3=lbc%{-fm*C3h*RfJ z$fHeb9b$A=TSk!=%l=pue{IqgOIMF)inYj?t;mk(qD+o6d+tkOV^6D|p5xi&e7HZ~ zdvBia^W6KsrcSBpg0v#ZZ>mHhaYG*4?RLz>^6=xCL5z3Lqs?0*s#?fAY;BqWB}@s) zfcuPrm0|-7438k8P~?{3F7d{C2yM&Mgo2ildZ~#-!eRqWLx8Y@N)=eb1;jK1%X5$b zgs%Du=AMWF8I7nBFGPj}A<2m2QTt>p`yiu`I3yNv_PvVbiAW@pfQ&_S2=e#JOy!1NOStTqL3t{7V$vFBebs$^LQk}Q4e+GV=$kKs2p{> z{nvK;IIO&exH}rtFsC-66Hx=woThM>GObf%)10PYEDv_655w|o#DXk9R##^JBC-Gr zYml|b>qsF|gcKuhAnTA4eFvw_x4ndqj&+WsCISBd!GU(X=g+ojnwQ`Relmu%8wc;^3 zqzYs}RU9Uxu1Vv-`K)vrkx0rdm6>Fkd9ze#RwfY8Z%&7FqnuAUe{uUn(_Fqv)fTx@ zl4pTr&0xV~oKN_Y1-C=A$2QzqYb~A7Q1i>LuxF4tO9iS?8kPoub9SL{l!3rmCK_y` zBMzy889^0wh`38u;FwgGah|W@F6yy?wk2!)1ckbrfN78#)V{-68bZz)q0rahPN+=l zU0ZFwtuoq3Bh5KfJ{Bk4f(G7o)XzJM8}*9D zLzE>fJS<9c7sJ@qR!~?x6D}|jf~_iofGQ%iemV~NgHIKjkC!Uwl7LSG!mnl~1b ziBPmLLrOL{79dG*f1^Q0kM(ITE0FO}R9@;CgH3@9{=?neXdVfg>|R*4FdR~I{sbn4 zT2N2C?pC?l3%oMAd8ywWu6x^=q1!FD8t3fe^H7agY7@{Kn!pr@gXdf)*2yd%q{&QQ zQV9Y7&5)6T`n3*1_TZ^m z`Z2aq?e=2+4npJLUCiG@jw0_P`;p%vRmcIP8aaqqkweH~HP9XD1SKN~(+=*!1vo?o}vE*+Hz+pP0M!nMUUNjSPLGr#{mwX8yE4 z2(~S920EHGE&?Ti={*%xmxcX=DTRmg2QX0C69OS&m-)SCx8AOub>Pu7{FMU$@(2G6&-FZ;|$;osd#$)AcEh(2;Q@m@J65fscBpH zyX91UG%4@(;#mBV8fe<9x0WzR77@n!He=3idaj$wMK{H5(PS=|%OAK}Z#a;=I{0qD z+!|Yc>?=5}AD9Ye^XEgo8jSkm8|(jlX7ASxBgDVS^h%@w{K9r+~xXz)xO6>P-STd>v|wS z;jeaBKV+=c3|R=IrWC^doB@5G`ZIGRP^QVkH~B!=Q1b=wCm%z-pohZ?mC&}rAM~d< zW|0KQ@@#y|pte{BIj8Z4-5v;4Cpfs*kP5jMlwj=TpuNGDiNT>7Ezaf-2Tx1|ThmII zej~+tlra}$pg!-?aThP09|W3FM(-ZiI0L+UFzPiw_im*DFQ{tU-OK?juql(=CG=W# zwU&5%N>NAq|NW3hZxX&eJcLvhsOqz(5MfH=IT_TR=ArI}9293Jk|~gLF$msY6#)eu zoY2tGL}24Tw~;DXb9N_qui{Y*FF4pa0SY^{sCq6O?o>i+2M-r7;u&?5L%C6Ebuj8n z-@7hASxqD8|8f#_)e((2dD}8MG=0a3PVv@vN)m55K86g1R>2=0-I9?#;!?Y|a**B3 zQFpLZvxJu}!`a+{bd`%Mw06j$^|JsHFU}aWP7V*c6tJl)4Z>QK5VTH#pKbJGX&Jsl z8J0qD3rF%qS8Lg+gu2f;Fx~#l@o?MvK&WWp@WFU8r{5ioa_GU9zJH}a=f#a+y_f>J z-eBNQc)+eNJkSScK~j&x;i@u7YUP3L%7^MZzF_RJg6+%AaIa^-ptyI2K+mP6_`zEC zj|A|3pu}lnpyI({%O;kn(ZxOBTCW_pja#0ffr85%&Yc12oXMtUpB1_sLm>(^z{3)uX}pBKy{*Sj zsc8HvPLND)w2vW#wYCR*q9b3i>Rj!T=$yF4@sW#S;}RovOLWnZiHqX3k+HGyafvZ8 zi(_&ur#P~&ad#qF!yRD>V>#_c5-b@;GO{sSPZmgDWQOE$GQ^@+5_zL^GTG_rIWKj_ z(&g*s%}7s9wj_s<6tT~b9 + @@ -315,6 +316,7 @@ + diff --git a/FPS.cpp b/FPS.cpp index fd3c6ac..9e07251 100644 --- a/FPS.cpp +++ b/FPS.cpp @@ -60,7 +60,7 @@ void updateAnimationStepTime(float stepTime, float minFPS, float maxFPS) { stepTime = minFPS; else if (stepTime > maxFPS) FPS = maxFPS; - + float cappedStep = 1/(float)FPS; DWORD data = *(DWORD*)&cappedStep; writeToAddress(&data, convertAddress(0x012497F0), sizeof(data)); @@ -132,13 +132,9 @@ void renderLoop(void) { DWORD pGetTaskF = convertAddress(0x00577330); DWORD pCleanTaskF = convertAddress(0x00577450); - // FPS regulation - float maxFPS = (float)Settings::get().getFPSLimit(); - float minFPS = 10.0f; QueryPerformanceFrequency(&timerFreq); QueryPerformanceCounter(&counterAtStart); float lastTime = 0.0f; - unsigned int minStep = (unsigned int)(1.0f/maxFPS*1000); // Render loop do { @@ -232,6 +228,9 @@ void renderLoop(void) { // If rendering was performed, update animation step-time if((taskFunc == 2) || (taskFunc == 5)) { + // FPS regulation + float maxFPS = (float)Settings::get().getCurrentFPSLimit(); + float minFPS = 10.0f; float currentTime = getElapsedTime(); float deltaTime = currentTime - lastTime; diff --git a/FXAA.cpp b/FXAA.cpp new file mode 100644 index 0000000..ebe9c99 --- /dev/null +++ b/FXAA.cpp @@ -0,0 +1,92 @@ +#include "FXAA.h" + +#include +#include +#include +using namespace std; + +#include "Settings.h" +#include "RenderstateManager.h" + +FXAA::FXAA(IDirect3DDevice9 *device, int width, int height, Quality quality) + : Effect(device), width(width), height(height) { + + // Setup the defines for compiling the effect + vector defines; + stringstream s; + + // Setup pixel size macro + s << "float2(1.0 / " << width << ", 1.0 / " << height << ")"; + string pixelSizeText = s.str(); + D3DXMACRO pixelSizeMacro = { "PIXEL_SIZE", pixelSizeText.c_str() }; + defines.push_back(pixelSizeMacro); + + D3DXMACRO qualityMacros[] = { + { "FXAA_QUALITY__PRESET", "10" }, + { "FXAA_QUALITY__PRESET", "20" }, + { "FXAA_QUALITY__PRESET", "28" }, + { "FXAA_QUALITY__PRESET", "39" } + }; + defines.push_back(qualityMacros[(int)quality]); + + D3DXMACRO null = { NULL, NULL }; + defines.push_back(null); + + DWORD flags = D3DXFX_NOT_CLONEABLE | D3DXSHADER_OPTIMIZATION_LEVEL3; + + // Load effect from file + SDLOG(0, "FXAA load\n"); + ID3DXBuffer* errors; + HRESULT hr = D3DXCreateEffectFromFile(device, GetDirectoryFile("dsfix\\FXAA.fx"), &defines.front(), NULL, flags, NULL, &effect, &errors); + if(hr != D3D_OK) SDLOG(0, "ERRORS:\n %s\n", errors->GetBufferPointer()); + + // Create buffer + device->CreateTexture(width, height, 1, D3DUSAGE_RENDERTARGET, D3DFMT_A8R8G8B8, D3DPOOL_DEFAULT, &buffer1Tex, NULL); + buffer1Tex->GetSurfaceLevel(0, &buffer1Surf); + + // get handles + frameTexHandle = effect->GetParameterByName(NULL, "frameTex2D"); +} + +FXAA::~FXAA() { + SAFERELEASE(effect); + SAFERELEASE(buffer1Surf); + SAFERELEASE(buffer1Tex); +} + +void FXAA::go(IDirect3DTexture9 *frame, IDirect3DSurface9 *dst) { + device->SetVertexDeclaration(vertexDeclaration); + + lumaPass(frame, buffer1Surf); + fxaaPass(buffer1Tex, dst); +} + +void FXAA::lumaPass(IDirect3DTexture9 *frame, IDirect3DSurface9 *dst) { + device->SetRenderTarget(0, dst); + + // Setup variables + effect->SetTexture(frameTexHandle, frame); + + // Do it! + UINT passes; + effect->Begin(&passes, 0); + effect->BeginPass(0); + quad(width, height); + effect->EndPass(); + effect->End(); +} + +void FXAA::fxaaPass(IDirect3DTexture9 *src, IDirect3DSurface9* dst) { + device->SetRenderTarget(0, dst); + + // Setup variables + effect->SetTexture(frameTexHandle, src); + + // Do it! + UINT passes; + effect->Begin(&passes, 0); + effect->BeginPass(1); + quad(width, height); + effect->EndPass(); + effect->End(); +} diff --git a/FXAA.h b/FXAA.h new file mode 100644 index 0000000..1d5c4e2 --- /dev/null +++ b/FXAA.h @@ -0,0 +1,31 @@ +#pragma once + +#include +#include +#include +#include + +#include "Effect.h" + +class FXAA : public Effect { +public: + enum Quality { QualityLow, QualityMedium, QualityHigh, QualityUltra }; + + FXAA(IDirect3DDevice9 *device, int width, int height, Quality quality); + virtual ~FXAA(); + + void go(IDirect3DTexture9 *frame, IDirect3DSurface9 *dst); + +private: + int width, height; + + ID3DXEffect *effect; + + IDirect3DTexture9* buffer1Tex; + IDirect3DSurface9* buffer1Surf; + + D3DXHANDLE frameTexHandle; + + void lumaPass(IDirect3DTexture9 *frame, IDirect3DSurface9 *dst); + void fxaaPass(IDirect3DTexture9 *src, IDirect3DSurface9* dst); +}; diff --git a/RenderstateManager.cpp b/RenderstateManager.cpp index 95a8672..5892384 100644 --- a/RenderstateManager.cpp +++ b/RenderstateManager.cpp @@ -17,7 +17,13 @@ RSManager RSManager::instance; void RSManager::initResources() { SDLOG(0, "RenderstateManager resource initialization started\n"); - if(Settings::get().getSmaaQuality()) smaa = new SMAA(d3ddev, Settings::get().getRenderWidth(), Settings::get().getRenderHeight(), (SMAA::Preset)(Settings::get().getSmaaQuality()-1)); + if(Settings::get().getAAQuality()) { + if(Settings::get().getAAType() == "SMAA") { + smaa = new SMAA(d3ddev, Settings::get().getRenderWidth(), Settings::get().getRenderHeight(), (SMAA::Preset)(Settings::get().getAAQuality()-1)); + } else { + fxaa = new FXAA(d3ddev, Settings::get().getRenderWidth(), Settings::get().getRenderHeight(), (FXAA::Quality)(Settings::get().getAAQuality()-1)); + } + } if(Settings::get().getSsaoStrength()) vssao = new VSSAO(d3ddev, Settings::get().getRenderWidth(), Settings::get().getRenderHeight(), Settings::get().getSsaoStrength()-1); if(Settings::get().getDOFBlurAmount()) gauss = new GAUSS(d3ddev, Settings::get().getDOFOverrideResolution()*16/9, Settings::get().getDOFOverrideResolution()); if(Settings::get().getEnableHudMod()) hud = new HUD(d3ddev, Settings::get().getRenderWidth(), Settings::get().getRenderHeight()); @@ -37,6 +43,7 @@ void RSManager::releaseResources() { SAFERELEASE(depthStencilSurf); SAFERELEASE(prevStateBlock); SAFEDELETE(smaa); + SAFEDELETE(fxaa); SAFEDELETE(vssao); SAFEDELETE(gauss); SAFEDELETE(hud); @@ -242,7 +249,7 @@ HRESULT RSManager::redirectSetRenderTarget(DWORD RenderTargetIndex, IDirect3DSur oldRenderTarget->Release(); } - if(mainRTuses == 5 && !lowFPSmode && mainRT && zSurf && (smaa && doSmaa)) { // time to do SMAA + if(mainRTuses == 5 && !lowFPSmode && mainRT && zSurf && doAA && (smaa || fxaa)) { // time to do SMAA IDirect3DSurface9 *oldRenderTarget; d3ddev->GetRenderTarget(0, &oldRenderTarget); if(oldRenderTarget == mainRT) { @@ -260,11 +267,10 @@ HRESULT RSManager::redirectSetRenderTarget(DWORD RenderTargetIndex, IDirect3DSur //d3ddev->SetRenderState(D3DRS_CLIPPING, FALSE); d3ddev->SetRenderState(D3DRS_CULLMODE, D3DCULL_CCW); d3ddev->SetRenderState(D3DRS_COLORWRITEENABLE, D3DCOLORWRITEENABLE_RED | D3DCOLORWRITEENABLE_GREEN | D3DCOLORWRITEENABLE_BLUE); - // perform SMAA processing - if(smaa && doSmaa) { - smaa->go(tex, tex, rgbaBuffer1Surf, SMAA::INPUT_LUMA); - d3ddev->StretchRect(rgbaBuffer1Surf, NULL, oldRenderTarget, NULL, D3DTEXF_NONE); - } + // perform AA processing + if(smaa) smaa->go(tex, tex, rgbaBuffer1Surf, SMAA::INPUT_LUMA); + else fxaa->go(tex, rgbaBuffer1Surf); + d3ddev->StretchRect(rgbaBuffer1Surf, NULL, oldRenderTarget, NULL, D3DTEXF_NONE); restoreRenderState(); } tex->Release(); @@ -533,6 +539,17 @@ void RSManager::reloadGauss() { SDLOG(0, "Reloaded GAUSS\n"); } +void RSManager::reloadAA() { + SAFEDELETE(smaa); + SAFEDELETE(fxaa); + if(Settings::get().getAAType() == "SMAA") { + smaa = new SMAA(d3ddev, Settings::get().getRenderWidth(), Settings::get().getRenderHeight(), (SMAA::Preset)(Settings::get().getAAQuality()-1)); + } else { + fxaa = new FXAA(d3ddev, Settings::get().getRenderWidth(), Settings::get().getRenderHeight(), (FXAA::Quality)(Settings::get().getAAQuality()-1)); + } + SDLOG(0, "Reloaded AA\n"); +} + HRESULT RSManager::redirectDrawIndexedPrimitiveUP(D3DPRIMITIVETYPE PrimitiveType, UINT MinIndex, UINT NumVertices, UINT PrimitiveCount, CONST void* pIndexData, D3DFORMAT IndexDataFormat, CONST void* pVertexStreamZeroData, UINT VertexStreamZeroStride) { //if(onHudRT) { @@ -805,7 +822,7 @@ void RSManager::frameTimeManagement() { // implement FPS cap if(Settings::get().getUnlockFPS()) { - float desiredRenderTime = (1000.0f / Settings::get().getFPSLimit()) - 0.2f; + float desiredRenderTime = (1000.0f / Settings::get().getCurrentFPSLimit()) - 0.1f; while(renderTime < desiredRenderTime) { SwitchToThread(); renderTime = getElapsedTime() - lastPresentTime; diff --git a/RenderstateManager.h b/RenderstateManager.h index efbcf1a..855d6ba 100644 --- a/RenderstateManager.h +++ b/RenderstateManager.h @@ -6,6 +6,7 @@ #include "d3d9.h" #include "SMAA.h" +#include "FXAA.h" #include "VSSAO.h" #include "GAUSS.h" #include "HUD.h" @@ -20,8 +21,9 @@ class RSManager { float lastPresentTime; bool lowFPSmode; - bool doSmaa; + bool doAA; SMAA* smaa; + FXAA* fxaa; bool doVssao; VSSAO* vssao; @@ -106,8 +108,8 @@ class RSManager { return instance; } - RSManager() : smaa(NULL), vssao(NULL), gauss(NULL), rgbaBuffer1Surf(NULL), rgbaBuffer1Tex(NULL), - inited(false), doSmaa(true), doVssao(true), doDofGauss(true), doHud(true), captureNextFrame(false), capturing(false), hudStarted(false), takeScreenshot(false), hideHud(false), + RSManager() : smaa(NULL), fxaa(NULL), vssao(NULL), gauss(NULL), rgbaBuffer1Surf(NULL), rgbaBuffer1Tex(NULL), + inited(false), doAA(true), doVssao(true), doDofGauss(true), doHud(true), captureNextFrame(false), capturing(false), hudStarted(false), takeScreenshot(false), hideHud(false), mainRenderTexIndex(0), mainRenderSurfIndex(0), dumpCaptureIndex(0), numKnownTextures(0), foundKnownTextures(0), skippedPresents(0) { #define TEXTURE(_name, _hash) ++numKnownTextures; #include "Textures.def" @@ -131,13 +133,14 @@ class RSManager { void enableSingleFrameCapture(); void enableTakeScreenshot(); bool takingScreenshot() { return takeScreenshot; } - void toggleSmaa() { doSmaa = !doSmaa; } + void toggleAA() { doAA = !doAA; } void toggleVssao() { doVssao = !doVssao; } void toggleHideHud() { hideHud = !hideHud; } void toggleChangeHud() { doHud = !doHud; } void toggleDofGauss() { doDofGauss = !doDofGauss; } void reloadVssao(); void reloadGauss(); + void reloadAA(); bool allowStateChanges() { return !onHudRT; } diff --git a/Settings.cpp b/Settings.cpp index ad6fb81..a51dbeb 100644 --- a/Settings.cpp +++ b/Settings.cpp @@ -34,6 +34,8 @@ void Settings::load() { if(getOverrideLanguage().length() >= 2 && getOverrideLanguage().find("none") != 0) { performLanguageOverride(); } + + curFPSlimit = getFPSLimit(); } void Settings::report() { @@ -62,6 +64,17 @@ void Settings::shutdown() { } } +unsigned Settings::getCurrentFPSLimit() { + return curFPSlimit; +} +void Settings::setCurrentFPSLimit(unsigned limit) { + curFPSlimit = limit; +} +void Settings::toggle30FPSLimit() { + if(curFPSlimit == 30) curFPSlimit = getFPSLimit(); + else curFPSlimit = 30; +} + // reading -------------------------------------------------------------------- diff --git a/Settings.def b/Settings.def index 0e2de1b..059987e 100644 --- a/Settings.def +++ b/Settings.def @@ -10,7 +10,8 @@ SETTING(unsigned, DOFOverrideResolution, "dofOverrideResolution", 0); SETTING(bool, DisableDofScaling, "disableDofScaling", false); SETTING(unsigned, DOFBlurAmount, "dofBlurAmount", 0); -SETTING(unsigned, SmaaQuality, "smaaQuality", 0); +SETTING(unsigned, AAQuality, "aaQuality", 0); +SETTING(std::string, AAType, "aaType", "FXAA"); SETTING(unsigned, SsaoStrength, "ssaoStrength", 0); diff --git a/Settings.h b/Settings.h index 21a8120..dd2ff22 100644 --- a/Settings.h +++ b/Settings.h @@ -7,6 +7,7 @@ class Settings { static Settings instance; bool inited, langOverridden; + unsigned curFPSlimit; void read(char* source, bool& value); void read(char* source, int& value); @@ -47,5 +48,9 @@ class Settings { #include "Settings.def" #undef SETTING } + + unsigned getCurrentFPSLimit(); + void setCurrentFPSLimit(unsigned limit); + void toggle30FPSLimit(); }; diff --git a/VSSAO.cpp b/VSSAO.cpp index 73022ba..92839c5 100644 --- a/VSSAO.cpp +++ b/VSSAO.cpp @@ -39,10 +39,6 @@ VSSAO::VSSAO(IDirect3DDevice9 *device, int width, int height, unsigned strength) HRESULT hr = D3DXCreateEffectFromFile(device, GetDirectoryFile("dsfix\\VSSAO.fx"), &defines.front(), NULL, flags, NULL, &effect, &errors); if(hr != D3D_OK) SDLOG(0, "ERRORS:\n %s\n", errors->GetBufferPointer()); - - // Load thickness texture - D3DXCreateTextureFromFile(device, GetDirectoryFile("dsfix\\vssaothicknessmodel.png"), &thicknessTex); - // Create buffers device->CreateTexture(width, height, 1, D3DUSAGE_RENDERTARGET, D3DFMT_A8R8G8B8, D3DPOOL_DEFAULT, &buffer1Tex, NULL); buffer1Tex->GetSurfaceLevel(0, &buffer1Surf); @@ -53,7 +49,6 @@ VSSAO::VSSAO(IDirect3DDevice9 *device, int width, int height, unsigned strength) depthTexHandle = effect->GetParameterByName(NULL, "depthTex2D"); frameTexHandle = effect->GetParameterByName(NULL, "frameTex2D"); prevPassTexHandle = effect->GetParameterByName(NULL, "prevPassTex2D"); - thicknessTexHandle = effect->GetParameterByName(NULL, "thicknessTex1D"); } VSSAO::~VSSAO() { @@ -62,7 +57,6 @@ VSSAO::~VSSAO() { SAFERELEASE(buffer1Tex); SAFERELEASE(buffer2Surf); SAFERELEASE(buffer2Tex); - SAFERELEASE(thicknessTex); } void VSSAO::go(IDirect3DTexture9 *frame, IDirect3DTexture9 *depth, IDirect3DSurface9 *dst) { @@ -84,7 +78,6 @@ void VSSAO::mainSsaoPass(IDirect3DTexture9* depth, IDirect3DSurface9* dst) { // Setup variables. effect->SetTexture(depthTexHandle, depth); - effect->SetTexture(thicknessTexHandle, thicknessTex); // Do it! UINT passes; diff --git a/VSSAO.h b/VSSAO.h index 4a1dea7..b824997 100644 --- a/VSSAO.h +++ b/VSSAO.h @@ -23,10 +23,8 @@ class VSSAO : public Effect { IDirect3DSurface9* buffer1Surf; IDirect3DTexture9* buffer2Tex; IDirect3DSurface9* buffer2Surf; - - IDirect3DTexture9* thicknessTex; - D3DXHANDLE depthTexHandle, frameTexHandle, prevPassTexHandle, thicknessTexHandle; + D3DXHANDLE depthTexHandle, frameTexHandle, prevPassTexHandle; void mainSsaoPass(IDirect3DTexture9 *depth, IDirect3DSurface9 *dst); void vBlurPass(IDirect3DTexture9 *depth, IDirect3DTexture9* src, IDirect3DSurface9* dst); diff --git a/main.h b/main.h index 1307704..ab4ebab 100644 --- a/main.h +++ b/main.h @@ -18,7 +18,7 @@ #pragma once -#define VERSION "1.7" +#define VERSION "1.8" #define RELEASE_VER