From 2bc409d8fdaf0aef0116f04840024a7a3507f93b Mon Sep 17 00:00:00 2001 From: FunkyFr3sh Date: Wed, 21 Jul 2021 14:52:44 +0200 Subject: [PATCH] add bilinear palette shader for d3d9 --- inc/d3d9shader.h | 240 +++++++++++++++++++++++++++++++++++++++++++++- inc/render_d3d9.h | 3 + src/render_d3d9.c | 47 ++++++--- 3 files changed, 278 insertions(+), 12 deletions(-) diff --git a/inc/d3d9shader.h b/inc/d3d9shader.h index 6d32268caf..963bf6d419 100644 --- a/inc/d3d9shader.h +++ b/inc/d3d9shader.h @@ -103,4 +103,242 @@ const BYTE D3D9_PALETTE_SHADER[] = 228, 128, 255, 255, 0, 0 }; -#endif \ No newline at end of file + +/* bilinear upscaling */ + +#if 0 +// +// Generated by Microsoft (R) HLSL Shader Compiler 10.1 +// +// Parameters: +// +// sampler2D PaletteTex; +// sampler2D SurfaceTex; +// float4 TextureSize; +// +// +// Registers: +// +// Name Reg Size +// ------------ ----- ---- +// TextureSize c0 1 +// SurfaceTex s0 1 +// PaletteTex s1 1 +// + + ps_2_0 + def c1, 0.5, 0, 0.99609375, 0.001953125 + dcl t0.xy + dcl_2d s0 + dcl_2d s1 + mov r0.y, c1.y + rcp r1.x, c0.x + rcp r1.y, c0.y + mul r0.zw, t0.wzyx, c0.wzyx + frc r0.zw, r0 + add r2.xy, -r0.wzyx, c1.x + mad r2.xy, r2, r1, t0 + add r3.xy, r1, r2 + mov r1.z, c1.y + add r4.x, r1.z, r2.x + add r4.y, r1.y, r2.y + add r1.x, r1.x, r2.x + add r1.y, r1.z, r2.y + texld r3, r3, s0 + texld r2, r2, s0 + texld r1, r1, s0 + texld r4, r4, s0 + mad r0.x, r3.x, c1.z, c1.w + mov r3.y, c1.y + mad r2.x, r2.x, c1.z, c1.w + mad r1.x, r1.x, c1.z, c1.w + mad r3.x, r4.x, c1.z, c1.w + mov r1.y, c1.y + mov r2.y, c1.y + texld r4, r0, s1 + texld r3, r3, s1 + texld r1, r1, s1 + texld r2, r2, s1 + lrp r5, r0.w, r4, r3 + lrp r3, r0.w, r1, r2 + lrp r1, r0.z, r5, r3 + mov oC0, r1 + +// approximately 32 instruction slots used (8 texture, 24 arithmetic) + +// fxc.exe /Tps_2_0 shader.hlsl /Fhshader.h +/* +uniform sampler2D SurfaceTex; +uniform sampler2D PaletteTex; + +float4 TextureSize : register(c0); + +float4 bilinear(float2 coord) +{ + float2 size = 1.0 
/ TextureSize.xy; + float2 f = frac(coord * TextureSize.xy); + + coord += (.5 - f) * size; + + float tli = tex2D(SurfaceTex, coord).r; + float tri = tex2D(SurfaceTex, coord + float2(size.x, 0.0) ).r; + float bli = tex2D(SurfaceTex, coord + float2(0.0, size.y)).r; + float bri = tex2D(SurfaceTex, coord + float2(size.x, size.y)).r; + + float4 tl = tex2D(PaletteTex, float2(tli * (255./256) + (0.5/256), 0)); + float4 tr = tex2D(PaletteTex, float2(tri * (255./256) + (0.5/256), 0)); + float4 bl = tex2D(PaletteTex, float2(bli * (255./256) + (0.5/256), 0)); + float4 br = tex2D(PaletteTex, float2(bri * (255./256) + (0.5/256), 0)); + + float4 top = lerp(tl, tr, f.x); + float4 bot = lerp(bl, br, f.x); + + return lerp(top, bot, f.y); +} + +float4 main(float2 texCoords : TEXCOORD) : COLOR +{ + return bilinear(texCoords); +} +*/ +#endif + +const BYTE D3D9_PALETTE_SHADER_BILINEAR[] = /* compiled ps_2_0 bytecode of the listing above; render_d3d9.c hands it to IDirect3DDevice9_CreatePixelShader */ +{ + 0, 2, 255, 255, 254, 255, + 56, 0, 67, 84, 65, 66, + 28, 0, 0, 0, 179, 0, + 0, 0, 0, 2, 255, 255, + 3, 0, 0, 0, 28, 0, + 0, 0, 0, 1, 0, 0, + 172, 0, 0, 0, 88, 0, + 0, 0, 3, 0, 1, 0, + 1, 0, 0, 0, 100, 0, + 0, 0, 0, 0, 0, 0, + 116, 0, 0, 0, 3, 0, + 0, 0, 1, 0, 0, 0, + 128, 0, 0, 0, 0, 0, + 0, 0, 144, 0, 0, 0, + 2, 0, 0, 0, 1, 0, + 2, 0, 156, 0, 0, 0, + 0, 0, 0, 0, 80, 97, + 108, 101, 116, 116, 101, 84, + 101, 120, 0, 171, 4, 0, + 12, 0, 1, 0, 1, 0, + 1, 0, 0, 0, 0, 0, + 0, 0, 83, 117, 114, 102, + 97, 99, 101, 84, 101, 120, + 0, 171, 4, 0, 12, 0, + 1, 0, 1, 0, 1, 0, + 0, 0, 0, 0, 0, 0, + 84, 101, 120, 116, 117, 114, + 101, 83, 105, 122, 101, 0, + 1, 0, 3, 0, 1, 0, + 4, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 112, 115, + 95, 50, 95, 48, 0, 77, + 105, 99, 114, 111, 115, 111, + 102, 116, 32, 40, 82, 41, + 32, 72, 76, 83, 76, 32, + 83, 104, 97, 100, 101, 114, + 32, 67, 111, 109, 112, 105, + 108, 101, 114, 32, 49, 48, + 46, 49, 0, 171, 81, 0, + 0, 5, 1, 0, 15, 160, + 0, 0, 0, 63, 0, 0, + 0, 0, 0, 0, 127, 63, + 0, 0, 0, 59, 31, 0, + 0, 2, 0, 0, 0, 128, + 0, 0, 3, 176, 31, 0, + 0, 2, 0, 0, 0, 144, + 0, 8, 15, 160, 
31, 0, + 0, 2, 0, 0, 0, 144, + 1, 8, 15, 160, 1, 0, + 0, 2, 0, 0, 2, 128, + 1, 0, 85, 160, 6, 0, + 0, 2, 1, 0, 1, 128, + 0, 0, 0, 160, 6, 0, + 0, 2, 1, 0, 2, 128, + 0, 0, 85, 160, 5, 0, + 0, 3, 0, 0, 12, 128, + 0, 0, 27, 176, 0, 0, + 27, 160, 19, 0, 0, 2, + 0, 0, 12, 128, 0, 0, + 228, 128, 2, 0, 0, 3, + 2, 0, 3, 128, 0, 0, + 27, 129, 1, 0, 0, 160, + 4, 0, 0, 4, 2, 0, + 3, 128, 2, 0, 228, 128, + 1, 0, 228, 128, 0, 0, + 228, 176, 2, 0, 0, 3, + 3, 0, 3, 128, 1, 0, + 228, 128, 2, 0, 228, 128, + 1, 0, 0, 2, 1, 0, + 4, 128, 1, 0, 85, 160, + 2, 0, 0, 3, 4, 0, + 1, 128, 1, 0, 170, 128, + 2, 0, 0, 128, 2, 0, + 0, 3, 4, 0, 2, 128, + 1, 0, 85, 128, 2, 0, + 85, 128, 2, 0, 0, 3, + 1, 0, 1, 128, 1, 0, + 0, 128, 2, 0, 0, 128, + 2, 0, 0, 3, 1, 0, + 2, 128, 1, 0, 170, 128, + 2, 0, 85, 128, 66, 0, + 0, 3, 3, 0, 15, 128, + 3, 0, 228, 128, 0, 8, + 228, 160, 66, 0, 0, 3, + 2, 0, 15, 128, 2, 0, + 228, 128, 0, 8, 228, 160, + 66, 0, 0, 3, 1, 0, + 15, 128, 1, 0, 228, 128, + 0, 8, 228, 160, 66, 0, + 0, 3, 4, 0, 15, 128, + 4, 0, 228, 128, 0, 8, + 228, 160, 4, 0, 0, 4, + 0, 0, 1, 128, 3, 0, + 0, 128, 1, 0, 170, 160, + 1, 0, 255, 160, 1, 0, + 0, 2, 3, 0, 2, 128, + 1, 0, 85, 160, 4, 0, + 0, 4, 2, 0, 1, 128, + 2, 0, 0, 128, 1, 0, + 170, 160, 1, 0, 255, 160, + 4, 0, 0, 4, 1, 0, + 1, 128, 1, 0, 0, 128, + 1, 0, 170, 160, 1, 0, + 255, 160, 4, 0, 0, 4, + 3, 0, 1, 128, 4, 0, + 0, 128, 1, 0, 170, 160, + 1, 0, 255, 160, 1, 0, + 0, 2, 1, 0, 2, 128, + 1, 0, 85, 160, 1, 0, + 0, 2, 2, 0, 2, 128, + 1, 0, 85, 160, 66, 0, + 0, 3, 4, 0, 15, 128, + 0, 0, 228, 128, 1, 8, + 228, 160, 66, 0, 0, 3, + 3, 0, 15, 128, 3, 0, + 228, 128, 1, 8, 228, 160, + 66, 0, 0, 3, 1, 0, + 15, 128, 1, 0, 228, 128, + 1, 8, 228, 160, 66, 0, + 0, 3, 2, 0, 15, 128, + 2, 0, 228, 128, 1, 8, + 228, 160, 18, 0, 0, 4, + 5, 0, 15, 128, 0, 0, + 255, 128, 4, 0, 228, 128, + 3, 0, 228, 128, 18, 0, + 0, 4, 3, 0, 15, 128, + 0, 0, 255, 128, 1, 0, + 228, 128, 2, 0, 228, 128, + 18, 0, 0, 4, 1, 0, + 15, 128, 0, 0, 170, 128, + 5, 0, 228, 128, 3, 0, + 228, 
128, 1, 0, 0, 2, + 0, 8, 15, 128, 1, 0, + 228, 128, 255, 255, 0, 0 +}; + +#endif diff --git a/inc/render_d3d9.h b/inc/render_d3d9.h index cb16bda30a..0e653cbb80 100644 --- a/inc/render_d3d9.h +++ b/inc/render_d3d9.h @@ -19,9 +19,12 @@ typedef struct D3D9RENDERER IDirect3DTexture9* surface_tex[D3D9_TEXTURE_COUNT]; IDirect3DTexture9* palette_tex[D3D9_TEXTURE_COUNT]; IDirect3DPixelShader9* pixel_shader; + /* created from D3D9_PALETTE_SHADER_BILINEAR; d3d9_set_states() checks it for NULL before selecting it */ IDirect3DPixelShader9* pixel_shader_bilinear; float scale_w; float scale_h; int bits_per_pixel; + int tex_width; /* tex_width/tex_height: size passed to IDirect3DDevice9_CreateTexture in d3d9_create_resouces() and uploaded to the bilinear shader as TextureSize */ + int tex_height; } D3D9RENDERER; BOOL d3d9_is_available(); diff --git a/src/render_d3d9.c b/src/render_d3d9.c index 327b3796d5..85419eac35 100644 --- a/src/render_d3d9.c +++ b/src/render_d3d9.c @@ -130,8 +130,13 @@ BOOL d3d9_release() g_d3d9.pixel_shader = NULL; } - int i; - for (i = 0; i < D3D9_TEXTURE_COUNT; i++) + if (g_d3d9.pixel_shader_bilinear) + { + IDirect3DPixelShader9_Release(g_d3d9.pixel_shader_bilinear); + g_d3d9.pixel_shader_bilinear = NULL; + } + + for (int i = 0; i < D3D9_TEXTURE_COUNT; i++) { if (g_d3d9.surface_tex[i]) { @@ -174,16 +179,16 @@ static BOOL d3d9_create_resouces() int width = g_ddraw->width; int height = g_ddraw->height; - int tex_width = + g_d3d9.tex_width = width <= 1024 ? 1024 : width <= 2048 ? 2048 : width <= 4096 ? 4096 : width; - int tex_height = - height <= tex_width ? tex_width : height <= 2048 ? 2048 : height <= 4096 ? 4096 : height; + g_d3d9.tex_height = + height <= g_d3d9.tex_width ? g_d3d9.tex_width : height <= 2048 ? 2048 : height <= 4096 ? 4096 : height; - tex_width = tex_width > tex_height ? tex_width : tex_height; + g_d3d9.tex_width = g_d3d9.tex_width > g_d3d9.tex_height ? 
g_d3d9.tex_width : g_d3d9.tex_height; - g_d3d9.scale_w = (float)width / tex_width;; - g_d3d9.scale_h = (float)height / tex_height; + g_d3d9.scale_w = (float)width / g_d3d9.tex_width;; + g_d3d9.scale_h = (float)height / g_d3d9.tex_height; err = err || FAILED( IDirect3DDevice9_CreateVertexBuffer( @@ -201,8 +206,8 @@ static BOOL d3d9_create_resouces() err = err || FAILED( IDirect3DDevice9_CreateTexture( g_d3d9.device, - tex_width, - tex_height, + g_d3d9.tex_width, + g_d3d9.tex_height, 1, 0, g_ddraw->bpp == 16 ? D3DFMT_R5G6B5 : g_ddraw->bpp == 32 ? D3DFMT_X8R8G8B8 : D3DFMT_L8, @@ -234,6 +239,11 @@ static BOOL d3d9_create_resouces() { err = err || FAILED( IDirect3DDevice9_CreatePixelShader(g_d3d9.device, (DWORD*)D3D9_PALETTE_SHADER, &g_d3d9.pixel_shader)); + /* the result of this call is not merged into err; d3d9_set_states() uses the bilinear shader only when the pointer is non-NULL */ + IDirect3DDevice9_CreatePixelShader( + g_d3d9.device, + (DWORD*)D3D9_PALETTE_SHADER_BILINEAR, + &g_d3d9.pixel_shader_bilinear); } return g_d3d9.vertex_buf && (g_d3d9.pixel_shader || g_ddraw->bpp == 16 || g_ddraw->bpp == 32) && !err; @@ -250,7 +260,22 @@ static BOOL d3d9_set_states() if (g_ddraw->bpp == 8) { err = err || FAILED(IDirect3DDevice9_SetTexture(g_d3d9.device, 1, (IDirect3DBaseTexture9*)g_d3d9.palette_tex[0])); - err = err || FAILED(IDirect3DDevice9_SetPixelShader(g_d3d9.device, g_d3d9.pixel_shader)); + /* pick the bilinear shader only when d3d9linear is set, the shader exists, and the viewport size differs from g_ddraw->width/height */ + BOOL bilinear = + g_ddraw->d3d9linear && + g_d3d9.pixel_shader_bilinear && + (g_ddraw->render.viewport.width != g_ddraw->width || g_ddraw->render.viewport.height != g_ddraw->height); + + err = err || FAILED( + IDirect3DDevice9_SetPixelShader( + g_d3d9.device, + bilinear ? g_d3d9.pixel_shader_bilinear : g_d3d9.pixel_shader)); + /* fill constant register c0 (TextureSize in the shader listing); the shader reads only its x and y components */ + if (bilinear) + { + float texture_size[4] = { (float)g_d3d9.tex_width, (float)g_d3d9.tex_height, 0, 0 }; + err = err || FAILED(IDirect3DDevice9_SetPixelShaderConstantF(g_d3d9.device, 0, texture_size, 1)); + } } else {