diff --git a/frontend/libretro.c b/frontend/libretro.c index 7f953a5f5..86c336735 100644 --- a/frontend/libretro.c +++ b/frontend/libretro.c @@ -2140,7 +2140,7 @@ static void update_variables(bool in_flight) { axis_bounds_modifier = true; } - else if (strcmp(var.value, "circle") == 0) + else { axis_bounds_modifier = false; } @@ -2864,26 +2864,19 @@ static uint16_t get_analog_button(int16_t ret, retro_input_state_t input_state_c return button; } -unsigned char axis_range_modifier(int16_t axis_value, bool is_square) +static unsigned char axis_range_modifier(int axis_value, bool is_square) { - float modifier_axis_range = 0; + int modifier_axis_range; if (is_square) - { - modifier_axis_range = round((axis_value >> 8) / 0.785) + 128; - if (modifier_axis_range < 0) - { - modifier_axis_range = 0; - } - else if (modifier_axis_range > 255) - { - modifier_axis_range = 255; - } - } + modifier_axis_range = roundf((axis_value >> 8) / 0.785f) + 128; else - { - modifier_axis_range = MIN(((axis_value >> 8) + 128), 255); - } + modifier_axis_range = (axis_value >> 8) + 128; + + if (modifier_axis_range < 0) + modifier_axis_range = 0; + else if (modifier_axis_range > 255) + modifier_axis_range = 255; return modifier_axis_range; } diff --git a/frontend/libretro_core_options.h b/frontend/libretro_core_options.h index a07491b6e..0d9379db7 100644 --- a/frontend/libretro_core_options.h +++ b/frontend/libretro_core_options.h @@ -909,7 +909,7 @@ struct retro_core_option_v2_definition option_defs_us[] = { { "square", "Square" }, { NULL, NULL }, }, - "circle", + "square", }, { "pcsx_rearmed_vibration", diff --git a/libpcsxcore/cdrom.c b/libpcsxcore/cdrom.c index 5c7e76492..9e201eb97 100644 --- a/libpcsxcore/cdrom.c +++ b/libpcsxcore/cdrom.c @@ -931,7 +931,6 @@ void cdrInterrupt(void) { error = ERROR_BAD_ARGNUM; goto set_error; } - cdr.DriveState = DRIVESTATE_STANDBY; second_resp_time = cdReadTime * 125 / 2; start_rotating = 1; break; diff --git a/libpcsxcore/database.c b/libpcsxcore/database.c index cf2a16aab..35d004145 100644 --- a/libpcsxcore/database.c +++ b/libpcsxcore/database.c @@ -32,6 +32,8 @@ static const char * const gpu_slow_llist_db[] = "SLES01712", "SLPS01525", "SLPS91138", "SLPM87102", "SLUS00823", /* Crash Bash */ "SCES02834", "SCUS94570", "SCUS94616", "SCUS94654", + /* F1 2000 - aborting/resuming dma in menus */ + "SLUS01120", "SLES02722", "SLES02723", "SLES02724", "SLPS02758", "SLPM80564", /* Final Fantasy IV */ "SCES03840", "SLPM86028", "SLUS01360", /* Point Blank - calibration cursor */ @@ -54,12 +56,6 @@ static const char * const gpu_centering_hack_db[] = "SLPM86009", }; -static const char * const dualshock_timing1024_hack_db[] = -{ - /* Judge Dredd - could also be poor cdrom+mdec+dma timing */ - "SLUS00630", "SLES00755", -}; - static const char * const dualshock_init_analog_hack_db[] = { /* Formula 1 Championship Edition */ @@ -109,7 +105,6 @@ hack_db[] = HACK_ENTRY(cdr_read_timing, cdr_read_hack_db), HACK_ENTRY(gpu_slow_list_walking, gpu_slow_llist_db), HACK_ENTRY(gpu_centering, gpu_centering_hack_db), - HACK_ENTRY(gpu_timing1024, dualshock_timing1024_hack_db), HACK_ENTRY(dualshock_init_analog, dualshock_init_analog_hack_db), HACK_ENTRY(fractional_Framerate, fractional_Framerate_hack_db), HACK_ENTRY(f1, f1_hack_db), @@ -157,6 +152,22 @@ cycle_multiplier_overrides[] = { 200, { "SLUS01519", "SCPS45260", "SLPS01463" } }, }; +static const struct +{ + int cycles; + const char * const id[4]; +} +gpu_timing_hack_db[] = +{ + /* Judge Dredd - poor cdrom+mdec+dma+gpu timing */ + { 1024, { "SLUS00630", "SLES00755" } }, + /* F1 2000 - flooding the GPU in menus */ + { 300*1024, { "SLUS01120", "SLES02722", "SLES02723", "SLES02724" } }, + { 300*1024, { "SLPS02758", "SLPM80564" } }, + /* Soul Blade - same as above */ + { 512*1024, { "SLUS00240", "SCES00577" } }, +}; + static const char * const lightrec_hack_db[] = { /* Tomb Raider (Rev 2) - boot menu clears over itself */ @@ -223,6 +234,22 @@ void Apply_Hacks_Cdrom(void) } } + Config.gpu_timing_override = 0; + for (i = 0; i < ARRAY_SIZE(gpu_timing_hack_db); i++) + { + const char * const * const ids = gpu_timing_hack_db[i].id; + for (j = 0; j < ARRAY_SIZE(gpu_timing_hack_db[i].id); j++) + if (ids[j] && strcmp(ids[j], CdromId) == 0) + break; + if (j < ARRAY_SIZE(gpu_timing_hack_db[i].id)) + { + Config.gpu_timing_override = gpu_timing_hack_db[i].cycles; + SysPrintf("using gpu_timing_override: %d\n", + Config.gpu_timing_override); + break; + } + } + if (drc_is_lightrec()) { lightrec_hacks = 0; if (Config.hacks.f1) diff --git a/libpcsxcore/psxcommon.h b/libpcsxcore/psxcommon.h index 0a1ef7078..9c9bcda51 100644 --- a/libpcsxcore/psxcommon.h +++ b/libpcsxcore/psxcommon.h @@ -145,6 +145,7 @@ typedef struct { boolean TurboCD; int cycle_multiplier; // 100 for 1.0 int cycle_multiplier_override; + int gpu_timing_override; s8 GpuListWalking; s8 FractionalFramerate; // ~49.75 and ~59.81 instead of 50 and 60 u8 Cpu; // CPU_DYNAREC or CPU_INTERPRETER @@ -154,7 +155,6 @@ typedef struct { boolean gpu_slow_list_walking; boolean gpu_centering; boolean dualshock_init_analog; - boolean gpu_timing1024; boolean fractional_Framerate; boolean f1; } hacks; diff --git a/libpcsxcore/psxdma.c b/libpcsxcore/psxdma.c index 68b9694f3..af791c0e5 100644 --- a/libpcsxcore/psxdma.c +++ b/libpcsxcore/psxdma.c @@ -192,10 +192,11 @@ void psxDma2(u32 madr, u32 bcr, u32 chcr) { // GPU case 0x01000401: // dma chain PSXDMA_LOG("*** DMA 2 - GPU dma chain *** %lx addr = %lx size = %lx\n", chcr, madr, bcr); // when not emulating walking progress, end immediately + // (some games abort the dma and read madr so break out of that logic) madr_next = 0xffffff; do_walking = Config.GpuListWalking; - if (do_walking < 0 || Config.hacks.gpu_timing1024) + if (do_walking < 0) do_walking = Config.hacks.gpu_slow_list_walking; madr_next_p = do_walking ? &madr_next : NULL; @@ -204,13 +205,13 @@ void psxDma2(u32 madr, u32 bcr, u32 chcr) { // GPU HW_DMA2_MADR = SWAPu32(madr_next); - // a hack for Judge Dredd which is annoyingly sensitive to timing - if (Config.hacks.gpu_timing1024) - cycles_sum = 1024; + // timing hack with some lame heuristics + if (Config.gpu_timing_override && (do_walking || cycles_sum > 64)) + cycles_sum = Config.gpu_timing_override; psxRegs.gpuIdleAfter = psxRegs.cycle + cycles_sum + cycles_last_cmd; set_event(PSXINT_GPUDMA, cycles_sum); - //printf("%u dma2cf: %6d,%4d %08x %08x %08x %08x\n", psxRegs.cycle, + //printf("%u dma2cf: %6ld,%4d %08x %08x %08x %08x\n", psxRegs.cycle, // cycles_sum, cycles_last_cmd, madr, bcr, chcr, HW_DMA2_MADR); return; @@ -237,8 +238,8 @@ void gpuInterrupt() { cycles_sum += psxRegs.gpuIdleAfter - psxRegs.cycle; psxRegs.gpuIdleAfter = psxRegs.cycle + cycles_sum + cycles_last_cmd; set_event(PSXINT_GPUDMA, cycles_sum); - //printf("%u dma2cn: %6d,%4d %08x\n", psxRegs.cycle, cycles_sum, - // cycles_last_cmd, HW_DMA2_MADR); + //printf("%u dma2cn: %6ld,%4d %08x\n", psxRegs.cycle, cycles_sum, + // cycles_last_cmd, HW_DMA2_MADR); return; } if (HW_DMA2_CHCR & SWAP32(0x01000000)) diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.c b/plugins/gpu_neon/psx_gpu/psx_gpu.c index 19f1c199b..a58b5b6da 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu.c @@ -1935,7 +1935,7 @@ setup_blocks_uv_adj_hack(psx_gpu_struct *psx_gpu, block_struct *block, #define setup_blocks_add_blocks_direct() \ stats_add(texel_blocks_untextured, span_num_blocks); \ - span_pixel_blocks += span_num_blocks \ + stats_add(span_pixel_blocks, span_num_blocks); \ #define setup_blocks_builder(shading, texturing, dithering, sw, target) \ @@ -2918,9 +2918,9 @@ char *render_block_flag_strings[] = (triangle_winding_##winding << 6)) \ static int prepare_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, - vertex_struct *vertexes_out[3]) + prepared_triangle *triangle_out) { - s32 y_top, y_bottom; + s32 y_top, y_bottom, offset_x, offset_y, i; s32 triangle_area; u32 triangle_winding = 0; @@ -2955,6 +2955,7 @@ static int prepare_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, y_bottom = c->y; y_top = a->y; + offset_y = sign_extend_11bit(y_top + psx_gpu->offset_y) - y_top; if((y_bottom - y_top) >= 512) { @@ -2982,7 +2983,7 @@ static int prepare_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, vertex_swap(a, b); } - if((c->x - psx_gpu->offset_x) >= 1024 || (c->x - a->x) >= 1024) + if(c->x - a->x >= 1024) { #ifdef PROFILE trivial_rejects++; @@ -2990,8 +2991,10 @@ static int prepare_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, return 0; } - if(invalidate_texture_cache_region_viewport(psx_gpu, a->x, y_top, c->x, - y_bottom) == 0) + offset_x = sign_extend_11bit(a->x + psx_gpu->offset_x) - a->x; + if(invalidate_texture_cache_region_viewport(psx_gpu, + a->x + offset_x, y_top + offset_y, + c->x + offset_x, y_bottom + offset_y) == 0) { #ifdef PROFILE trivial_rejects++; @@ -2999,12 +3002,20 @@ static int prepare_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, return 0; } + for (i = 0; i < 3; i++) + { + vertexes[i].x += offset_x; + vertexes[i].y += offset_y; + } + psx_gpu->triangle_area = triangle_area; psx_gpu->triangle_winding = triangle_winding; - vertexes_out[0] = a; - vertexes_out[1] = b; - vertexes_out[2] = c; + triangle_out->vertexes[0] = a; + triangle_out->vertexes[1] = b; + triangle_out->vertexes[2] = c; + triangle_out->offset_x = offset_x; + triangle_out->offset_y = offset_y; return 1; } @@ -3157,9 +3168,9 @@ static void render_triangle_p(psx_gpu_struct *psx_gpu, void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, u32 flags) { - vertex_struct *vertex_ptrs[3]; - if (prepare_triangle(psx_gpu, vertexes, vertex_ptrs)) - render_triangle_p(psx_gpu, vertex_ptrs, flags); + prepared_triangle triangle; + if (prepare_triangle(psx_gpu, vertexes, &triangle)) + render_triangle_p(psx_gpu, triangle.vertexes, flags); } #if !defined(NEON_BUILD) || defined(SIMD_BUILD) @@ -5067,6 +5078,7 @@ void initialize_psx_gpu(psx_gpu_struct *psx_gpu, u16 *vram) psx_gpu->dither_table[1] = dither_table_row(2, -2, 3, -1); psx_gpu->dither_table[2] = dither_table_row(-3, 1, -4, 0); psx_gpu->dither_table[3] = dither_table_row(3, -1, 2, -2); + psx_gpu->allow_dithering = 1; psx_gpu->primitive_type = PRIMITIVE_TYPE_UNKNOWN; diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.h b/plugins/gpu_neon/psx_gpu/psx_gpu.h index 1ea391709..edea0a9eb 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu.h +++ b/plugins/gpu_neon/psx_gpu/psx_gpu.h @@ -32,6 +32,9 @@ #define unlikely(x) __builtin_expect((x), 0) #endif +#define sign_extend_11bit(value) \ + (((s32)((value) << 21)) >> 21) + typedef enum { PRIMITIVE_TYPE_TRIANGLE = 0, @@ -215,7 +218,7 @@ typedef struct // Align up to 64 byte boundary to keep the upcoming buffers cache line // aligned, also make reachable with single immediate addition - u8 reserved_a[184 + 8*4 - 9*sizeof(void *)]; + u8 reserved_a[180 + 9*4 - 9*sizeof(void *)]; // 8KB block_struct blocks[MAX_BLOCKS_PER_ROW]; @@ -247,6 +250,13 @@ typedef struct __attribute__((aligned(16))) u32 padding; } vertex_struct; +typedef struct +{ + vertex_struct *vertexes[3]; + s16 offset_x; + s16 offset_y; +} prepared_triangle; + void render_block_fill(psx_gpu_struct *psx_gpu, u32 color, u32 x, u32 y, u32 width, u32 height); void render_block_copy(psx_gpu_struct *psx_gpu, u16 *source, u32 x, u32 y, diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_main.c b/plugins/gpu_neon/psx_gpu/psx_gpu_main.c index 5f1f38348..30ec48c40 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_main.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_main.c @@ -18,6 +18,12 @@ #include "SDL.h" #include "common.h" +#include "../../gpulib/gpu.h" +#include "psx_gpu.c" +#include "psx_gpu_parse.c" + +#pragma GCC diagnostic ignored "-Wunused-result" + extern u32 span_pixels; extern u32 span_pixel_blocks; extern u32 spans; diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c b/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c index 03d055d53..e6fe06211 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c @@ -202,14 +202,9 @@ static void do_fill(psx_gpu_struct *psx_gpu, u32 x, u32 y, } } -#define sign_extend_11bit(value) \ - (((s32)((value) << 21)) >> 21) \ - #define get_vertex_data_xy(vertex_number, offset16) \ - vertexes[vertex_number].x = \ - sign_extend_11bit(list_s16[offset16]) + psx_gpu->offset_x; \ - vertexes[vertex_number].y = \ - sign_extend_11bit(list_s16[(offset16) + 1]) + psx_gpu->offset_y; \ + vertexes[vertex_number].x = sign_extend_11bit(list_s16[offset16]); \ + vertexes[vertex_number].y = sign_extend_11bit(list_s16[(offset16) + 1]); \ #define get_vertex_data_uv(vertex_number, offset16) \ vertexes[vertex_number].u = list_s16[offset16] & 0xFF; \ @@ -260,13 +255,44 @@ static void textured_sprite(psx_gpu_struct *psx_gpu, const u32 *list, gput_sum(*cpu_cycles_sum, *cpu_cycles, gput_sprite(width, height)); } +static void undo_offset(vertex_struct *vertexes, prepared_triangle *triangle) +{ + s32 i; + for (i = 0; i < 3; i++) + { + vertexes[i].x -= triangle->offset_x; + vertexes[i].y -= triangle->offset_y; + } +} + +static void do_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, + u32 current_command) +{ + prepared_triangle triangle; + if (prepare_triangle(psx_gpu, vertexes, &triangle)) + render_triangle_p(psx_gpu, triangle.vertexes, current_command); +} + +static void do_quad(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, + u32 current_command) +{ + prepared_triangle triangle; + if (prepare_triangle(psx_gpu, vertexes, &triangle)) + { + render_triangle_p(psx_gpu, triangle.vertexes, current_command); + undo_offset(vertexes, &triangle); + } + if (prepare_triangle(psx_gpu, vertexes + 1, &triangle)) + render_triangle_p(psx_gpu, triangle.vertexes, current_command); +} + u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, s32 *cpu_cycles_sum_out, s32 *cpu_cycles_last, u32 *last_command) { vertex_struct vertexes[4] __attribute__((aligned(16))) = {}; u32 current_command = 0, command_length; u32 cpu_cycles_sum = 0, cpu_cycles = *cpu_cycles_last; - u32 siplified_prim[4*4]; + u32 simplified_prim[4*4]; u32 *list_start = list; u32 *list_end = list + (size / 4); @@ -307,7 +333,7 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, get_vertex_data_xy(1, 4); get_vertex_data_xy(2, 6); - render_triangle(psx_gpu, vertexes, current_command); + do_triangle(psx_gpu, vertexes, current_command); gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base()); break; } @@ -322,7 +348,7 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, get_vertex_data_xy_uv(1, 6); get_vertex_data_xy_uv(2, 10); - render_triangle(psx_gpu, vertexes, current_command); + do_triangle(psx_gpu, vertexes, current_command); gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base_t()); break; } @@ -335,9 +361,8 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, get_vertex_data_xy(1, 4); get_vertex_data_xy(2, 6); get_vertex_data_xy(3, 8); - - render_triangle(psx_gpu, vertexes, current_command); - render_triangle(psx_gpu, &(vertexes[1]), current_command); + + do_quad(psx_gpu, vertexes, current_command); gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base()); break; } @@ -346,10 +371,11 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, { u32 i, simplified_count; set_texture(psx_gpu, list[4] >> 16); - if ((simplified_count = prim_try_simplify_quad_t(siplified_prim, list))) + if (!(psx_gpu->render_state_base & RENDER_STATE_DITHER) && + (simplified_count = prim_try_simplify_quad_t(simplified_prim, list))) { for (i = 0; i < simplified_count; i++) { - const u32 *list_ = &siplified_prim[i * 4]; + const u32 *list_ = &simplified_prim[i * 4]; textured_sprite(psx_gpu, list_, list_[3] & 0x3FF, (list_[3] >> 16) & 0x1FF, &cpu_cycles_sum, &cpu_cycles); } @@ -363,9 +389,8 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, get_vertex_data_xy_uv(1, 6); get_vertex_data_xy_uv(2, 10); get_vertex_data_xy_uv(3, 14); - - render_triangle(psx_gpu, vertexes, current_command); - render_triangle(psx_gpu, &(vertexes[1]), current_command); + + do_quad(psx_gpu, vertexes, current_command); gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_t()); break; } @@ -376,7 +401,7 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, get_vertex_data_xy_rgb(1, 4); get_vertex_data_xy_rgb(2, 8); - render_triangle(psx_gpu, vertexes, current_command); + do_triangle(psx_gpu, vertexes, current_command); gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base_g()); break; } @@ -390,7 +415,7 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, get_vertex_data_xy_uv_rgb(1, 6); get_vertex_data_xy_uv_rgb(2, 12); - render_triangle(psx_gpu, vertexes, current_command); + do_triangle(psx_gpu, vertexes, current_command); gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base_gt()); break; } @@ -401,9 +426,8 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, get_vertex_data_xy_rgb(1, 4); get_vertex_data_xy_rgb(2, 8); get_vertex_data_xy_rgb(3, 12); - - render_triangle(psx_gpu, vertexes, current_command); - render_triangle(psx_gpu, &(vertexes[1]), current_command); + + do_quad(psx_gpu, vertexes, current_command); gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_g()); break; } @@ -412,10 +436,11 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, { u32 i, simplified_count; set_texture(psx_gpu, list[5] >> 16); - if ((simplified_count = prim_try_simplify_quad_gt(siplified_prim, list))) + if (!(psx_gpu->render_state_base & RENDER_STATE_DITHER) && + (simplified_count = prim_try_simplify_quad_gt(simplified_prim, list))) { for (i = 0; i < simplified_count; i++) { - const u32 *list_ = &siplified_prim[i * 4]; + const u32 *list_ = &simplified_prim[i * 4]; textured_sprite(psx_gpu, list_, list_[3] & 0x3FF, (list_[3] >> 16) & 0x1FF, &cpu_cycles_sum, &cpu_cycles); } @@ -428,9 +453,8 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, get_vertex_data_xy_uv_rgb(1, 6); get_vertex_data_xy_uv_rgb(2, 12); get_vertex_data_xy_uv_rgb(3, 18); - - render_triangle(psx_gpu, vertexes, current_command); - render_triangle(psx_gpu, &(vertexes[1]), current_command); + + do_quad(psx_gpu, vertexes, current_command); gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_gt()); break; } @@ -755,10 +779,8 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, case 0xE5: { - s32 offset_x = list[0] << 21; - s32 offset_y = list[0] << 10; - psx_gpu->offset_x = offset_x >> 21; - psx_gpu->offset_y = offset_y >> 21; + psx_gpu->offset_x = sign_extend_11bit(list[0]); + psx_gpu->offset_y = sign_extend_11bit(list[0] >> 11); SET_Ex(5, list[0]); break; @@ -1054,15 +1076,16 @@ static u32 uv_hack(psx_gpu_struct *psx_gpu, const vertex_struct *vertex_ptrs) static void do_triangle_enhanced(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, u32 current_command) { - vertex_struct *vertex_ptrs[3]; + prepared_triangle triangle; - if (!prepare_triangle(psx_gpu, vertexes, vertex_ptrs)) + if (!prepare_triangle(psx_gpu, vertexes, &triangle)) return; if (!psx_gpu->hack_disable_main) - render_triangle_p(psx_gpu, vertex_ptrs, current_command); + render_triangle_p(psx_gpu, triangle.vertexes, current_command); - if (!check_enhanced_range(psx_gpu, vertex_ptrs[0]->x, vertex_ptrs[2]->x)) + if (!check_enhanced_range(psx_gpu, triangle.vertexes[0]->x, + triangle.vertexes[2]->x)) return; if (!enhancement_enable(psx_gpu)) @@ -1070,17 +1093,21 @@ static void do_triangle_enhanced(psx_gpu_struct *psx_gpu, if ((current_command & RENDER_FLAGS_TEXTURE_MAP) && psx_gpu->hack_texture_adj) psx_gpu->hacks_active |= uv_hack(psx_gpu, vertexes); - shift_vertices3(vertex_ptrs); + shift_vertices3(triangle.vertexes); shift_triangle_area(); - render_triangle_p(psx_gpu, vertex_ptrs, current_command); - unshift_vertices3(vertex_ptrs); + render_triangle_p(psx_gpu, triangle.vertexes, current_command); + //unshift_vertices3(triangle.vertexes); } static void do_quad_enhanced(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, u32 current_command) { + s16 x12_save[2] = { vertexes[1].x, vertexes[2].x }; + s16 y12_save[2] = { vertexes[1].y, vertexes[2].y }; do_triangle_enhanced(psx_gpu, vertexes, current_command); enhancement_disable(); + vertexes[1].x = x12_save[0], vertexes[2].x = x12_save[1]; + vertexes[1].y = y12_save[0], vertexes[2].y = y12_save[1]; do_triangle_enhanced(psx_gpu, &vertexes[1], current_command); } @@ -1185,7 +1212,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, vertex_struct vertexes[4] __attribute__((aligned(16))) = {}; u32 current_command = 0, command_length; u32 cpu_cycles_sum = 0, cpu_cycles = *cpu_cycles_last; - u32 siplified_prim[4*4]; + u32 simplified_prim[4*4]; u32 *list_start = list; u32 *list_end = list + (size / 4); @@ -1292,10 +1319,11 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, { u32 i, simplified_count; set_texture(psx_gpu, list[4] >> 16); - if ((simplified_count = prim_try_simplify_quad_t(siplified_prim, list))) + if (!(psx_gpu->render_state_base & RENDER_STATE_DITHER) && + (simplified_count = prim_try_simplify_quad_t(simplified_prim, list))) { for (i = 0; i < simplified_count; i++) { - const u32 *list_ = &siplified_prim[i * 4]; + const u32 *list_ = &simplified_prim[i * 4]; textured_sprite_enh(psx_gpu, list_, list_[3] & 0x3FF, (list_[3] >> 16) & 0x1FF, &cpu_cycles_sum, &cpu_cycles); } @@ -1356,10 +1384,11 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, { u32 i, simplified_count; set_texture(psx_gpu, list[5] >> 16); - if ((simplified_count = prim_try_simplify_quad_gt(siplified_prim, list))) + if (!(psx_gpu->render_state_base & RENDER_STATE_DITHER) && + (simplified_count = prim_try_simplify_quad_gt(simplified_prim, list))) { for (i = 0; i < simplified_count; i++) { - const u32 *list_ = &siplified_prim[i * 4]; + const u32 *list_ = &simplified_prim[i * 4]; textured_sprite_enh(psx_gpu, list_, list_[3] & 0x3FF, (list_[3] >> 16) & 0x1FF, &cpu_cycles_sum, &cpu_cycles); } diff --git a/plugins/gpu_neon/psx_gpu/tests/Makefile b/plugins/gpu_neon/psx_gpu/tests/Makefile index 21d615589..465dbb493 100644 --- a/plugins/gpu_neon/psx_gpu/tests/Makefile +++ b/plugins/gpu_neon/psx_gpu/tests/Makefile @@ -1,30 +1,29 @@ CC = $(CROSS_COMPILE)gcc -CFLAGS += -DTEXTURE_CACHE_4BPP -DTEXTURE_CACHE_8BPP +CFLAGS += -DTEXTURE_CACHE_4BPP -DTEXTURE_CACHE_8BPP -DGPU_NEON CFLAGS += -Wall -ggdb CFLAGS += -fno-strict-aliasing CFLAGS += `sdl-config --cflags` LDLIBS += `sdl-config --libs` -VPATH += .. - ifdef NEON CFLAGS += -mcpu=cortex-a8 -mfpu=neon -DNEON_BUILD ASFLAGS = $(CFLAGS) -OBJ += psx_gpu_arm_neon.o +OBJ += ../psx_gpu_arm_neon.o else CFLAGS += -DNEON_BUILD -DSIMD_BUILD -OBJ += psx_gpu_simd.o +OBJ += ../psx_gpu_simd.o endif ifndef DEBUG CFLAGS += -O2 -DNDEBUG endif -OBJ += psx_gpu.o psx_gpu_parse.o psx_gpu_main.o +OBJ += ../psx_gpu_main.o ../../../gpulib/prim.o all: psx_gpu psx_gpu: $(OBJ) + $(CC) -o $@ $(CFLAGS) $^ $(LDFLAGS) $(LDLIBS) clean: $(RM) psx_gpu $(OBJ) diff --git a/plugins/gpu_unai/gpu_raster_polygon.h b/plugins/gpu_unai/gpu_raster_polygon.h index 6aaf9adcc..fe7a8186b 100644 --- a/plugins/gpu_unai/gpu_raster_polygon.h +++ b/plugins/gpu_unai/gpu_raster_polygon.h @@ -78,14 +78,12 @@ static void polyInitVertexBuffer(PolyVertex *vbuf, const PtrUnion packet, PolyTy int num_verts = (is_quad) ? 4 : 3; le32_t *ptr; - // X,Y coords, adjusted by draw offsets - s32 x_off = gpu_unai.DrawingOffset[0]; - s32 y_off = gpu_unai.DrawingOffset[1]; + // X,Y coords ptr = &packet.U4[1]; for (int i=0; i < num_verts; ++i, ptr += vert_stride) { u32 coords = le32_to_u32(*ptr); - vbuf[i].x = GPU_EXPANDSIGN((s16)coords) + x_off; - vbuf[i].y = GPU_EXPANDSIGN((s16)(coords >> 16)) + y_off; + vbuf[i].x = GPU_EXPANDSIGN(coords); + vbuf[i].y = GPU_EXPANDSIGN(coords >> 16); } // U,V texture coords (if applicable) @@ -174,7 +172,7 @@ static inline int vertIdxOfHighestYCoord3(const T *Tptr) // or 1 for second triangle of a quad (idx 1,2,3 of vbuf[]). // Returns true if triangle should be rendered, false if not. /////////////////////////////////////////////////////////////////////////////// -static bool polyUseTriangle(const PolyVertex *vbuf, int tri_num, const PolyVertex **vert_ptrs) +static bool polyUseTriangle(const PolyVertex *vbuf, int tri_num, const PolyVertex **vert_ptrs, s32 &x_off, s32 &y_off) { // Using verts 0,1,2 or is this the 2nd pass of a quad (verts 1,2,3)? const PolyVertex *tri_ptr = &vbuf[(tri_num == 0) ? 0 : 1]; @@ -195,14 +193,20 @@ static bool polyUseTriangle(const PolyVertex *vbuf, int tri_num, const PolyVerte (highest_y - lowest_y) >= CHKMAX_Y) return false; + // Determine offsets + x_off = gpu_unai.DrawingOffset[0]; + y_off = gpu_unai.DrawingOffset[1]; + x_off = GPU_EXPANDSIGN(lowest_x + x_off) - lowest_x; + y_off = GPU_EXPANDSIGN(lowest_y + y_off) - lowest_y; + // Determine if triangle is completely outside clipping range int xmin, xmax, ymin, ymax; xmin = gpu_unai.DrawingArea[0]; xmax = gpu_unai.DrawingArea[2]; ymin = gpu_unai.DrawingArea[1]; ymax = gpu_unai.DrawingArea[3]; - int clipped_lowest_x = Max2(xmin,lowest_x); - int clipped_lowest_y = Max2(ymin,lowest_y); - int clipped_highest_x = Min2(xmax,highest_x); - int clipped_highest_y = Min2(ymax,highest_y); + int clipped_lowest_x = Max2(xmin, lowest_x + x_off); + int clipped_lowest_y = Max2(ymin, lowest_y + y_off); + int clipped_highest_x = Min2(xmax, highest_x + x_off); + int clipped_highest_y = Min2(ymax, highest_y + y_off); if (clipped_lowest_x >= clipped_highest_x || clipped_lowest_y >= clipped_highest_y) return false; @@ -237,16 +241,17 @@ void gpuDrawPolyF(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad do { const PolyVertex* vptrs[3]; - if (polyUseTriangle(vbuf, cur_pass, vptrs) == false) + s32 x_off, y_off; + if (!polyUseTriangle(vbuf, cur_pass, vptrs, x_off, y_off)) continue; s32 xa, xb, ya, yb; s32 x3, dx3, x4, dx4, dx; s32 x0, x1, x2, y0, y1, y2; - x0 = vptrs[0]->x; y0 = vptrs[0]->y; - x1 = vptrs[1]->x; y1 = vptrs[1]->y; - x2 = vptrs[2]->x; y2 = vptrs[2]->y; + x0 = vptrs[0]->x + x_off; y0 = vptrs[0]->y + y_off; + x1 = vptrs[1]->x + x_off; y1 = vptrs[1]->y + y_off; + x2 = vptrs[2]->x + x_off; y2 = vptrs[2]->y + y_off; ya = y2 - y0; yb = y2 - y1; @@ -395,7 +400,8 @@ void gpuDrawPolyFT(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_qua do { const PolyVertex* vptrs[3]; - if (polyUseTriangle(vbuf, cur_pass, vptrs) == false) + s32 x_off, y_off; + if (!polyUseTriangle(vbuf, cur_pass, vptrs, x_off, y_off)) continue; s32 xa, xb, ya, yb; @@ -405,12 +411,12 @@ void gpuDrawPolyFT(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_qua s32 u0, u1, u2, v0, v1, v2; s32 du4, dv4; - x0 = vptrs[0]->x; y0 = vptrs[0]->y; - u0 = vptrs[0]->tex.u; v0 = vptrs[0]->tex.v; - x1 = vptrs[1]->x; y1 = vptrs[1]->y; - u1 = vptrs[1]->tex.u; v1 = vptrs[1]->tex.v; - x2 = vptrs[2]->x; y2 = vptrs[2]->y; - u2 = vptrs[2]->tex.u; v2 = vptrs[2]->tex.v; + x0 = vptrs[0]->x + x_off; y0 = vptrs[0]->y + y_off; + u0 = vptrs[0]->tex.u; v0 = vptrs[0]->tex.v; + x1 = vptrs[1]->x + x_off; y1 = vptrs[1]->y + y_off; + u1 = vptrs[1]->tex.u; v1 = vptrs[1]->tex.v; + x2 = vptrs[2]->x + x_off; y2 = vptrs[2]->y + y_off; + u2 = vptrs[2]->tex.u; v2 = vptrs[2]->tex.v; ya = y2 - y0; yb = y2 - y1; @@ -719,7 +725,8 @@ void gpuDrawPolyG(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad do { const PolyVertex* vptrs[3]; - if (polyUseTriangle(vbuf, cur_pass, vptrs) == false) + s32 x_off, y_off; + if (!polyUseTriangle(vbuf, cur_pass, vptrs, x_off, y_off)) continue; s32 xa, xb, ya, yb; @@ -729,12 +736,12 @@ void gpuDrawPolyG(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad s32 r0, r1, r2, g0, g1, g2, b0, b1, b2; s32 dr4, dg4, db4; - x0 = vptrs[0]->x; y0 = vptrs[0]->y; - r0 = vptrs[0]->col.r; g0 = vptrs[0]->col.g; b0 = vptrs[0]->col.b; - x1 = vptrs[1]->x; y1 = vptrs[1]->y; - r1 = vptrs[1]->col.r; g1 = vptrs[1]->col.g; b1 = vptrs[1]->col.b; - x2 = vptrs[2]->x; y2 = vptrs[2]->y; - r2 = vptrs[2]->col.r; g2 = vptrs[2]->col.g; b2 = vptrs[2]->col.b; + x0 = vptrs[0]->x + x_off; y0 = vptrs[0]->y + y_off; + r0 = vptrs[0]->col.r; g0 = vptrs[0]->col.g; b0 = vptrs[0]->col.b; + x1 = vptrs[1]->x + x_off; y1 = vptrs[1]->y + y_off; + r1 = vptrs[1]->col.r; g1 = vptrs[1]->col.g; b1 = vptrs[1]->col.b; + x2 = vptrs[2]->x + x_off; y2 = vptrs[2]->y + y_off; + r2 = vptrs[2]->col.r; g2 = vptrs[2]->col.g; b2 = vptrs[2]->col.b; ya = y2 - y0; yb = y2 - y1; @@ -1067,7 +1074,8 @@ void gpuDrawPolyGT(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_qua do { const PolyVertex* vptrs[3]; - if (polyUseTriangle(vbuf, cur_pass, vptrs) == false) + s32 x_off, y_off; + if (!polyUseTriangle(vbuf, cur_pass, vptrs, x_off, y_off)) continue; s32 xa, xb, ya, yb; @@ -1080,15 +1088,15 @@ void gpuDrawPolyGT(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_qua s32 du4, dv4; s32 dr4, dg4, db4; - x0 = vptrs[0]->x; y0 = vptrs[0]->y; - u0 = vptrs[0]->tex.u; v0 = vptrs[0]->tex.v; - r0 = vptrs[0]->col.r; g0 = vptrs[0]->col.g; b0 = vptrs[0]->col.b; - x1 = vptrs[1]->x; y1 = vptrs[1]->y; - u1 = vptrs[1]->tex.u; v1 = vptrs[1]->tex.v; - r1 = vptrs[1]->col.r; g1 = vptrs[1]->col.g; b1 = vptrs[1]->col.b; - x2 = vptrs[2]->x; y2 = vptrs[2]->y; - u2 = vptrs[2]->tex.u; v2 = vptrs[2]->tex.v; - r2 = vptrs[2]->col.r; g2 = vptrs[2]->col.g; b2 = vptrs[2]->col.b; + x0 = vptrs[0]->x + x_off; y0 = vptrs[0]->y + y_off; + u0 = vptrs[0]->tex.u; v0 = vptrs[0]->tex.v; + r0 = vptrs[0]->col.r; g0 = vptrs[0]->col.g; b0 = vptrs[0]->col.b; + x1 = vptrs[1]->x + x_off; y1 = vptrs[1]->y + y_off; + u1 = vptrs[1]->tex.u; v1 = vptrs[1]->tex.v; + r1 = vptrs[1]->col.r; g1 = vptrs[1]->col.g; b1 = vptrs[1]->col.b; + x2 = vptrs[2]->x + x_off; y2 = vptrs[2]->y + y_off; + u2 = vptrs[2]->tex.u; v2 = vptrs[2]->tex.v; + r2 = vptrs[2]->col.r; g2 = vptrs[2]->col.g; b2 = vptrs[2]->col.b; ya = y2 - y0; yb = y2 - y1; diff --git a/plugins/gpu_unai/gpulib_if.cpp b/plugins/gpu_unai/gpulib_if.cpp index 5fbb7f529..4d0963446 100644 --- a/plugins/gpu_unai/gpulib_if.cpp +++ b/plugins/gpu_unai/gpulib_if.cpp @@ -361,8 +361,8 @@ static void gpuGP0Cmd_0xEx(gpu_unai_t &gpu_unai, u32 cmd_word) case 5: { // GP0(E5h) - Set Drawing Offset (X,Y) - gpu_unai.DrawingOffset[0] = ((s32)cmd_word<<(32-11))>>(32-11); - gpu_unai.DrawingOffset[1] = ((s32)cmd_word<<(32-22))>>(32-11); + gpu_unai.DrawingOffset[0] = GPU_EXPANDSIGN(cmd_word); + gpu_unai.DrawingOffset[1] = GPU_EXPANDSIGN(cmd_word >> 11); } break; case 6: { diff --git a/plugins/gpulib/gpu.c b/plugins/gpulib/gpu.c index ac9b86a33..9eb317874 100644 --- a/plugins/gpulib/gpu.c +++ b/plugins/gpulib/gpu.c @@ -831,10 +831,8 @@ long GPUdmaChain(uint32_t *rambase, uint32_t start_addr, } } - if (progress_addr) { - *progress_addr = addr; + if (progress_addr && (cpu_cycles_last + cpu_cycles_sum > 512)) break; - } if (addr == ld_addr) { log_anomaly(&gpu, "GPUdmaChain: loop @ %08x, cnt=%u\n", addr, count); break; @@ -851,6 +849,8 @@ long GPUdmaChain(uint32_t *rambase, uint32_t start_addr, gpu.state.last_list.cycles = cpu_cycles_sum + cpu_cycles_last; gpu.state.last_list.addr = start_addr; + if (progress_addr) + *progress_addr = addr; *cycles_last_cmd = cpu_cycles_last; return cpu_cycles_sum; }