Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Full Pi zero 2W support with capture in the GPU on VPU core 2 #251

Merged
merged 48 commits into from
Nov 20, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
48 commits
Select commit Hold shift + click to select a range
4da9c63
Added a profile for the Swedish computer ABC80
Aug 31, 2021
129d90e
Update ABC80.txt
sweproj Sep 17, 2021
72ba364
Added NEC PC-98 and Epson clones profiles.
cuba200611 Oct 22, 2021
9a1b1b0
Add vasm assembler for videocore IV
IanSB Nov 15, 2021
8e4797f
GPU videocore capture source
IanSB Nov 15, 2021
ec1e3a7
Add default 9bpp & 1bpp simple modes
IanSB Nov 15, 2021
a9bdd08
Update in band capture for GPU
IanSB Nov 15, 2021
efcb123
Update mode7 capture for GPU
IanSB Nov 15, 2021
1231e35
Update NTSC capture for GPU
IanSB Nov 15, 2021
8216d26
Update mailbox interface for GPU
IanSB Nov 15, 2021
19d63b7
Update overclock limits for Pi zero 2
IanSB Nov 15, 2021
cb3eaee
Add GPU capture for Pi zero and Pi zero 2W
IanSB Nov 15, 2021
c55af86
Improve benchmarks
IanSB Nov 15, 2021
a6f8fbe
Update videocore comments
IanSB Nov 15, 2021
f4da323
Fix stall issue with 12bpp capture into an 8bpp frame buffer
IanSB Nov 16, 2021
5115325
Mark all samples with FINAL_BIT at end of capture
IanSB Nov 16, 2021
7b1176d
Optimise MBOX reading code
IanSB Nov 16, 2021
744a042
Update RPI2 support
IanSB Nov 16, 2021
8124a82
Add arm capture build option
IanSB Nov 16, 2021
85986ca
tweak pipelining
dp111 Nov 16, 2021
4405c28
Run Arm capture on Pi 1
IanSB Nov 16, 2021
d5ad430
Fix issue with cache preload on GPU build
IanSB Nov 16, 2021
d168b7e
Add build info to bootup message
IanSB Nov 16, 2021
fbc542c
remove remaining support for very old CPLDs in GPU build
IanSB Nov 16, 2021
1cfb793
Include Sam Coupe in hidden profiles list
IanSB Nov 16, 2021
7e3ff3f
Re-add cache preload in in band signalling
IanSB Nov 16, 2021
9fb5b05
Update line timeout
IanSB Nov 16, 2021
62368d1
Run old CPLDs on ARM build only
IanSB Nov 16, 2021
c90b12c
Re-order some instructions in SKIP_PSYNC
IanSB Nov 16, 2021
e3e48e9
Update profiles
IanSB Nov 16, 2021
29c1b7b
Simplify font selection
IanSB Nov 16, 2021
4537048
Add videocore build script
IanSB Nov 16, 2021
ba87469
Update defaults in Amiga CPLD
IanSB Nov 17, 2021
e64eace
Update config.txt
IanSB Nov 17, 2021
4b939cf
Fix Pi4 SD card not working
IanSB Nov 17, 2021
af49a63
Workarounds to get Pi4 running on PLLA
IanSB Nov 18, 2021
f0da5c1
Merge pull request #5 from dp111/patch-16
IanSB Nov 18, 2021
b008efb
Fix RAM speed value
IanSB Nov 19, 2021
3e065e3
Fix pll calculation on Pi4
IanSB Nov 19, 2021
01ed279
Update configure_pll
IanSB Nov 19, 2021
5c798a6
Fix Pi2 screen start
IanSB Nov 19, 2021
208be4c
Update cached screen
IanSB Nov 19, 2021
a9d3970
Read start of screen from mailbox when setting cached area
IanSB Nov 20, 2021
24658f1
Added a profile for the Swedish computer ABC80
IanSB Nov 20, 2021
8d0ca8b
Added NEC PC-98 and Epson clones profiles.
IanSB Nov 20, 2021
15722d1
Merge branch 'pulls/60232493/235' into dev
IanSB Nov 20, 2021
e82aa83
Merge branch 'pulls/60232493/242' into dev
IanSB Nov 20, 2021
ff5cfe6
Fix garbage on right hand of screen in teletext mode with GPU capture
IanSB Nov 20, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
27 changes: 11 additions & 16 deletions src/cache.c
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ void map_4k_page(int logical, int physical) {
#endif
}

void enable_MMU_and_IDCaches(void)
void enable_MMU_and_IDCaches(int cached_screen_area, int cached_screen_size)
{

log_debug("enable_MMU_and_IDCaches");
Expand Down Expand Up @@ -193,21 +193,7 @@ void enable_MMU_and_IDCaches(void)
{
PageTable[base] = base << 20 | 0x04C02 | (shareable << 16) | (bb << 12);
}
#if defined(USE_CACHED_SCREEN)
for (; base < (SCREEN_START >> 20); base++)
{
PageTable[base] = base << 20 | 0x01C02; //uncached area before screen
}
for (; base < ((SCREEN_START + CACHED_SCREEN_OFFSET) >> 20); base++)
{
PageTable[base] = base << 20 | 0x01C02; //uncached part of screen ram
}
for (; base < ((SCREEN_START + SCREEN_SIZE) >> 20); base++)
{
PageTable[base] = base << 20 | 0x04C02 | (shareable << 16) | (bb << 12) | (aa << 2); //cached part of screen ram
}
#endif
for (; base < uncached_threshold; base++) // < 0x3F000000
for (; base < uncached_threshold; base++)
{
PageTable[base] = base << 20 | 0x01C02;
}
Expand All @@ -217,6 +203,15 @@ void enable_MMU_and_IDCaches(void)
PageTable[base] = base << 20 | 0x10C16;
}

#if defined(USE_CACHED_SCREEN)
if (cached_screen_area != 0) {
for (base = (cached_screen_area >> 20); base < ((cached_screen_area + cached_screen_size) >> 20); base++)
{
PageTable[base] = base << 20 | 0x04C02 | (shareable << 16) | (bb << 12) | (aa << 2); //cached part of screen ram
}
}
#endif

// suppress a warning as we really do want to copy from src address 0!
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wnonnull"
Expand Down
2 changes: 1 addition & 1 deletion src/cache.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@

void map_4k_page(int logical, int physical);

void enable_MMU_and_IDCaches(void);
void enable_MMU_and_IDCaches(int cached_screen_area, int cached_screen_size);

void CleanDataCache (void);

Expand Down
26 changes: 2 additions & 24 deletions src/capture_line_default_twelvebits_8bpp_16bpp.S
Original file line number Diff line number Diff line change
Expand Up @@ -53,31 +53,9 @@ loop_8bpp:
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
CAPTURE_EIGHT_BITS_8BPP_3 r6 // input in r8

WRITE_R5_R6_IF_LAST
cmp r1, #1
popeq {r0, pc}
WRITE_R5_R6

WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
CAPTURE_EIGHT_BITS_8BPP_0 r11 // input in r8
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
CAPTURE_EIGHT_BITS_8BPP_1 // input in r8
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
CAPTURE_EIGHT_BITS_8BPP_2 // input in r8
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
CAPTURE_EIGHT_BITS_8BPP_3 r7 // input in r8

WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
CAPTURE_EIGHT_BITS_8BPP_0 r12 // input in r8
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
CAPTURE_EIGHT_BITS_8BPP_1 // input in r8
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
CAPTURE_EIGHT_BITS_8BPP_2 // input in r8
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
CAPTURE_EIGHT_BITS_8BPP_3 r10 // input in r8

WRITE_R5_R6_R7_R10

subs r1, r1, #2
subs r1, r1, #1
bne loop_8bpp

pop {r0, pc}
Expand Down
352 changes: 254 additions & 98 deletions src/capture_line_fast_simple_16bpp.S

Large diffs are not rendered by default.

26 changes: 2 additions & 24 deletions src/capture_line_fast_twelvebits_8bpp_16bpp.S
Original file line number Diff line number Diff line change
Expand Up @@ -54,30 +54,8 @@ loop_8bpp:
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
CAPTURE_EIGHT_BITS_8BPP_3 r6 // input in r8

cmp r1, #1
stmeqia r0, {r5, r6}
popeq {r0, pc}

WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
CAPTURE_EIGHT_BITS_8BPP_0 r11 // input in r8
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
CAPTURE_EIGHT_BITS_8BPP_1 // input in r8
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
CAPTURE_EIGHT_BITS_8BPP_2 // input in r8
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
CAPTURE_EIGHT_BITS_8BPP_3 r7 // input in r8

WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
CAPTURE_EIGHT_BITS_8BPP_0 r12 // input in r8
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
CAPTURE_EIGHT_BITS_8BPP_1 // input in r8
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
CAPTURE_EIGHT_BITS_8BPP_2 // input in r8
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
CAPTURE_EIGHT_BITS_8BPP_3 r10 // input in r8

stmia r0!, {r5, r6, r7, r10}
subs r1, r1, #2
stmia r0!, {r5, r6}
subs r1, r1, #1
bne loop_8bpp

pop {r0, pc}
Expand Down
11 changes: 4 additions & 7 deletions src/capture_line_inband_4bpp_8bpp.S
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,7 @@ preload_capture_line_inband_4bpp:
bl preload_foundmode0inband
mov r1, #6
bl preload_foundmode0to6inband
exitpreload:
pop {r0, r1}
str r0, paletteFlags
str r1, inBandPointer
Expand Down Expand Up @@ -324,8 +325,8 @@ blank0to6loop_8bpp:
str r8, paletteFlags
pop {r0, pc}


preload_capture_line_inband_8bpp:

push {lr}
ldr r0, paletteFlags
ldr r1, inBandPointer
Expand All @@ -334,13 +335,9 @@ preload_capture_line_inband_8bpp:
str r0, paletteFlags //disable flags
SETUP_DUMMY_PARAMETERS
bl capture_line_inband_8bpp
#ifndef USE_MULTICORE
mov r1, #3
bl preload_foundmode0inband_8bpp
mov r1, #6
bl preload_foundmode0to6inband_8bpp
#endif
pop {r0, r1}
str r0, paletteFlags
str r1, inBandPointer
pop {pc}
b exitpreload

33 changes: 19 additions & 14 deletions src/capture_line_mode7_4bpp.S
Original file line number Diff line number Diff line change
Expand Up @@ -250,19 +250,7 @@ process_chars_loop_7_bob:

.align 6
.ltorg
// all 6 osd buffers must be sequential
osdbuffer3:
.word 0
osdbufferA1:
.word 0
osdbufferA2:
.word 0
osdbufferA3:
.word 0
charline:
.word 0
r1save:
.word 0


process_chars_7_advanced:
// Advanced deinterlace
Expand All @@ -284,8 +272,9 @@ process_chars_7_advanced:
// r12 = pointer into frame buffer (moves within line)
// r14 = misc
str r6, charline

add r1, r1, #1 //fix a rounding issue in GPU capture mode
SKIP_PSYNC
sub r1, r1, #1
mov r1, r1, lsr #1

process_chars_loop_7_advanced:
Expand Down Expand Up @@ -471,6 +460,22 @@ process_chars_loop_7_advanced:
beq deinterlace1
cmp r9, r14
beq nodeinterlace1 // if rounding pair then don't deinterlace
b deinterlace1
// all 6 osd buffers must be sequential
.align 6
osdbuffer3:
.word 0
osdbufferA1:
.word 0
osdbufferA2:
.word 0
osdbufferA3:
.word 0
charline:
.word 0
r1save:
.word 0


deinterlace1:
ldr r14, osdbufferA1 // get OSD bits
Expand Down
49 changes: 27 additions & 22 deletions src/capture_line_ntsc_8bpp.S
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,9 @@

#include "macros.S"

#ifdef USE_MULTICORE
#ifdef USE_ARM_CAPTURE
.macro SKIP_PSYNC_NO_OLD_CPLD_NTSC
SKIP_PSYNC_COMMON_NO_OLD_CPLD
ldr r14, =read_pointer
add r8, r1, r7
add r8, r8, #4 //add 4 to compensate for preloading samples
str r8, [r14, #20] //capture_count
dmb // wait until memory written
mov r9, #0
skip_psync_loop_no_oldL\@:
WAIT_FOR_PSYNC_EDGE_FAST // wait for next edge of psync
Expand All @@ -25,48 +20,57 @@ skip_psync_loop_no_oldL\@:
moveq r11, #0
// exit with r9 non-zero if burst detected, r11=red linem
.endm

.macro SKIP_PSYNC_NO_OLD_CPLD_NTSC_3BPP
SKIP_PSYNC_NO_OLD_CPLD_NTSC
.endm

#else

.macro SKIP_PSYNC_NO_OLD_CPLD_NTSC
SKIP_PSYNC_COMMON_NO_OLD_CPLD
ldr r14, =read_pointer
add r8, r1, r7
add r8, r8, #2 //add 2 to compensate for preloading samples
str r8, [r14, #20] //capture_count
dmb // wait until memory written
ldr r4, =GPU_DATA_0
add r8, r7, r1
add r8, r8, #1 + 4
mov r8, r8, lsr #1
str r8, [r4, #(GPU_COMMAND_offset - GPU_DATA_0_offset)] //command register
mov r9, #0
skip_psync_loop_no_oldL3\@:
skip_psync_loop_no_oldL6\@:
WAIT_FOR_PSYNC_EDGE_FAST // wait for next edge of psync
ands r10, r8, #(0x10 << PIXEL_BASE)
orr r9, r9, r10
and r10, r8, #(0x10 << (PIXEL_BASE + 6))
orr r9, r9, r10
subs r7, r7, #1
bne skip_psync_loop_no_oldL3\@
bne skip_psync_loop_no_oldL6\@
push {r14} //save timestamp
tst r3, #BIT_VSYNC_MARKER
ldrne r11, =0x01010101
moveq r11, #0
// exit with r9 non-zero if burst detected, r11=red linem
.endm
#else
.macro SKIP_PSYNC_NO_OLD_CPLD_NTSC

.macro SKIP_PSYNC_NO_OLD_CPLD_NTSC_3BPP
SKIP_PSYNC_COMMON_NO_OLD_CPLD
ldr r4, =GPU_DATA_0
add r8, r7, r1
add r8, r8, #1 + 2
mov r8, r8, lsr #1
str r8, [r4, #(GPU_COMMAND_offset - GPU_DATA_0_offset)] //command register
mov r9, #0
skip_psync_loop_no_oldL\@:
skip_psync_loop_no_oldL3\@:
WAIT_FOR_PSYNC_EDGE_FAST // wait for next edge of psync
ands r10, r8, #(0x10 << PIXEL_BASE)
orr r9, r9, r10
and r10, r8, #(0x10 << (PIXEL_BASE + 6))
orr r9, r9, r10
subs r7, r7, #1
bne skip_psync_loop_no_oldL\@
bne skip_psync_loop_no_oldL3\@
push {r14} //save timestamp
tst r3, #BIT_VSYNC_MARKER
ldrne r11, =0x01010101
moveq r11, #0
// exit with r9 non-zero if burst detected, r11=red linem
.endm
.macro SKIP_PSYNC_NO_OLD_CPLD_NTSC_3BPP
SKIP_PSYNC_NO_OLD_CPLD_NTSC
.endm
#endif

.macro NTSC_CAPTURE_BITS_8BPP_MONO
Expand Down Expand Up @@ -898,6 +902,7 @@ loop_8bpp6:

pop {r0, pc}

.ltorg
preload_capture_line_ntsc_sixbits_8bpp_cga:
PRELOAD_BITCOUNT
SETUP_DUMMY_PARAMETERS
Expand Down Expand Up @@ -1206,7 +1211,7 @@ preload_capture_line_ntsc_sixbits_8bpp_mono_auto:
.ltorg

full_capture_line_ntsc_sixbits_8bpp_mono_auto:
SKIP_PSYNC_NO_OLD_CPLD_NTSC // returns r9 != 0 if burst detected
SKIP_PSYNC_NO_OLD_CPLD_NTSC // returns r9 != 0 if burst detected
mov r1, r1, lsr #2
eor r8, r12, #NTSC_Y_INVERT // invert signal so tests can cascade
cmp r9, #0 //no burst?
Expand Down
Loading