Skip to content

Commit

Permalink
Implemented downwards message passing. Moved large epiphany bsp funct…
Browse files Browse the repository at this point in the history
…ions to external memory (use the new linker script)
  • Loading branch information
Tombana committed Mar 27, 2015
1 parent 0f98011 commit 8906096
Show file tree
Hide file tree
Showing 7 changed files with 465 additions and 40 deletions.
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
ESDK=${EPIPHANY_HOME}
ELDF=${ESDK}/bsps/current/fast.ldf
ELDF=ebsp_fast.ldf

HOST_LIBNAME = libhost-bsp
E_LIBNAME = libe-bsp
Expand Down
363 changes: 363 additions & 0 deletions ebsp_fast.ldf
Original file line number Diff line number Diff line change
@@ -0,0 +1,363 @@
/*
fast.ldf

Copyright (C) 2012 Adapteva, Inc.
Contributed by Yaniv Sapir <[email protected]>

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program, see the file COPYING. If not, see
<http://www.gnu.org/licenses/>.
*/


/* Default linker script, for regular executables */
OUTPUT_FORMAT("elf32-epiphany", "elf32-epiphany", "elf32-epiphany")
OUTPUT_ARCH(epiphany)
ENTRY(_start)

PROVIDE (___bss_start = __bss_start);

/* BSP specific */
__PROG_SIZE_FOR_CORE_ = 1M;
__HEAP_SIZE_FOR_CORE_ = 512K;
__HALF_BANK_SIZE_ = 4K;
__NUM_ROWS_IN_CHIP_ = 4;
__NUM_COLS_IN_CHIP_ = 4;
__FIRST_CORE_ROW_ = 32;
__FIRST_CORE_COL_ = 8;
__CORE_ROW_ = DEFINED(__CORE_ROW_) ? __CORE_ROW_ : __FIRST_CORE_ROW_;
__CORE_COL_ = DEFINED(__CORE_COL_) ? __CORE_COL_ : __FIRST_CORE_COL_;
/* generic don't touch */
/* used to calculate the slice address in the external memory */
__CORE_NUM_ = (__CORE_ROW_ - __FIRST_CORE_ROW_) * __NUM_COLS_IN_CHIP_ + (__CORE_COL_ - __FIRST_CORE_COL_);

__SHARED_DRAM_ = 0x8e000000;

MEMORY
{
EXTERNAL_DRAM_0 (WXAI) : ORIGIN = 0x8e000000, LENGTH = 0x01000000 /* .text, data, rodata, bss and .stack */
EXTERNAL_DRAM_1 (WXAI) : ORIGIN = 0x8f000000, LENGTH = 0x01000000 /* .heap */

/* run time lib and crt0 */
IVT_RAM (WXAI) : ORIGIN = 0, LENGTH = 0x28

/* user program, continuous placement */
WORKGROUP_RAM (WXAI) : ORIGIN = LENGTH(IVT_RAM), LENGTH = 0x30

/* user program, continuous placement */
INTERNAL_RAM (WXAI) : ORIGIN = LENGTH(IVT_RAM) + LENGTH(WORKGROUP_RAM), LENGTH = 32K - LENGTH(IVT_RAM) - LENGTH(WORKGROUP_RAM)

/* user program, per bank usage */
BANK0_SRAM (WXAI) : ORIGIN = LENGTH(IVT_RAM) + LENGTH(WORKGROUP_RAM), LENGTH = 8K - LENGTH(IVT_RAM) - LENGTH(WORKGROUP_RAM)
BANK1_SRAM (WXAI) : ORIGIN = 0x2000, LENGTH = 8K
BANK2_SRAM (WXAI) : ORIGIN = 0x4000, LENGTH = 8K
BANK3_SRAM (WXAI) : ORIGIN = 0x6000, LENGTH = 8K

/* system registers */
MMR (WAI) : ORIGIN = 0xF0000, LENGTH = 32K

/* multicore (global) space */
CORE_0x20_0x08_INTERNAL_RAM : ORIGIN = 0x80800000 + LENGTH(IVT_RAM), LENGTH = 32K - LENGTH(IVT_RAM)
CORE_0x20_0x09_INTERNAL_RAM : ORIGIN = 0x80900000 + LENGTH(IVT_RAM), LENGTH = 32K - LENGTH(IVT_RAM)
CORE_0x20_0x0a_INTERNAL_RAM : ORIGIN = 0x80a00000 + LENGTH(IVT_RAM), LENGTH = 32K - LENGTH(IVT_RAM)
CORE_0x20_0x0b_INTERNAL_RAM : ORIGIN = 0x80b00000 + LENGTH(IVT_RAM), LENGTH = 32K - LENGTH(IVT_RAM)
CORE_0x21_0x08_INTERNAL_RAM : ORIGIN = 0x84800000 + LENGTH(IVT_RAM), LENGTH = 32K - LENGTH(IVT_RAM)
CORE_0x21_0x09_INTERNAL_RAM : ORIGIN = 0x84900000 + LENGTH(IVT_RAM), LENGTH = 32K - LENGTH(IVT_RAM)
CORE_0x21_0x0a_INTERNAL_RAM : ORIGIN = 0x84a00000 + LENGTH(IVT_RAM), LENGTH = 32K - LENGTH(IVT_RAM)
CORE_0x21_0x0b_INTERNAL_RAM : ORIGIN = 0x84b00000 + LENGTH(IVT_RAM), LENGTH = 32K - LENGTH(IVT_RAM)
CORE_0x22_0x08_INTERNAL_RAM : ORIGIN = 0x88800000 + LENGTH(IVT_RAM), LENGTH = 32K - LENGTH(IVT_RAM)
CORE_0x22_0x09_INTERNAL_RAM : ORIGIN = 0x88900000 + LENGTH(IVT_RAM), LENGTH = 32K - LENGTH(IVT_RAM)
CORE_0x22_0x0a_INTERNAL_RAM : ORIGIN = 0x88a00000 + LENGTH(IVT_RAM), LENGTH = 32K - LENGTH(IVT_RAM)
CORE_0x22_0x0b_INTERNAL_RAM : ORIGIN = 0x88b00000 + LENGTH(IVT_RAM), LENGTH = 32K - LENGTH(IVT_RAM)
CORE_0x23_0x08_INTERNAL_RAM : ORIGIN = 0x8c800000 + LENGTH(IVT_RAM), LENGTH = 32K - LENGTH(IVT_RAM)
CORE_0x23_0x09_INTERNAL_RAM : ORIGIN = 0x8c900000 + LENGTH(IVT_RAM), LENGTH = 32K - LENGTH(IVT_RAM)
CORE_0x23_0x0a_INTERNAL_RAM : ORIGIN = 0x8ca00000 + LENGTH(IVT_RAM), LENGTH = 32K - LENGTH(IVT_RAM)
CORE_0x23_0x0b_INTERNAL_RAM : ORIGIN = 0x8cb00000 + LENGTH(IVT_RAM), LENGTH = 32K - LENGTH(IVT_RAM)
}

SECTIONS
{
ivt_reset 0x00 : {*.o(IVT_RESET)} > IVT_RAM
ivt_software_exception 0x04 : {*.o(ivt_entry_software_exception)} > IVT_RAM
ivt_page_miss 0x08 : {*.o(ivt_entry_page_miss)} > IVT_RAM
ivt_timer0 0x0c : {*.o(ivt_entry_timer0)} > IVT_RAM
ivt_timer1 0x10 : {*.o(ivt_entry_timer1)} > IVT_RAM
ivt_message 0x14 : {*.o(ivt_entry_message)} > IVT_RAM
ivt_dma0 0x18 : {*.o(ivt_entry_dma0)} > IVT_RAM
ivt_dma1 0x1c : {*.o(ivt_entry_dma1)} > IVT_RAM
ivt_wand 0x20 : {*.o(ivt_entry_wand)} > IVT_RAM
ivt_user 0x24 : {*.o(ivt_entry_user)} > IVT_RAM

workgroup_config 0x28 : {*.o(workgroup_cfg)} > WORKGROUP_RAM /* 28 2c 30 34 38 3c 40 44 48 (4c) */
external_mem_config 0x50 : {*.o(ext_mem_cfg)} > WORKGROUP_RAM /* 50 54 */

/* place the ISR handlers after workgroup-configuration */
.reserved_crt0 ORIGIN(IVT_RAM) + LENGTH(IVT_RAM) + LENGTH(WORKGROUP_RAM) : {*.o(RESERVED_CRT0) *.o(reserved_crt0)} > INTERNAL_RAM

. = ADDR(.reserved_crt0) + SIZEOF(.reserved_crt0);
.data_bank0 . : {*.o(.data_bank0)} > BANK0_SRAM
. = ADDR(.data_bank0) + SIZEOF(.data_bank0);
.text_bank0 . : {*.o(.text_bank0)} > BANK0_SRAM

.data_bank1 : {*.o(.data_bank1)} > BANK1_SRAM
.text_bank1 : {*.o(.text_bank1)} > BANK1_SRAM

.data_bank2 : {*.o(.data_bank2)} > BANK2_SRAM
.text_bank2 : {*.o(.text_bank2)} > BANK2_SRAM

.data_bank3 : {*.o(.data_bank3)} > BANK3_SRAM
.text_bank3 : {*.o(.text_bank3)} > BANK3_SRAM

.code_dram : {*.o(code_dram) *.o(.code_dram) } > EXTERNAL_DRAM_0
.shared_dram : {*.o(shared_dram) *.o(.shared_dram)} > EXTERNAL_DRAM_1
.heap_dram ORIGIN(EXTERNAL_DRAM_1) + LENGTH(EXTERNAL_DRAM_1) - __HEAP_SIZE_FOR_CORE_ * __NUM_ROWS_IN_CHIP_ * __NUM_COLS_IN_CHIP_ : {*.o(heap_dram) *.o(.heap_dram)} > EXTERNAL_DRAM_1

/* the newlib (libc and libm) library is mapped to the dedicated section */
/* create offset with location counter */
. = (ORIGIN(EXTERNAL_DRAM_0) + __PROG_SIZE_FOR_CORE_ * __CORE_NUM_);

NEW_LIB_RO . : {lib_a-*.o(.text .rodata) *.o(libgloss_epiphany) } > EXTERNAL_DRAM_0
NEW_LIB_WR . : {lib_a-*.o(.data) } > EXTERNAL_DRAM_0
GNU_C_BUILTIN_LIB_RO . :
{
*mulsi3.o(.text .rodata) *modsi3.o(.text .rodata)
*divsi3.o(.text .rodata) *udivsi3.o(.text .rodata)
*umodsi3.o(.text .rodata) _*.o(.text .rodata)
} > EXTERNAL_DRAM_0
EBSP_TEXT . : { } > EXTERNAL_DRAM_0
EBSP_RO . : { } > EXTERNAL_DRAM_0

/* Read-only sections, merged into text segment: */

.interp : { *(.interp) }
.note.gnu.build-id : { *(.note.gnu.build-id) }
.hash : { *(.hash) }
.gnu.hash : { *(.gnu.hash) }
.dynsym : { *(.dynsym) }
.dynstr : { *(.dynstr) }
.gnu.version : { *(.gnu.version) }
.gnu.version_d : { *(.gnu.version_d) }
.gnu.version_r : { *(.gnu.version_r) }
.rela.init : { *(.rela.init) }
.rela.text : { *(.rela.text .rela.text.* .rela.gnu.linkonce.t.*) }
.rela.fini : { *(.rela.fini) }
.rela.rodata : { *(.rela.rodata .rela.rodata.* .rela.gnu.linkonce.r.*) }
.rela.data.rel.ro : { *(.rela.data.rel.ro* .rela.gnu.linkonce.d.rel.ro.*) }
.rela.data : { *(.rela.data .rela.data.* .rela.gnu.linkonce.d.*) }
.rela.tdata : { *(.rela.tdata .rela.tdata.* .rela.gnu.linkonce.td.*) }
.rela.tbss : { *(.rela.tbss .rela.tbss.* .rela.gnu.linkonce.tb.*) }
.rela.ctors : { *(.rela.ctors) }
.rela.dtors : { *(.rela.dtors) }
.rela.got : { *(.rela.got) }
.rela.sdata : { *(.rela.sdata .rela.sdata.* .rela.gnu.linkonce.s.*) }
.rela.sbss : { *(.rela.sbss .rela.sbss.* .rela.gnu.linkonce.sb.*) }
.rela.sdata2 : { *(.rela.sdata2 .rela.sdata2.* .rela.gnu.linkonce.s2.*) }
.rela.sbss2 : { *(.rela.sbss2 .rela.sbss2.* .rela.gnu.linkonce.sb2.*) }
.rela.bss : { *(.rela.bss .rela.bss.* .rela.gnu.linkonce.b.*) }
.rela.iplt :
{
PROVIDE_HIDDEN (__rela_iplt_start = .);
*(.rela.iplt)
PROVIDE_HIDDEN (__rela_iplt_end = .);
}
.rela.plt :
{
*(.rela.plt)
}

.init ADDR(.data_bank0) + SIZEOF(.data_bank0) :
{
KEEP (*(.init))
} > INTERNAL_RAM =0

.plt : { *(.plt) }
.iplt : { *(.iplt) }
.text :
{
*(.text.unlikely .text.*_unlikely)
*(.text.exit .text.exit.*)
*(.text.startup .text.startup.*)
*(.text.hot .text.hot.*)
*(.text .stub .text.* .gnu.linkonce.t.*)
/* .gnu.warning sections are handled specially by elf32.em. */
*(.gnu.warning)
} > INTERNAL_RAM =0
.fini :
{
KEEP (*(.fini))
} > INTERNAL_RAM =0
PROVIDE (__etext = .);
PROVIDE (_etext = .);
PROVIDE (etext = .);
.rodata1 : { *(.rodata1) }
.sdata2 :
{
*(.sdata2 .sdata2.* .gnu.linkonce.s2.*)
}
.sbss2 : { *(.sbss2 .sbss2.* .gnu.linkonce.sb2.*) }
.eh_frame_hdr : { *(.eh_frame_hdr) }
.eh_frame : ONLY_IF_RO { KEEP (*(.eh_frame)) }
.gcc_except_table : ONLY_IF_RO { *(.gcc_except_table
.gcc_except_table.*) }
/* These sections are generated by the Sun/Oracle C++ compiler. */
.exception_ranges : ONLY_IF_RO { *(.exception_ranges
.exception_ranges*) }
/* Adjust the address for the data segment. We want to adjust up to
the same address within the page on the next page up. */
. = ALIGN(1) + (. & (1 - 1));
/* Exception handling */
.eh_frame : ONLY_IF_RW { KEEP (*(.eh_frame)) }
.gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }
.exception_ranges : ONLY_IF_RW { *(.exception_ranges .exception_ranges*) }
/* Thread Local Storage sections */
.tdata : { *(.tdata .tdata.* .gnu.linkonce.td.*) }
.tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) }
.preinit_array :
{
PROVIDE_HIDDEN (__preinit_array_start = .);
KEEP (*(.preinit_array))
PROVIDE_HIDDEN (__preinit_array_end = .);
}> INTERNAL_RAM
.init_array :
{
PROVIDE_HIDDEN (__init_array_start = .);
KEEP (*(SORT(.init_array.*)))
KEEP (*(.init_array))
PROVIDE_HIDDEN (__init_array_end = .);
}> INTERNAL_RAM
.fini_array :
{
PROVIDE_HIDDEN (__fini_array_start = .);
KEEP (*(SORT(.fini_array.*)))
KEEP (*(.fini_array))
PROVIDE_HIDDEN (__fini_array_end = .);
}> INTERNAL_RAM
.ctors :
{
/* gcc uses crtbegin.o to find the start of
the constructors, so we make sure it is
first. Because this is a wildcard, it
doesn't matter if the user does not
actually link against crtbegin.o; the
linker won't look for a file to match a
wildcard. The wildcard also means that it
doesn't matter which directory crtbegin.o
is in. */
KEEP (*crtbegin.o(.ctors))
KEEP (*crtbegin?.o(.ctors))
/* We don't want to include the .ctor section from
the crtend.o file until after the sorted ctors.
The .ctor section from the crtend file contains the
end of ctors marker and it must be last */
KEEP (*(EXCLUDE_FILE (*crtend.o *crtend?.o ) .ctors))
KEEP (*(SORT(.ctors.*)))
KEEP (*(.ctors))
}> INTERNAL_RAM
.dtors :
{
KEEP (*crtbegin.o(.dtors))
KEEP (*crtbegin?.o(.dtors))
KEEP (*(EXCLUDE_FILE (*crtend.o *crtend?.o ) .dtors))
KEEP (*(SORT(.dtors.*)))
KEEP (*(.dtors))
}> INTERNAL_RAM
.jcr : { KEEP (*(.jcr)) }> INTERNAL_RAM
.data.rel.ro : { *(.data.rel.ro.local* .gnu.linkonce.d.rel.ro.local.*) *(.data.rel.ro* .gnu.linkonce.d.rel.ro.*) }
.dynamic : { *(.dynamic) }
.data :
{
*(.data .data.* .gnu.linkonce.d.*)
SORT(CONSTRUCTORS)
}> INTERNAL_RAM
.data1 : { *(.data1) }> INTERNAL_RAM
.rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }> INTERNAL_RAM
.got : { *(.got.plt) *(.igot.plt) *(.got) *(.igot) }
/* We want the small data sections together, so single-instruction offsets
can access them all, and initialized data all before uninitialized, so
we can shorten the on-disk segment size. */
.sdata :
{
*(.sdata .sdata.* .gnu.linkonce.s.*)
}
. = ALIGN(8);
_edata = .; PROVIDE (edata = .);
__bss_start = .;
.sbss :
{
*(.dynsbss)
*(.sbss .sbss.* .gnu.linkonce.sb.*)
*(.scommon)
}
.bss :
{
*(.dynbss)
*(.bss .bss.* .gnu.linkonce.b.*)
*(COMMON)
/* Align here to ensure that the .bss section occupies space up to
_end. Align after .bss to ensure correct alignment even if the
.bss section disappears because there are no input sections.
FIXME: Why do we need it? When there is no .bss section, we don't
pad the .data section. */
. = ALIGN(. != 0 ? 8 : 1);
}> INTERNAL_RAM
. = ALIGN(8);
. = ALIGN(8);
_end = .; PROVIDE (end = .);
/* Stabs debugging sections. */
.stab 0 : { *(.stab) }
.stabstr 0 : { *(.stabstr) }
.stab.excl 0 : { *(.stab.excl) }
.stab.exclstr 0 : { *(.stab.exclstr) }
.stab.index 0 : { *(.stab.index) }
.stab.indexstr 0 : { *(.stab.indexstr) }
.comment 0 : { *(.comment) }
/* DWARF debug sections.
Symbols in the DWARF debugging sections are relative to the beginning
of the section so we begin them at 0. */
/* DWARF 1 */
.debug 0 : { *(.debug) }
.line 0 : { *(.line) }
/* GNU DWARF 1 extensions */
.debug_srcinfo 0 : { *(.debug_srcinfo) }
.debug_sfnames 0 : { *(.debug_sfnames) }
/* DWARF 1.1 and DWARF 2 */
.debug_aranges 0 : { *(.debug_aranges) }
.debug_pubnames 0 : { *(.debug_pubnames) }
/* DWARF 2 */
.debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
.debug_abbrev 0 : { *(.debug_abbrev) }
.debug_line 0 : { *(.debug_line) }
.debug_frame 0 : { *(.debug_frame) }
.debug_str 0 : { *(.debug_str) }
.debug_loc 0 : { *(.debug_loc) }
.debug_macinfo 0 : { *(.debug_macinfo) }
/* SGI/MIPS DWARF 2 extensions */
.debug_weaknames 0 : { *(.debug_weaknames) }
.debug_funcnames 0 : { *(.debug_funcnames) }
.debug_typenames 0 : { *(.debug_typenames) }
.debug_varnames 0 : { *(.debug_varnames) }
/* DWARF 3 */
.debug_pubtypes 0 : { *(.debug_pubtypes) }
.debug_ranges 0 : { *(.debug_ranges) }
.gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }

/**/
PROVIDE (__stack_start_ = ORIGIN(INTERNAL_RAM) + LENGTH(INTERNAL_RAM) - 0x10);
.stack __stack_start_ : { ___stack = .; *(.stack) }
PROVIDE (___heap_start = ORIGIN(EXTERNAL_DRAM_1) + __HEAP_SIZE_FOR_CORE_ * __CORE_NUM_);
/*.heap_start __heap_start_ : { _heap_start_ = .; *(.heap_start) } */
PROVIDE (___heap_end = ORIGIN(EXTERNAL_DRAM_1) + __HEAP_SIZE_FOR_CORE_ * __CORE_NUM_ + __HEAP_SIZE_FOR_CORE_ - 4);
/* .heap_end __heap_end_ : { _heap_end_ = .; *(.heap_end) } */

/DISCARD/ : { *(.note.GNU-stack) *(.gnu_debuglink) *(.gnu.lto_*) }
}
3 changes: 2 additions & 1 deletion examples/Makefile
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
ESDK=${EPIPHANY_HOME}
ELDF=${ESDK}/bsps/current/fast.ldf
ELDF=../ebsp_fast.ldf

CFLAGS=-std=c99 -O2 -ffast-math
CFLAGS=-std=c99 -O3 -ffast-math -funroll-loops

INCLUDES = \
-I../include\
Expand Down
2 changes: 2 additions & 0 deletions include/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ typedef struct
// ARM --> Epiphany
float remotetimer;
int nprocs;
int initial_tagsize;

// Epiphany <--> Epiphany
void* bsp_var_list[MAX_BSP_VARS][_NPROCS];
Expand Down Expand Up @@ -179,6 +180,7 @@ typedef struct
#define STATE_CONTINUE 3
#define STATE_FINISH 4
#define STATE_INIT 5
#define STATE_EREADY 6

// Clockspeed of Epiphany in cycles/second
// This was 'measured' by comparing with ARM wall-time measurements
Expand Down
Loading

0 comments on commit 8906096

Please sign in to comment.