diff --git a/deepsocflow/c/dsf_zynq.c b/deepsocflow/c/dsf_zynq.c index 3ffb70b..33da8f6 100644 --- a/deepsocflow/c/dsf_zynq.c +++ b/deepsocflow/c/dsf_zynq.c @@ -8,7 +8,7 @@ #include #include #include -#define printf xil_printf +#include #define MEM_BASEADDR 0x20000000 @@ -21,20 +21,21 @@ XAxiDma dma_pixels, dma_weights, dma_output; XScuGic intr_controller; // Generic interrupt controller u32 status; + static void start_wait_output(UINTPTR baseaddr, u32 bpt){ int status = XAxiDma_SimpleTransfer(&dma_output , baseaddr, bpt, XAXIDMA_DEVICE_TO_DMA); if (status != XST_SUCCESS) xil_printf("S2MM transfer failed, base:%p, bpt:%d\n", baseaddr, bpt); while(!done_output); - printf("Done output dma at :%p, bpt:%d\n", baseaddr, bpt); + xil_printf("Done output dma at :%p, bpt:%d\n", baseaddr, bpt); Xil_DCacheFlushRange((INTPTR)baseaddr, bpt); done_output = 0; } static void start_pixels_dma(); +#define printf xil_printf #include "runtime.h" - - +#undef printf static void start_pixels_dma() { @@ -84,6 +85,8 @@ static void setup_interrupt(XScuGic *p_intr_controller, u32 intr_id, Xil_Interru int main() { init_platform(); + + xil_printf("Store wbx at: %p; y:%p; buffers {0:%p,1:%p}; debug_nhwc:%p; debug_tiled:%p \n", &mem.w, &mem.y, &mem.out_buffers[0], &mem.out_buffers[1], &mem.debug_nhwc, &mem.debug_tiled); print("Starting!!!\n\r"); @@ -127,8 +130,9 @@ int main() { xil_printf("Done inference: %d \n", 0); Xil_DCacheFlushRange((INTPTR)&mem.y, O_WORDS*sizeof(O_TYPE)); // force transfer to DDR, starting addr & length - for (int i=0; i<20; i++) - xil_printf("y[%d]: %d \n", i, mem.y[i]); + for (int i=0; isoftmax_frac); val = val - pb->softmax_max_f; -#ifdef SIM val = (float)exp(val); -#endif mem.y[iy_nhwc] = val; if (i_yc == pb->co-1) { diff --git a/deepsocflow/py/model.py b/deepsocflow/py/model.py index 5b2eda8..0ca9d2f 100644 --- a/deepsocflow/py/model.py +++ b/deepsocflow/py/model.py @@ -148,7 +148,7 @@ def export_inference(self, x, hw): out_buffer_idx = 1*(not out_buffer_idx) if ib != len(bundles)-1 else -1 # alternate between 0 and 1 - ch.write(f" {{.n={b.r.XN:<3}, .l={b.r.XL:<3}, .kw={b.r.KW:<3}, .coe={y_coe:<3}, .coe_tl={y_coe_tl:<3}, .r_ll={y_r_ll:<3}, .h={b.r.XH:<3}, .w={b.r.XW:<3}, .ci={b.r.CI:<4}, .co={b.r.CO:<3}, .w_kw2={b.r.XW-b.r.KW//2:<3}, .t={b.r.IT:<3}, .p={b.r.CP:<3}, .cm={b.r.CM:<3}, .cm_p0={b.r.CM_0:<3}, .xp_words={xp_words:<3}, ") + ch.write(f" {{.n={b.r.XN:<3}, .l={b.r.XL:<3}, .kw={b.r.KW:<3}, .coe={y_coe:<3}, .coe_tl={y_coe_tl:<3}, .r_ll={y_r_ll:<3}, .h={b.r.XH:<3}, .w={b.r.XW:<3}, .ci={b.r.CI:<4}, .co={b.r.CO:<3}, .w_kw2={b.r.XW-b.r.KW//2:<3}, .t={b.r.IT:<3}, .p={b.r.CP:<3}, .cm={b.r.CM:<3}, .cm_p0={b.r.CM_0:<3}, .xp_words={xp_words:<4}, ") ch.write( f".w_bpt={w_bpt:<5}, .w_bpt_p0={w_bpt_p0:<5}, .x_bpt={x_bpt:<5}, .x_bpt_p0={x_bpt_p0:<5}, .o_words={o_words_b:<5}, .o_bytes={o_bytes_b:<5}, ") ch.write( f".out_buffer_idx={out_buffer_idx:<2}, .add_out_buffer_idx={add_out_buffer_idx:<2}, .add_in_buffer_idx={add_in_buffer_idx:<2}, ") ch.write( f".is_bias={1*(b.b is not None):<3}, .is_flatten={1*b.flatten:<3}, .is_softmax={1*b.softmax:<3}, ")