diff --git a/Annotated code.pdf b/Annotated code.pdf new file mode 100644 index 0000000..2264dfc Binary files /dev/null and b/Annotated code.pdf differ diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..39eaab2 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,38 @@ +add_executable(pio_ws2812) + +# generate the header file into the source tree as it is included in the RP2040 datasheet +pico_generate_pio_header(pio_ws2812 ${CMAKE_CURRENT_LIST_DIR}/ws2812.pio OUTPUT_DIR ${CMAKE_CURRENT_LIST_DIR}/generated) + +target_sources(pio_ws2812 PRIVATE ws2812.c) + +target_link_libraries(pio_ws2812 PRIVATE pico_stdlib hardware_pio) +pico_add_extra_outputs(pio_ws2812) + +pico_enable_stdio_usb(pio_ws2812 1) +pico_enable_stdio_uart(pio_ws2812 0) + +# add url via pico_set_program_url +example_auto_set_url(pio_ws2812) + +add_executable(pio_ws2812_parallel) + +pico_generate_pio_header(pio_ws2812_parallel ${CMAKE_CURRENT_LIST_DIR}/ws2812.pio OUTPUT_DIR ${CMAKE_CURRENT_LIST_DIR}/generated) + +target_sources(pio_ws2812_parallel PRIVATE ws2812_parallel.c) + +target_compile_definitions(pio_ws2812_parallel PRIVATE + PIN_DBG1=3) + +target_link_libraries(pio_ws2812_parallel PRIVATE pico_stdlib hardware_pio hardware_dma) +pico_add_extra_outputs(pio_ws2812_parallel) + +# add url via pico_set_program_url +example_auto_set_url(pio_ws2812_parallel) + +# Additionally generate the python pioasm output (generated/ws2812.py) for inclusion in the RP2040 datasheet +add_custom_target(pio_ws2812_datasheet DEPENDS ${CMAKE_CURRENT_LIST_DIR}/generated/ws2812.py) +add_custom_command(OUTPUT ${CMAKE_CURRENT_LIST_DIR}/generated/ws2812.py + DEPENDS ${CMAKE_CURRENT_LIST_DIR}/ws2812.pio + COMMAND Pioasm -o python ${CMAKE_CURRENT_LIST_DIR}/ws2812.pio ${CMAKE_CURRENT_LIST_DIR}/generated/ws2812.py + ) +add_dependencies(pio_ws2812 pio_ws2812_datasheet) \ No newline at end of file diff --git a/Paper Modelling.pdf b/Paper Modelling.pdf new file mode 100644 index 0000000..032de29 Binary files /dev/null and 
b/Paper Modelling.pdf differ diff --git a/README.md b/README.md index a8be84b..c67e193 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,39 @@ University of Pennsylvania, ESE 5190: Intro to Embedded Systems, Lab 2A - (TODO) YOUR NAME HERE - (TODO) LinkedIn, personal website, twitter, etc. - Tested on: (TODO) MacBook Pro (14-inch, 2021), macOS Monterey 12.5.1 + Lab2A-Worked-with-Sushrut-Salil-Thakur -(TODO: Your README) +University of Pennsylvania, ESE 5190: Intro to Embedded Systems, Lab 2A + + Juilee Samir Kotnis + Tested on: Lenovo Legion, 8 GB RAM (15.6-inch, 2020), Windows 11 + +Answers to Questions in Section 3.2 of the Lab 2A assignment: + +Why is bit-banging impractical on your laptop, despite it having a much faster processor than the RP2040? + +Bit-banging is a method of data transmission that uses software, instead of dedicated hardware, to generate transmitted signals and process received signals through GPIO pins. It is impractical on a laptop because more communication errors, such as glitches and jitter, occur when bit-banging is used, especially when the processor is performing the data communication at the same time as other tasks. + +What are some cases where directly using the GPIO might be a better choice than using the PIO hardware? + +Compared to the PIO hardware, direct GPIO is a better fit for low-priority tasks, such as switching on an LCD display, that do not need to run in a loop and are not time-critical. + +How do you get data into a PIO state machine? + +Every state machine has a TX FIFO (first-in, first-out) register that stores the data loaded by the software; the state machine then reads the data from this register. + +How do you get data out of a PIO state machine? + +Every state machine has an RX FIFO (first-in, first-out) register; the state machine pushes its output data into this register and the software reads it from there. + +How do you program a PIO state machine? + +The PIO has 4 independent state machines which share instruction memory. 
When the software loads a program into this instruction memory, it sets the input/output mapping and thus programs a PIO state machine. + +In the example, which low-level C SDK function is directly responsible for telling the PIO to set the LED to a new color? How is this function accessed from the main “application” code? + +The 'pio_sm_put_blocking' function sets the color of the built-in LED. We first need to include the SDK headers pico/stdlib.h and hardware/pio.h, and link the matching SDK libraries (pico_stdlib and hardware_pio) in the CMakeLists.txt file. Then, in the main code, call the pio_sm_put_blocking function with the following parameters: PIO instance, state machine index, 32-bit color data. + +What role does the pioasm “assembler” play in the example, and how does this interact with CMake? + +pioasm is an assembler that translates the assembly code in the .pio file into the binary code that needs to be loaded into the PIO instruction memory, emitted as a generated .pio.h header. In the CMake file, the function pico_generate_pio_header(TARGET PIO_FILE) invokes pioasm, which makes things easier as we don't have to invoke it manually. -Include lab questions, screenshots, analysis, etc. (Remember, this is public, so don't put anything here you don't want to share with the world.) 
diff --git a/Register Spreadsheet.xlsx b/Register Spreadsheet.xlsx new file mode 100644 index 0000000..386704c Binary files /dev/null and b/Register Spreadsheet.xlsx differ diff --git a/Timing-diagram.png b/Timing-diagram.png new file mode 100644 index 0000000..de71b17 Binary files /dev/null and b/Timing-diagram.png differ diff --git a/gitignore b/gitignore new file mode 100644 index 0000000..46f42f8 --- /dev/null +++ b/gitignore @@ -0,0 +1,11 @@ +CMakeLists.txt.user +CMakeCache.txt +CMakeFiles +CMakeScripts +Testing +Makefile +cmake_install.cmake +install_manifest.txt +compile_commands.json +CTestTestfile.cmake +_deps diff --git a/ws2812.c b/ws2812.c new file mode 100644 index 0000000..71833bb --- /dev/null +++ b/ws2812.c @@ -0,0 +1,121 @@ +/** + * Copyright (c) 2020 Raspberry Pi (Trading) Ltd. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include +#include +#include +#include "pico/stdlib.h" +#include "hardware/pio.h" +#include "hardware/clocks.h" +#include "ws2812.pio.h" + +#define IS_RGBW true +#define NUM_PIXELS 150 + +#ifdef PICO_DEFAULT_WS2812_PIN +#define WS2812_PIN PICO_DEFAULT_WS2812_PIN +#else +#define WS2812_PIN 12 +#endif +// default to pin 12 if the board doesn't have a default WS2812 pin defined + +// Queue one pixel word on pio0 state machine 0, shifted left by 8 bits; blocks while the TX FIFO is full +static inline void put_pixel(uint32_t pixel_grb) { + pio_sm_put_blocking(pio0, 0, pixel_grb << 8u); +} + +// Pack 8-bit r, g, b into one word laid out as (g << 16) | (r << 8) | b +static inline uint32_t urgb_u32(uint8_t r, uint8_t g, uint8_t b) { + return + ((uint32_t) (r) << 8) | + ((uint32_t) (g) << 16) | + (uint32_t) (b); +} + +void pattern_snakes(uint len, uint t) { + for (uint i = 0; i < len; ++i) { + uint x = (i + (t >> 1)) % 64; + if (x < 10) + put_pixel(urgb_u32(0xff, 0, 0)); + else if (x >= 15 && x < 25) + put_pixel(urgb_u32(0, 0xff, 0)); + else if (x >= 30 && x < 40) + put_pixel(urgb_u32(0, 0, 0xff)); + else + put_pixel(0); + } +} + +void pattern_random(uint len, uint t) { + if (t % 8) + return; + for (int i = 0; i < len; ++i) + put_pixel(rand()); +} + +void pattern_sparkle(uint len, uint t) { + if (t % 8) + 
return; + for (int i = 0; i < len; ++i) + put_pixel(rand() % 16 ? 0 : 0xffffffff); +} + +void pattern_greys(uint len, uint t) { + int max = 100; // let's not draw too much current! + t %= max; + for (int i = 0; i < len; ++i) { + put_pixel(t * 0x10101); + if (++t >= max) t = 0; + } +} + +typedef void (*pattern)(uint len, uint t); +const struct { + pattern pat; + const char *name; +} pattern_table[] = { + {pattern_snakes, "Snakes!"}, + {pattern_random, "Random data"}, + {pattern_sparkle, "Sparkles"}, + {pattern_greys, "Greys"}, +}; + +// Drives the WS2812 power pin high (where the board defines one), loads the ws2812 PIO program, +// then loops forever showing randomly chosen patterns in a randomly chosen direction. +int main() { + //set_sys_clock_48(); +#ifdef PICO_DEFAULT_WS2812_POWER_PIN + // not every board defines a WS2812 power pin, so only drive it when one is defined + int gpio = PICO_DEFAULT_WS2812_POWER_PIN; + gpio_init(gpio); + gpio_set_dir(gpio, GPIO_OUT); + gpio_put(gpio,1); +#endif + stdio_init_all(); + printf("WS2812 Smoke Test, using pin %d\n", WS2812_PIN); + + // todo get free sm + PIO pio = pio0; + int sm = 0; + uint offset = pio_add_program(pio, &ws2812_program); + + ws2812_program_init(pio, sm, offset, WS2812_PIN, 800000, IS_RGBW); + + int t = 0; + while (1) { + int pat = rand() % count_of(pattern_table); + int dir = (rand() >> 30) & 1 ? 1 : -1; + puts(pattern_table[pat].name); + puts(dir == 1 ? "(forward)" : "(backward)"); + for (int i = 0; i < 1000; ++i) { + pattern_table[pat].pat(NUM_PIXELS, t); + sleep_ms(10); + t += dir; + } + } + // not reached: the loop above never exits + return 0; +} diff --git a/ws2812.pio b/ws2812.pio new file mode 100644 index 0000000..3c31fd6 --- /dev/null +++ b/ws2812.pio @@ -0,0 +1,85 @@ +; +; Copyright (c) 2020 Raspberry Pi (Trading) Ltd. 
+; +; SPDX-License-Identifier: BSD-3-Clause +; + +.program ws2812 +.side_set 1 + +.define public T1 2 +.define public T2 5 +.define public T3 3 + +.lang_opt python sideset_init = pico.PIO.OUT_HIGH +.lang_opt python out_init = pico.PIO.OUT_HIGH +.lang_opt python out_shiftdir = 1 + +.wrap_target +bitloop: + out x, 1 side 0 [T3 - 1] ; Side-set still takes place when instruction stalls + jmp !x do_zero side 1 [T1 - 1] ; Branch on the bit we shifted out. Positive pulse +do_one: + jmp bitloop side 1 [T2 - 1] ; Continue driving high, for a long pulse +do_zero: + nop side 0 [T2 - 1] ; Or drive low, for a short pulse +.wrap + +% c-sdk { +#include "hardware/clocks.h" + +static inline void ws2812_program_init(PIO pio, uint sm, uint offset, uint pin, float freq, bool rgbw) { + + pio_gpio_init(pio, pin); + pio_sm_set_consecutive_pindirs(pio, sm, pin, 1, true); + + pio_sm_config c = ws2812_program_get_default_config(offset); + sm_config_set_sideset_pins(&c, pin); + sm_config_set_out_shift(&c, false, true, rgbw ? 
32 : 24); + sm_config_set_fifo_join(&c, PIO_FIFO_JOIN_TX); + + int cycles_per_bit = ws2812_T1 + ws2812_T2 + ws2812_T3; + float div = clock_get_hz(clk_sys) / (freq * cycles_per_bit); + sm_config_set_clkdiv(&c, div); + + pio_sm_init(pio, sm, offset, &c); + pio_sm_set_enabled(pio, sm, true); +} +%} + +.program ws2812_parallel + +.define public T1 2 +.define public T2 5 +.define public T3 3 + +.wrap_target + out x, 32 + mov pins, !null [T1-1] + mov pins, x [T2-1] + mov pins, null [T3-2] +.wrap + +% c-sdk { +#include "hardware/clocks.h" + +static inline void ws2812_parallel_program_init(PIO pio, uint sm, uint offset, uint pin_base, uint pin_count, float freq) { + for(uint i=pin_base; i +#include +#include + +#include "pico/stdlib.h" +#include "pico/sem.h" +#include "hardware/pio.h" +#include "hardware/dma.h" +#include "hardware/irq.h" +#include "ws2812.pio.h" + +#define FRAC_BITS 4 +#define NUM_PIXELS 64 +#define WS2812_PIN_BASE 2 + +// horrible temporary hack to avoid changing pattern code: put_pixel() appends its bytes through this cursor +static uint8_t *current_string_out; +static bool current_string_4color; + +static inline void put_pixel(uint32_t pixel_grb) { + *current_string_out++ = pixel_grb & 0xffu; + *current_string_out++ = (pixel_grb >> 8u) & 0xffu; + *current_string_out++ = (pixel_grb >> 16u) & 0xffu; + if (current_string_4color) { + *current_string_out++ = 0; // todo adjust? 
+ } +} + +static inline uint32_t urgb_u32(uint8_t r, uint8_t g, uint8_t b) { + return + ((uint32_t) (r) << 8) | + ((uint32_t) (g) << 16) | + (uint32_t) (b); +} + +void pattern_snakes(uint len, uint t) { + for (uint i = 0; i < len; ++i) { + uint x = (i + (t >> 1)) % 64; + if (x < 10) + put_pixel(urgb_u32(0xff, 0, 0)); + else if (x >= 15 && x < 25) + put_pixel(urgb_u32(0, 0xff, 0)); + else if (x >= 30 && x < 40) + put_pixel(urgb_u32(0, 0, 0xff)); + else + put_pixel(0); + } +} + +void pattern_random(uint len, uint t) { + if (t % 8) + return; + for (int i = 0; i < len; ++i) + put_pixel(rand()); +} + +void pattern_sparkle(uint len, uint t) { + if (t % 8) + return; + for (int i = 0; i < len; ++i) + put_pixel(rand() % 16 ? 0 : 0xffffffff); +} + +void pattern_greys(uint len, uint t) { + int max = 100; // let's not draw too much current! + t %= max; + for (int i = 0; i < len; ++i) { + put_pixel(t * 0x10101); + if (++t >= max) t = 0; + } +} + +void pattern_solid(uint len, uint t) { + t = 1; + for (int i = 0; i < len; ++i) { + put_pixel(t * 0x10101); + } +} + +int level = 8; + +void pattern_fade(uint len, uint t) { + uint shift = 4; + + uint max = 16; // let's not draw too much current! + max <<= shift; + + uint slow_t = t / 32; + slow_t = level; + slow_t %= max; + + static int error; + slow_t += error; + error = slow_t & ((1u << shift) - 1); + slow_t >>= shift; + slow_t *= 0x010101; + + for (int i = 0; i < len; ++i) { + put_pixel(slow_t); + } +} + +typedef void (*pattern)(uint len, uint t); +const struct { + pattern pat; + const char *name; +} pattern_table[] = { + {pattern_snakes, "Snakes!"}, + {pattern_random, "Random data"}, + {pattern_sparkle, "Sparkles"}, + {pattern_greys, "Greys"}, +// {pattern_solid, "Solid!"}, +// {pattern_fade, "Fade"}, +}; + +#define VALUE_PLANE_COUNT (8 + FRAC_BITS) +// we store value (8 bits + fractional bits of a single color (R/G/B/W) value) for multiple +// strings, in bit planes. bit plane N has the Nth bit of each string. 
+typedef struct { + // stored MSB first + uint32_t planes[VALUE_PLANE_COUNT]; +} value_bits_t; + +// Add FRAC_BITS planes of e to s and store in d +void add_error(value_bits_t *d, const value_bits_t *s, const value_bits_t *e) { + uint32_t carry_plane = 0; + // add the FRAC_BITS low planes + for (int p = VALUE_PLANE_COUNT - 1; p >= 8; p--) { + uint32_t e_plane = e->planes[p]; + uint32_t s_plane = s->planes[p]; + d->planes[p] = (e_plane ^ s_plane) ^ carry_plane; + carry_plane = (e_plane & s_plane) | (carry_plane & (s_plane ^ e_plane)); + } + // then just ripple carry through the non fractional bits + for (int p = 7; p >= 0; p--) { + uint32_t s_plane = s->planes[p]; + d->planes[p] = s_plane ^ carry_plane; + carry_plane &= s_plane; + } +} + +typedef struct { + uint8_t *data; + uint data_len; + uint frac_brightness; // 256 = *1.0; +} string_t; + +// takes 8 bit color values, multiply by brightness and store in bit planes +void transform_strings(string_t **strings, uint num_strings, value_bits_t *values, uint value_length, + uint frac_brightness) { + for (uint v = 0; v < value_length; v++) { + memset(&values[v], 0, sizeof(values[v])); + for (int i = 0; i < num_strings; i++) { + if (v < strings[i]->data_len) { + // todo clamp? 
+ uint32_t value = (strings[i]->data[v] * strings[i]->frac_brightness) >> 8u; + value = (value * frac_brightness) >> 8u; + for (int j = 0; j < VALUE_PLANE_COUNT && value; j++, value >>= 1u) { + if (value & 1u) values[v].planes[VALUE_PLANE_COUNT - 1 - j] |= 1u << i; + } + } + } + } +} + +void dither_values(const value_bits_t *colors, value_bits_t *state, const value_bits_t *old_state, uint value_length) { + for (uint i = 0; i < value_length; i++) { + add_error(state + i, colors + i, old_state + i); + } +} + +// requested colors * 4 to allow for RGBW +static value_bits_t colors[NUM_PIXELS * 4]; +// double buffer the state of the string, since we update next version in parallel with DMAing out old version +static value_bits_t states[2][NUM_PIXELS * 4]; + +// example - string 0 is RGB only +static uint8_t string0_data[NUM_PIXELS * 3]; +// example - string 1 is RGBW +static uint8_t string1_data[NUM_PIXELS * 4]; + +string_t string0 = { + .data = string0_data, + .data_len = sizeof(string0_data), + .frac_brightness = 0x40, +}; + +string_t string1 = { + .data = string1_data, + .data_len = sizeof(string1_data), + .frac_brightness = 0x100, +}; + +string_t *strings[] = { + &string0, + &string1, +}; + +// bit plane content dma channel +#define DMA_CHANNEL 0 +// chain channel for configuring main dma channel to output from disjoint 8 word fragments of memory +#define DMA_CB_CHANNEL 1 + +#define DMA_CHANNEL_MASK (1u << DMA_CHANNEL) +#define DMA_CB_CHANNEL_MASK (1u << DMA_CB_CHANNEL) +#define DMA_CHANNELS_MASK (DMA_CHANNEL_MASK | DMA_CB_CHANNEL_MASK) + +// start of each value fragment (+1 for NULL terminator) +static uintptr_t fragment_start[NUM_PIXELS * 4 + 1]; + +// posted when it is safe to output a new set of values +static struct semaphore reset_delay_complete_sem; +// alarm handle for handling delay +alarm_id_t reset_delay_alarm_id; + +int64_t reset_delay_complete(alarm_id_t id, void *user_data) { + reset_delay_alarm_id = 0; + sem_release(&reset_delay_complete_sem); + // no 
repeat + return 0; +} + +void __isr dma_complete_handler() { + if (dma_hw->ints0 & DMA_CHANNEL_MASK) { + // clear IRQ + dma_hw->ints0 = DMA_CHANNEL_MASK; + // when the dma is complete we start the reset delay timer + if (reset_delay_alarm_id) cancel_alarm(reset_delay_alarm_id); + reset_delay_alarm_id = add_alarm_in_us(400, reset_delay_complete, NULL, true); + } +} + +void dma_init(PIO pio, uint sm) { + dma_claim_mask(DMA_CHANNELS_MASK); + + // main DMA channel outputs 8 word fragments, and then chains back to the chain channel + dma_channel_config channel_config = dma_channel_get_default_config(DMA_CHANNEL); + channel_config_set_dreq(&channel_config, pio_get_dreq(pio, sm, true)); + channel_config_set_chain_to(&channel_config, DMA_CB_CHANNEL); + channel_config_set_irq_quiet(&channel_config, true); + dma_channel_configure(DMA_CHANNEL, + &channel_config, + &pio->txf[sm], + NULL, // set by chain + 8, // 8 words: one per bit plane sent, starting at planes[0] (planes are stored MSB first) + false); + + // chain channel sends single word pointer to start of fragment each time + dma_channel_config chain_config = dma_channel_get_default_config(DMA_CB_CHANNEL); + dma_channel_configure(DMA_CB_CHANNEL, + &chain_config, + &dma_channel_hw_addr( + DMA_CHANNEL)->al3_read_addr_trig, // ch DMA config (target "ring" buffer size 4) - this is (read_addr trigger) + NULL, // set later + 1, + false); + + irq_set_exclusive_handler(DMA_IRQ_0, dma_complete_handler); + dma_channel_set_irq0_enabled(DMA_CHANNEL, true); + irq_set_enabled(DMA_IRQ_0, true); +} + +void output_strings_dma(value_bits_t *bits, uint value_length) { + for (uint i = 0; i < value_length; i++) { + fragment_start[i] = (uintptr_t) bits[i].planes; // MSB first + } + fragment_start[value_length] = 0; + dma_channel_hw_addr(DMA_CB_CHANNEL)->al3_read_addr_trig = (uintptr_t) fragment_start; +} + + +int main() { + //set_sys_clock_48(); + stdio_init_all(); + puts("WS2812 parallel"); + + // todo get free sm + PIO pio = pio0; + int sm = 0; + uint offset = pio_add_program(pio, 
&ws2812_parallel_program); + + ws2812_parallel_program_init(pio, sm, offset, WS2812_PIN_BASE, count_of(strings), 800000); + + sem_init(&reset_delay_complete_sem, 1, 1); // initially posted so we don't block first time + dma_init(pio, sm); + int t = 0; + while (1) { + int pat = rand() % count_of(pattern_table); + int dir = (rand() >> 30) & 1 ? 1 : -1; + if (rand() & 1) dir = 0; + puts(pattern_table[pat].name); + puts(dir == 1 ? "(forward)" : dir ? "(backward)" : "(still)"); + int brightness = 0; + uint current = 0; + for (int i = 0; i < 1000; ++i) { + current_string_out = string0.data; + current_string_4color = false; + pattern_table[pat].pat(NUM_PIXELS, t); + current_string_out = string1.data; + current_string_4color = true; + pattern_table[pat].pat(NUM_PIXELS, t); + + transform_strings(strings, count_of(strings), colors, NUM_PIXELS * 4, brightness); + dither_values(colors, states[current], states[current ^ 1], NUM_PIXELS * 4); + sem_acquire_blocking(&reset_delay_complete_sem); + output_strings_dma(states[current], NUM_PIXELS * 4); + + current ^= 1; + t += dir; + brightness++; + if (brightness == (0x20 << FRAC_BITS)) brightness = 0; + } + memset(&states, 0, sizeof(states)); // clear out errors + } +}