strip-controller-esp8266/.pio/libdeps/local/FastLED/fastspi_bitbang.h
2020-07-17 18:55:06 +02:00

381 lines
16 KiB
C++

#ifndef __INC_FASTSPI_BITBANG_H
#define __INC_FASTSPI_BITBANG_H
#include "FastLED.h"
#include "fastled_delay.h"
FASTLED_NAMESPACE_BEGIN
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//
// Software SPI (aka bit-banging) support - with aggressive optimizations for when the clock and data pin are on the same port
//
// TODO: Replace the select pin definition with a set of pins, to allow using mux hardware for routing in the future
//
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
template <uint8_t DATA_PIN, uint8_t CLOCK_PIN, uint32_t SPI_SPEED>
class AVRSoftwareSPIOutput {
// The data types for pointers to the pin port - typedef'd here from the Pin definition because on avr these
// are pointers to 8 bit values, while on arm they are 32 bit
typedef typename FastPin<DATA_PIN>::port_ptr_t data_ptr_t;
typedef typename FastPin<CLOCK_PIN>::port_ptr_t clock_ptr_t;
// The data type for what's at a pin's port - typedef'd here from the Pin definition because on avr the ports
// are 8 bits wide while on arm they are 32.
typedef typename FastPin<DATA_PIN>::port_t data_t;
typedef typename FastPin<CLOCK_PIN>::port_t clock_t;
Selectable *m_pSelect;
public:
AVRSoftwareSPIOutput() { m_pSelect = NULL; }
AVRSoftwareSPIOutput(Selectable *pSelect) { m_pSelect = pSelect; }
void setSelect(Selectable *pSelect) { m_pSelect = pSelect; }
void init() {
// set the pins to output and make sure the select is released (which apparently means hi? This is a bit
// confusing to me)
FastPin<DATA_PIN>::setOutput();
FastPin<CLOCK_PIN>::setOutput();
release();
}
// stop the SPI output. Pretty much a NOP with software, as there's no registers to kick
static void stop() { }
// wait until the SPI subsystem is ready for more data to write. A NOP when bitbanging
static void wait() __attribute__((always_inline)) { }
static void waitFully() __attribute__((always_inline)) { wait(); }
static void writeByteNoWait(uint8_t b) __attribute__((always_inline)) { writeByte(b); }
static void writeBytePostWait(uint8_t b) __attribute__((always_inline)) { writeByte(b); wait(); }
static void writeWord(uint16_t w) __attribute__((always_inline)) { writeByte(w>>8); writeByte(w&0xFF); }
// naive writeByte implelentation, simply calls writeBit on the 8 bits in the byte.
static void writeByte(uint8_t b) {
writeBit<7>(b);
writeBit<6>(b);
writeBit<5>(b);
writeBit<4>(b);
writeBit<3>(b);
writeBit<2>(b);
writeBit<1>(b);
writeBit<0>(b);
}
private:
// writeByte implementation with data/clock registers passed in.
static void writeByte(uint8_t b, clock_ptr_t clockpin, data_ptr_t datapin) {
writeBit<7>(b, clockpin, datapin);
writeBit<6>(b, clockpin, datapin);
writeBit<5>(b, clockpin, datapin);
writeBit<4>(b, clockpin, datapin);
writeBit<3>(b, clockpin, datapin);
writeBit<2>(b, clockpin, datapin);
writeBit<1>(b, clockpin, datapin);
writeBit<0>(b, clockpin, datapin);
}
// writeByte implementation with the data register passed in and prebaked values for data hi w/clock hi and
// low and data lo w/clock hi and lo. This is to be used when clock and data are on the same GPIO register,
// can get close to getting a bit out the door in 2 clock cycles!
static void writeByte(uint8_t b, data_ptr_t datapin,
data_t hival, data_t loval,
clock_t hiclock, clock_t loclock) {
writeBit<7>(b, datapin, hival, loval, hiclock, loclock);
writeBit<6>(b, datapin, hival, loval, hiclock, loclock);
writeBit<5>(b, datapin, hival, loval, hiclock, loclock);
writeBit<4>(b, datapin, hival, loval, hiclock, loclock);
writeBit<3>(b, datapin, hival, loval, hiclock, loclock);
writeBit<2>(b, datapin, hival, loval, hiclock, loclock);
writeBit<1>(b, datapin, hival, loval, hiclock, loclock);
writeBit<0>(b, datapin, hival, loval, hiclock, loclock);
}
// writeByte implementation with not just registers passed in, but pre-baked values for said registers for
// data hi/lo and clock hi/lo values. Note: weird things will happen if this method is called in cases where
// the data and clock pins are on the same port! Don't do that!
static void writeByte(uint8_t b, clock_ptr_t clockpin, data_ptr_t datapin,
data_t hival, data_t loval,
clock_t hiclock, clock_t loclock) {
writeBit<7>(b, clockpin, datapin, hival, loval, hiclock, loclock);
writeBit<6>(b, clockpin, datapin, hival, loval, hiclock, loclock);
writeBit<5>(b, clockpin, datapin, hival, loval, hiclock, loclock);
writeBit<4>(b, clockpin, datapin, hival, loval, hiclock, loclock);
writeBit<3>(b, clockpin, datapin, hival, loval, hiclock, loclock);
writeBit<2>(b, clockpin, datapin, hival, loval, hiclock, loclock);
writeBit<1>(b, clockpin, datapin, hival, loval, hiclock, loclock);
writeBit<0>(b, clockpin, datapin, hival, loval, hiclock, loclock);
}
public:
// We want to make sure that the clock pulse is held high for a nininum of 35ns.
#if defined(FASTLED_TEENSY4)
#define DELAY_NS (1000 / (SPI_SPEED/1000000))
#define CLOCK_HI_DELAY do { delayNanoseconds((DELAY_NS/4)); } while(0);
#define CLOCK_LO_DELAY do { delayNanoseconds((DELAY_NS/4)); } while(0);
#else
#define MIN_DELAY ((NS(35)>3) ? (NS(35) - 3) : 1)
#define CLOCK_HI_DELAY do { delaycycles<MIN_DELAY>(); delaycycles<((SPI_SPEED > 10) ? (((SPI_SPEED-6) / 2) - MIN_DELAY) : (SPI_SPEED))>(); } while(0);
#define CLOCK_LO_DELAY do { delaycycles<((SPI_SPEED > 10) ? ((SPI_SPEED-6) / 2) : (SPI_SPEED))>(); } while(0);
#endif
// write the BIT'th bit out via spi, setting the data pin then strobing the clcok
template <uint8_t BIT> __attribute__((always_inline, hot)) inline static void writeBit(uint8_t b) {
//cli();
if(b & (1 << BIT)) {
FastPin<DATA_PIN>::hi();
#ifdef ESP32
// try to ensure we never have adjacent write opcodes to the same register
FastPin<CLOCK_PIN>::lo();
FastPin<CLOCK_PIN>::hi(); CLOCK_HI_DELAY;
FastPin<CLOCK_PIN>::toggle(); CLOCK_LO_DELAY;
#else
FastPin<CLOCK_PIN>::hi(); CLOCK_HI_DELAY;
FastPin<CLOCK_PIN>::lo(); CLOCK_LO_DELAY;
#endif
} else {
FastPin<DATA_PIN>::lo();
FastPin<CLOCK_PIN>::hi(); CLOCK_HI_DELAY;
#ifdef ESP32
// try to ensure we never have adjacent write opcodes to the same register
FastPin<CLOCK_PIN>::toggle(); CLOCK_HI_DELAY;
#else
FastPin<CLOCK_PIN>::lo(); CLOCK_LO_DELAY;
#endif
}
//sei();
}
private:
// write the BIT'th bit out via spi, setting the data pin then strobing the clock, using the passed in pin registers to accelerate access if needed
template <uint8_t BIT> __attribute__((always_inline)) inline static void writeBit(uint8_t b, clock_ptr_t clockpin, data_ptr_t datapin) {
if(b & (1 << BIT)) {
FastPin<DATA_PIN>::hi(datapin);
FastPin<CLOCK_PIN>::hi(clockpin); CLOCK_HI_DELAY;
FastPin<CLOCK_PIN>::lo(clockpin); CLOCK_LO_DELAY;
} else {
FastPin<DATA_PIN>::lo(datapin);
FastPin<CLOCK_PIN>::hi(clockpin); CLOCK_HI_DELAY;
FastPin<CLOCK_PIN>::lo(clockpin); CLOCK_LO_DELAY;
}
}
// the version of write to use when clock and data are on separate pins with precomputed values for setting
// the clock and data pins
template <uint8_t BIT> __attribute__((always_inline)) inline static void writeBit(uint8_t b, clock_ptr_t clockpin, data_ptr_t datapin,
data_t hival, data_t loval, clock_t hiclock, clock_t loclock) {
// // only need to explicitly set clock hi if clock and data are on different ports
if(b & (1 << BIT)) {
FastPin<DATA_PIN>::fastset(datapin, hival);
FastPin<CLOCK_PIN>::fastset(clockpin, hiclock); CLOCK_HI_DELAY;
FastPin<CLOCK_PIN>::fastset(clockpin, loclock); CLOCK_LO_DELAY;
} else {
// FL_NOP;
FastPin<DATA_PIN>::fastset(datapin, loval);
FastPin<CLOCK_PIN>::fastset(clockpin, hiclock); CLOCK_HI_DELAY;
FastPin<CLOCK_PIN>::fastset(clockpin, loclock); CLOCK_LO_DELAY;
}
}
// the version of write to use when clock and data are on the same port with precomputed values for the various
// combinations
template <uint8_t BIT> __attribute__((always_inline)) inline static void writeBit(uint8_t b, data_ptr_t clockdatapin,
data_t datahiclockhi, data_t dataloclockhi,
data_t datahiclocklo, data_t dataloclocklo) {
#if 0
writeBit<BIT>(b);
#else
if(b & (1 << BIT)) {
FastPin<DATA_PIN>::fastset(clockdatapin, datahiclocklo);
FastPin<DATA_PIN>::fastset(clockdatapin, datahiclockhi); CLOCK_HI_DELAY;
FastPin<DATA_PIN>::fastset(clockdatapin, datahiclocklo); CLOCK_LO_DELAY;
} else {
// FL_NOP;
FastPin<DATA_PIN>::fastset(clockdatapin, dataloclocklo);
FastPin<DATA_PIN>::fastset(clockdatapin, dataloclockhi); CLOCK_HI_DELAY;
FastPin<DATA_PIN>::fastset(clockdatapin, dataloclocklo); CLOCK_LO_DELAY;
}
#endif
}
public:
// select the SPI output (TODO: research whether this really means hi or lo. Alt TODO: move select responsibility out of the SPI classes
// entirely, make it up to the caller to remember to lock/select the line?)
void select() { if(m_pSelect != NULL) { m_pSelect->select(); } } // FastPin<SELECT_PIN>::hi(); }
// release the SPI line
void release() { if(m_pSelect != NULL) { m_pSelect->release(); } } // FastPin<SELECT_PIN>::lo(); }
// Write out len bytes of the given value out over SPI. Useful for quickly flushing, say, a line of 0's down the line.
void writeBytesValue(uint8_t value, int len) {
select();
writeBytesValueRaw(value, len);
release();
}
static void writeBytesValueRaw(uint8_t value, int len) {
#ifdef FAST_SPI_INTERRUPTS_WRITE_PINS
// TODO: Weird things may happen if software bitbanging SPI output and other pins on the output reigsters are being twiddled. Need
// to allow specifying whether or not exclusive i/o access is allowed during this process, and if i/o access is not allowed fall
// back to the degenerative code below
while(len--) {
writeByte(value);
}
#else
register data_ptr_t datapin = FastPin<DATA_PIN>::port();
if(FastPin<DATA_PIN>::port() != FastPin<CLOCK_PIN>::port()) {
// If data and clock are on different ports, then writing a bit will consist of writing the value foor
// the bit (hi or low) to the data pin port, and then two writes to the clock port to strobe the clock line
register clock_ptr_t clockpin = FastPin<CLOCK_PIN>::port();
register data_t datahi = FastPin<DATA_PIN>::hival();
register data_t datalo = FastPin<DATA_PIN>::loval();
register clock_t clockhi = FastPin<CLOCK_PIN>::hival();
register clock_t clocklo = FastPin<CLOCK_PIN>::loval();
while(len--) {
writeByte(value, clockpin, datapin, datahi, datalo, clockhi, clocklo);
}
} else {
// If data and clock are on the same port then we can combine setting the data and clock pins
register data_t datahi_clockhi = FastPin<DATA_PIN>::hival() | FastPin<CLOCK_PIN>::mask();
register data_t datalo_clockhi = FastPin<DATA_PIN>::loval() | FastPin<CLOCK_PIN>::mask();
register data_t datahi_clocklo = FastPin<DATA_PIN>::hival() & ~FastPin<CLOCK_PIN>::mask();
register data_t datalo_clocklo = FastPin<DATA_PIN>::loval() & ~FastPin<CLOCK_PIN>::mask();
while(len--) {
writeByte(value, datapin, datahi_clockhi, datalo_clockhi, datahi_clocklo, datalo_clocklo);
}
}
#endif
}
// write a block of len uint8_ts out. Need to type this better so that explicit casts into the call aren't required.
// note that this template version takes a class parameter for a per-byte modifier to the data.
template <class D> void writeBytes(register uint8_t *data, int len) {
select();
#ifdef FAST_SPI_INTERRUPTS_WRITE_PINS
uint8_t *end = data + len;
while(data != end) {
writeByte(D::adjust(*data++));
}
#else
register clock_ptr_t clockpin = FastPin<CLOCK_PIN>::port();
register data_ptr_t datapin = FastPin<DATA_PIN>::port();
if(FastPin<DATA_PIN>::port() != FastPin<CLOCK_PIN>::port()) {
// If data and clock are on different ports, then writing a bit will consist of writing the value foor
// the bit (hi or low) to the data pin port, and then two writes to the clock port to strobe the clock line
register data_t datahi = FastPin<DATA_PIN>::hival();
register data_t datalo = FastPin<DATA_PIN>::loval();
register clock_t clockhi = FastPin<CLOCK_PIN>::hival();
register clock_t clocklo = FastPin<CLOCK_PIN>::loval();
uint8_t *end = data + len;
while(data != end) {
writeByte(D::adjust(*data++), clockpin, datapin, datahi, datalo, clockhi, clocklo);
}
} else {
// FastPin<CLOCK_PIN>::hi();
// If data and clock are on the same port then we can combine setting the data and clock pins
register data_t datahi_clockhi = FastPin<DATA_PIN>::hival() | FastPin<CLOCK_PIN>::mask();
register data_t datalo_clockhi = FastPin<DATA_PIN>::loval() | FastPin<CLOCK_PIN>::mask();
register data_t datahi_clocklo = FastPin<DATA_PIN>::hival() & ~FastPin<CLOCK_PIN>::mask();
register data_t datalo_clocklo = FastPin<DATA_PIN>::loval() & ~FastPin<CLOCK_PIN>::mask();
uint8_t *end = data + len;
while(data != end) {
writeByte(D::adjust(*data++), datapin, datahi_clockhi, datalo_clockhi, datahi_clocklo, datalo_clocklo);
}
// FastPin<CLOCK_PIN>::lo();
}
#endif
D::postBlock(len);
release();
}
// default version of writing a block of data out to the SPI port, with no data modifications being made
void writeBytes(register uint8_t *data, int len) { writeBytes<DATA_NOP>(data, len); }
// write a block of uint8_ts out in groups of three. len is the total number of uint8_ts to write out. The template
// parameters indicate how many uint8_ts to skip at the beginning of each grouping, as well as a class specifying a per
// byte of data modification to be made. (See DATA_NOP above)
template <uint8_t FLAGS, class D, EOrder RGB_ORDER> __attribute__((noinline)) void writePixels(PixelController<RGB_ORDER> pixels) {
select();
int len = pixels.mLen;
#ifdef FAST_SPI_INTERRUPTS_WRITE_PINS
// If interrupts or other things may be generating output while we're working on things, then we need
// to use this block
while(pixels.has(1)) {
if(FLAGS & FLAG_START_BIT) {
writeBit<0>(1);
}
writeByte(D::adjust(pixels.loadAndScale0()));
writeByte(D::adjust(pixels.loadAndScale1()));
writeByte(D::adjust(pixels.loadAndScale2()));
pixels.advanceData();
pixels.stepDithering();
}
#else
// If we can guaruntee that no one else will be writing data while we are running (namely, changing the values of the PORT/PDOR pins)
// then we can use a bunch of optimizations in here
register data_ptr_t datapin = FastPin<DATA_PIN>::port();
if(FastPin<DATA_PIN>::port() != FastPin<CLOCK_PIN>::port()) {
register clock_ptr_t clockpin = FastPin<CLOCK_PIN>::port();
// If data and clock are on different ports, then writing a bit will consist of writing the value foor
// the bit (hi or low) to the data pin port, and then two writes to the clock port to strobe the clock line
register data_t datahi = FastPin<DATA_PIN>::hival();
register data_t datalo = FastPin<DATA_PIN>::loval();
register clock_t clockhi = FastPin<CLOCK_PIN>::hival();
register clock_t clocklo = FastPin<CLOCK_PIN>::loval();
while(pixels.has(1)) {
if(FLAGS & FLAG_START_BIT) {
writeBit<0>(1, clockpin, datapin, datahi, datalo, clockhi, clocklo);
}
writeByte(D::adjust(pixels.loadAndScale0()), clockpin, datapin, datahi, datalo, clockhi, clocklo);
writeByte(D::adjust(pixels.loadAndScale1()), clockpin, datapin, datahi, datalo, clockhi, clocklo);
writeByte(D::adjust(pixels.loadAndScale2()), clockpin, datapin, datahi, datalo, clockhi, clocklo);
pixels.advanceData();
pixels.stepDithering();
}
} else {
// If data and clock are on the same port then we can combine setting the data and clock pins
register data_t datahi_clockhi = FastPin<DATA_PIN>::hival() | FastPin<CLOCK_PIN>::mask();
register data_t datalo_clockhi = FastPin<DATA_PIN>::loval() | FastPin<CLOCK_PIN>::mask();
register data_t datahi_clocklo = FastPin<DATA_PIN>::hival() & ~FastPin<CLOCK_PIN>::mask();
register data_t datalo_clocklo = FastPin<DATA_PIN>::loval() & ~FastPin<CLOCK_PIN>::mask();
while(pixels.has(1)) {
if(FLAGS & FLAG_START_BIT) {
writeBit<0>(1, datapin, datahi_clockhi, datalo_clockhi, datahi_clocklo, datalo_clocklo);
}
writeByte(D::adjust(pixels.loadAndScale0()), datapin, datahi_clockhi, datalo_clockhi, datahi_clocklo, datalo_clocklo);
writeByte(D::adjust(pixels.loadAndScale1()), datapin, datahi_clockhi, datalo_clockhi, datahi_clocklo, datalo_clocklo);
writeByte(D::adjust(pixels.loadAndScale2()), datapin, datahi_clockhi, datalo_clockhi, datahi_clocklo, datalo_clocklo);
pixels.advanceData();
pixels.stepDithering();
}
}
#endif
D::postBlock(len);
release();
}
};
FASTLED_NAMESPACE_END
#endif