#ifndef __INC_M0_CLOCKLESS_H #define __INC_M0_CLOCKLESS_H struct M0ClocklessData { uint8_t d[3]; uint8_t e[3]; uint8_t adj; uint8_t pad; uint32_t s[3]; }; templateint showLedData(volatile uint32_t *_port, uint32_t _bitmask, const uint8_t *_leds, uint32_t num_leds, struct M0ClocklessData *pData) { // Lo register variables register uint32_t scratch=0; register struct M0ClocklessData *base = pData; register volatile uint32_t *port = _port; register uint32_t d=0; register uint32_t counter=num_leds; register uint32_t bn=0; register uint32_t b=0; register uint32_t bitmask = _bitmask; // high register variable register const uint8_t *leds = _leds; #if (FASTLED_SCALE8_FIXED == 1) pData->s[0]++; pData->s[1]++; pData->s[2]++; #endif asm __volatile__ ( /////////////////////////////////////////////////////////////////////////// // // asm macro definitions - used to assemble the clockless output // ".ifnotdef fl_delay_def;" #ifdef FASTLED_ARM_M0_PLUS " .set fl_is_m0p, 1;" " .macro m0pad;" " nop;" " .endm;" #else " .set fl_is_m0p, 0;" " .macro m0pad;" " .endm;" #endif " .set fl_delay_def, 1;" " .set fl_delay_mod, 4;" " .if fl_is_m0p == 1;" " .set fl_delay_mod, 3;" " .endif;" " .macro fl_delay dtime, reg=r0;" " .if (\\dtime > 0);" " .set dcycle, (\\dtime / fl_delay_mod);" " .set dwork, (dcycle * fl_delay_mod);" " .set drem, (\\dtime - dwork);" " .rept (drem);" " nop;" " .endr;" " .if dcycle > 0;" " mov \\reg, #dcycle;" " delayloop_\\@:;" " sub \\reg, #1;" " bne delayloop_\\@;" " .if fl_is_m0p == 0;" " nop;" " .endif;" " .endif;" " .endif;" " .endm;" " .macro mod_delay dtime,b1,b2,reg;" " .set adj, (\\b1 + \\b2);" " .if adj < \\dtime;" " .set dtime2, (\\dtime - adj);" " fl_delay dtime2, \\reg;" " .endif;" " .endm;" // check the bit and drop the line low if it isn't set " .macro qlo4 b,bitmask,port,loff ;" " lsl \\b, #1 ;" " bcs skip_\\@ ;" " str \\bitmask, [\\port, \\loff] ;" " skip_\\@: ;" " m0pad;" " .endm ;" // set the pin hi or low (determined by the offset passed in ) " .macro qset2 bitmask,port,loff;" " str \\bitmask, [\\port, \\loff];" " m0pad;" " .endm;" // Load up the next led byte to work with, put it in bn " .macro loadleds3 leds, bn, rled, scratch;" " mov \\scratch, \\leds;" " ldrb \\bn, [\\scratch, \\rled];" " .endm;" // check whether or not we should dither " .macro loaddither7 bn,d,base,rdither;" " ldrb \\d, [\\base, \\rdither];" " lsl \\d, #24;" //; shift high for the qadd w/bn " lsl \\bn, #24;" //; shift high for the qadd w/d " bne chkskip_\\@;" //; if bn==0, clear d;" " eor \\d, \\d;" //; clear d;" " m0pad;" " chkskip_\\@:;" " .endm;" // Do the qadd8 for dithering -- there's two versions of this. The m0 version // takes advantage of the 3 cycle branch to do two things after the branch, // while keeping timing constant. The m0+, however, branches in 2 cycles, so // we have to work around that a bit more. This is one of the few times // where the m0 will actually be _more_ efficient than the m0+ " .macro dither5 bn,d;" " .syntax unified;" " .if fl_is_m0p == 0;" " adds \\bn, \\d;" // do the add " bcc dither5_1_\\@;" " mvns \\bn, \\bn;" // set the low 24bits ot 1's " lsls \\bn, \\bn, #24;" // move low 8 bits to the high bits " dither5_1_\\@:;" " nop;" // nop to keep timing in line " .else;" " adds \\bn, \\d;" // do the add" " bcc dither5_2_\\@;" " mvns \\bn, \\bn;" // set the low 24bits ot 1's " dither5_2_\\@:;" " bcc dither5_3_\\@;" " lsls \\bn, \\bn, #24;" // move low 8 bits to the high bits " dither5_3_\\@:;" " .endif;" " .syntax divided;" " .endm;" // Do our scaling " .macro scale4 bn, base, scale, scratch;" " ldr \\scratch, [\\base, \\scale];" " lsr \\bn, \\bn, #24;" // bring bn back down to its low 8 bits " mul \\bn, \\scratch;" // do the multiply " .endm;" // swap bn into b " .macro swapbbn1 b,bn;" " lsl \\b, \\bn, #16;" // put the 8 bits we want for output high " .endm;" // adjust the dithering value for the next time around (load e from memory // to do the math) " .macro adjdither7 base,d,rled,eoffset,scratch;" " ldrb \\d, [\\base, \\rled];" " ldrb \\scratch,[\\base,\\eoffset];" // load e " .syntax unified;" " subs \\d, \\scratch, \\d;" // d=e-d " .syntax divided;" " strb \\d, [\\base, \\rled];" // save d " .endm;" // increment the led pointer (base+6 has what we're incrementing by) " .macro incleds3 leds, base, scratch;" " ldrb \\scratch, [\\base, #6];" // load incremen " add \\leds, \\leds, \\scratch;" // update leds pointer " .endm;" // compare and loop " .macro cmploop5 counter,label;" " .syntax unified;" " subs \\counter, #1;" " .syntax divided;" " beq done_\\@;" " m0pad;" " b \\label;" " done_\\@:;" " .endm;" " .endif;" ); #define M0_ASM_ARGS : \ [leds] "+h" (leds), \ [counter] "+l" (counter), \ [scratch] "+l" (scratch), \ [d] "+l" (d), \ [bn] "+l" (bn), \ [b] "+l" (b) \ : \ [port] "l" (port), \ [base] "l" (base), \ [bitmask] "l" (bitmask), \ [hi_off] "I" (HI_OFFSET), \ [lo_off] "I" (LO_OFFSET), \ [led0] "I" (RO(0)), \ [led1] "I" (RO(1)), \ [led2] "I" (RO(2)), \ [e0] "I" (3+RO(0)), \ [e1] "I" (3+RO(1)), \ [e2] "I" (3+RO(2)), \ [scale0] "I" (4*(2+RO(0))), \ [scale1] "I" (4*(2+RO(1))), \ [scale2] "I" (4*(2+RO(2))), \ [T1] "I" (T1), \ [T2] "I" (T2), \ [T3] "I" (T3) \ : ///////////////////////////////////////////////////////////////////////// // now for some convinience macros to make building our lines a bit cleaner #define LOOP " loop_%=:" #define HI2 " qset2 %[bitmask], %[port], %[hi_off];" #define _D1 " mod_delay %c[T1],2,0,%[scratch];" #define QLO4 " qlo4 %[b],%[bitmask],%[port], %[lo_off];" #define LOADLEDS3(X) " loadleds3 %[leds], %[bn], %[led" #X "] ,%[scratch];" #define _D2(ADJ) " mod_delay %c[T2],4," #ADJ ",%[scratch];" #define LO2 " qset2 %[bitmask], %[port], %[lo_off];" #define _D3(ADJ) " mod_delay %c[T3],2," #ADJ ",%[scratch];" #define LOADDITHER7(X) " loaddither7 %[bn], %[d], %[base], %[led" #X "];" #define DITHER5 " dither5 %[bn], %[d];" #define SCALE4(X) " scale4 %[bn], %[base], %[scale" #X "], %[scratch];" #define SWAPBBN1 " swapbbn1 %[b], %[bn];" #define ADJDITHER7(X) " adjdither7 %[base],%[d],%[led" #X "],%[e" #X "],%[scratch];" #define INCLEDS3 " incleds3 %[leds],%[base],%[scratch];" #define CMPLOOP5 " cmploop5 %[counter], loop_%=;" #define NOTHING "" #if (defined(SEI_CHK) && (FASTLED_ALLOW_INTERRUPTS == 1)) // We're allowing interrupts and have hardware timer support defined - // track the loop outside the asm code, to allow inserting the interrupt // overrun checks. asm __volatile__ ( // pre-load byte 0 LOADLEDS3(0) LOADDITHER7(0) DITHER5 SCALE4(0) ADJDITHER7(0) SWAPBBN1 M0_ASM_ARGS); do { asm __volatile__ ( // Write out byte 0, prepping byte 1 HI2 _D1 QLO4 NOTHING _D2(0) LO2 _D3(0) HI2 _D1 QLO4 LOADLEDS3(1) _D2(3) LO2 _D3(0) HI2 _D1 QLO4 LOADDITHER7(1) _D2(7) LO2 _D3(0) HI2 _D1 QLO4 DITHER5 _D2(5) LO2 _D3(0) HI2 _D1 QLO4 SCALE4(1) _D2(4) LO2 _D3(0) HI2 _D1 QLO4 ADJDITHER7(1) _D2(7) LO2 _D3(0) HI2 _D1 QLO4 NOTHING _D2(0) LO2 _D3(0) HI2 _D1 QLO4 SWAPBBN1 _D2(1) LO2 _D3(0) // Write out byte 1, prepping byte 2 HI2 _D1 QLO4 NOTHING _D2(0) LO2 _D3(0) HI2 _D1 QLO4 LOADLEDS3(2) _D2(3) LO2 _D3(0) HI2 _D1 QLO4 LOADDITHER7(2) _D2(7) LO2 _D3(0) HI2 _D1 QLO4 DITHER5 _D2(5) LO2 _D3(0) HI2 _D1 QLO4 SCALE4(2) _D2(4) LO2 _D3(0) HI2 _D1 QLO4 ADJDITHER7(2) _D2(7) LO2 _D3(0) HI2 _D1 QLO4 NOTHING _D2(0) LO2 _D3(0) HI2 _D1 QLO4 SWAPBBN1 _D2(1) LO2 _D3(0) // Write out byte 2, prepping byte 0 HI2 _D1 QLO4 INCLEDS3 _D2(3) LO2 _D3(0) HI2 _D1 QLO4 LOADLEDS3(0) _D2(3) LO2 _D3(0) HI2 _D1 QLO4 LOADDITHER7(0) _D2(7) LO2 _D3(0) HI2 _D1 QLO4 DITHER5 _D2(5) LO2 _D3(0) HI2 _D1 QLO4 SCALE4(0) _D2(4) LO2 _D3(0) HI2 _D1 QLO4 ADJDITHER7(0) _D2(7) LO2 _D3(0) HI2 _D1 QLO4 NOTHING _D2(0) LO2 _D3(0) HI2 _D1 QLO4 SWAPBBN1 _D2(1) LO2 _D3(5) M0_ASM_ARGS ); SEI_CHK; INNER_SEI; --counter; CLI_CHK; } while(counter); #elif (FASTLED_ALLOW_INTERRUPTS == 1) // We're allowing interrupts - track the loop outside the asm code, and // re-enable interrupts in between each iteration. asm __volatile__ ( // pre-load byte 0 LOADLEDS3(0) LOADDITHER7(0) DITHER5 SCALE4(0) ADJDITHER7(0) SWAPBBN1 M0_ASM_ARGS); do { asm __volatile__ ( // Write out byte 0, prepping byte 1 HI2 _D1 QLO4 NOTHING _D2(0) LO2 _D3(0) HI2 _D1 QLO4 LOADLEDS3(1) _D2(3) LO2 _D3(0) HI2 _D1 QLO4 LOADDITHER7(1) _D2(7) LO2 _D3(0) HI2 _D1 QLO4 DITHER5 _D2(5) LO2 _D3(0) HI2 _D1 QLO4 SCALE4(1) _D2(4) LO2 _D3(0) HI2 _D1 QLO4 ADJDITHER7(1) _D2(7) LO2 _D3(0) HI2 _D1 QLO4 NOTHING _D2(0) LO2 _D3(0) HI2 _D1 QLO4 SWAPBBN1 _D2(1) LO2 _D3(0) // Write out byte 1, prepping byte 2 HI2 _D1 QLO4 NOTHING _D2(0) LO2 _D3(0) HI2 _D1 QLO4 LOADLEDS3(2) _D2(3) LO2 _D3(0) HI2 _D1 QLO4 LOADDITHER7(2) _D2(7) LO2 _D3(0) HI2 _D1 QLO4 DITHER5 _D2(5) LO2 _D3(0) HI2 _D1 QLO4 SCALE4(2) _D2(4) LO2 _D3(0) HI2 _D1 QLO4 ADJDITHER7(2) _D2(7) LO2 _D3(0) HI2 _D1 QLO4 INCLEDS3 _D2(3) LO2 _D3(0) HI2 _D1 QLO4 SWAPBBN1 _D2(1) LO2 _D3(0) // Write out byte 2, prepping byte 0 HI2 _D1 QLO4 NOTHING _D2(0) LO2 _D3(0) HI2 _D1 QLO4 LOADLEDS3(0) _D2(3) LO2 _D3(0) HI2 _D1 QLO4 LOADDITHER7(0) _D2(7) LO2 _D3(0) HI2 _D1 QLO4 DITHER5 _D2(5) LO2 _D3(0) HI2 _D1 QLO4 SCALE4(0) _D2(4) LO2 _D3(0) HI2 _D1 QLO4 ADJDITHER7(0) _D2(7) LO2 _D3(0) HI2 _D1 QLO4 NOTHING _D2(0) LO2 _D3(0) HI2 _D1 QLO4 SWAPBBN1 _D2(1) LO2 _D3(5) M0_ASM_ARGS ); uint32_t ticksBeforeInterrupts = SysTick->VAL; sei(); --counter; cli(); // If more than 45 uSecs have elapsed, give up on this frame and start over. // Note: this isn't completely correct. It's possible that more than one // millisecond will elapse, and so SysTick->VAL will lap // ticksBeforeInterrupts. // Note: ticksBeforeInterrupts DECREASES const uint32_t kTicksPerMs = VARIANT_MCK / 1000; const uint32_t kTicksPerUs = kTicksPerMs / 1000; const uint32_t kTicksIn45us = kTicksPerUs * 45; const uint32_t currentTicks = SysTick->VAL; if (ticksBeforeInterrupts < currentTicks) { // Timer started over if ((ticksBeforeInterrupts + (kTicksPerMs - currentTicks)) > kTicksIn45us) { return 0; } } else { if ((ticksBeforeInterrupts - currentTicks) > kTicksIn45us) { return 0; } } } while(counter); #else // We're not allowing interrupts - run the entire loop in asm to keep things // as tight as possible. In an ideal world, we should be pushing out ws281x // leds (or other 3-wire leds) with zero gaps between pixels. asm __volatile__ ( // pre-load byte 0 LOADLEDS3(0) LOADDITHER7(0) DITHER5 SCALE4(0) ADJDITHER7(0) SWAPBBN1 // loop over writing out the data LOOP // Write out byte 0, prepping byte 1 HI2 _D1 QLO4 NOTHING _D2(0) LO2 _D3(0) HI2 _D1 QLO4 LOADLEDS3(1) _D2(3) LO2 _D3(0) HI2 _D1 QLO4 LOADDITHER7(1) _D2(7) LO2 _D3(0) HI2 _D1 QLO4 DITHER5 _D2(5) LO2 _D3(0) HI2 _D1 QLO4 SCALE4(1) _D2(4) LO2 _D3(0) HI2 _D1 QLO4 ADJDITHER7(1) _D2(7) LO2 _D3(0) HI2 _D1 QLO4 NOTHING _D2(0) LO2 _D3(0) HI2 _D1 QLO4 SWAPBBN1 _D2(1) LO2 _D3(0) // Write out byte 1, prepping byte 2 HI2 _D1 QLO4 NOTHING _D2(0) LO2 _D3(0) HI2 _D1 QLO4 LOADLEDS3(2) _D2(3) LO2 _D3(0) HI2 _D1 QLO4 LOADDITHER7(2) _D2(7) LO2 _D3(0) HI2 _D1 QLO4 DITHER5 _D2(5) LO2 _D3(0) HI2 _D1 QLO4 SCALE4(2) _D2(4) LO2 _D3(0) HI2 _D1 QLO4 ADJDITHER7(2) _D2(7) LO2 _D3(0) HI2 _D1 QLO4 INCLEDS3 _D2(3) LO2 _D3(0) HI2 _D1 QLO4 SWAPBBN1 _D2(1) LO2 _D3(0) // Write out byte 2, prepping byte 0 HI2 _D1 QLO4 NOTHING _D2(0) LO2 _D3(0) HI2 _D1 QLO4 LOADLEDS3(0) _D2(3) LO2 _D3(0) HI2 _D1 QLO4 LOADDITHER7(0) _D2(7) LO2 _D3(0) HI2 _D1 QLO4 DITHER5 _D2(5) LO2 _D3(0) HI2 _D1 QLO4 SCALE4(0) _D2(4) LO2 _D3(0) HI2 _D1 QLO4 ADJDITHER7(0) _D2(7) LO2 _D3(0) HI2 _D1 QLO4 NOTHING _D2(0) LO2 _D3(0) HI2 _D1 QLO4 SWAPBBN1 _D2(1) LO2 _D3(5) CMPLOOP5 M0_ASM_ARGS ); #endif return num_leds; } #endif