From 54925dfc0e598ab5afe5bafdb71876b54b5a5314 Mon Sep 17 00:00:00 2001 From: Fabian Schlenz Date: Fri, 18 Oct 2019 06:40:09 +0200 Subject: [PATCH] Snake effect now uses a neural net with a hidden layer; a bug in _is_free() that led to lots of snakes committing suicide was fixed; neural net weights are now given as the binary representation of a float to prevent rounding errors. --- include/effect_snake.h | 22 ++++-- src/effect_snake.cpp | 92 +++++++++++++++++-------- src/tools/snakenet/plot | 6 +- src/tools/snakenet/snakenet.rb | 120 +++++++++++++++++++++++++-------- 4 files changed, 178 insertions(+), 62 deletions(-) diff --git a/include/effect_snake.h b/include/effect_snake.h index 84816ee..5b30dbe 100644 --- a/include/effect_snake.h +++ b/include/effect_snake.h @@ -8,6 +8,8 @@ #define SNAKE_DIR_SOUTH 2 #define SNAKE_DIR_WEST 3 +#define SNAKE_DEBUG false + class SnakeEffect : public Effect { private: Coords _pos; @@ -18,10 +20,22 @@ private: uint8_t _length; unsigned long _last_apple_at; unsigned long _last_move_at; - // 204.6 points, length 35, 58% stopped, 42% dead - // float _weights[18] = {0.8613356309729567, 0.1010670216231977, -0.03801953620401166, 0.7556556498067926, -0.9925124063206012, 0.6375227768823608, 0.7216340201735381, 0.5557451907997892, 0.03496949604344035, 0.7238725631217913, 0.9070987343528141, 0.6518810721526125, -0.17322587217593544, -0.7726195238221361, 0.8044226332955624, 0.8434782354002677, 0.8508760698750302, 0.47735675603010397}; - // Round 2077: 208.6 points, length 36, 50% stopped, 50% dead - float _weights[18] = {-0.1648448727142625, -0.7505284618312464, 0.0037384390323656203, -0.6678743938665241, 0.917231716139375, 0.05960885292612439, -0.7585782758281971, 0.8275111343144115, 0.7821852602229209, 0.29970244548911523, 0.9737979047604144, -0.2384723067003974, -0.7854491847031548, 0.44652781127984964, 0.9127919336231882, 0.3309096816699824, -0.9071832356948208, -0.23802066581485848}; + uint16_t _round; + + // Neural net config + // These 
are actually float values. But in order to prevent rounding errors and stuff, they are provided + // in the form of the raw binary data of the IEEE 754 floating point numbers. + // In _decide() there's code to memcpy()-convert them to a float. + // Round 340, 223.4 points, length 39, 36% stopped, 64% died + // const uint32_t _weights[36] = {0xbd8e626e, 0xbee2cd2c, 0x3e4d5cab, 0x3eceb8c3, 0xbed0a514, 0x3ec62438, 0x3e947ed4, 0xbe4b8bf2, 0xbf301113, 0xbf3f0a75, 0x3f1868f7, 0xbf0253ca, 0xbedca2f2, 0xbd547c6d, 0x3edd6a8a, 0xbd4b97b6, 0x3f64ec26, 0xbe5323c1, 0x3eccf87d, 0xbf2d4796, 0xbf62b6e8, 0xbf71daf6, 0xbf03f08e, 0xbf222609, 0x3e26c03c, 0xbf497837, 0xbee4d175, 0x3ec601de, 0x3e4e0695, 0x3eef2619, 0xbe849370, 0xbf18fb2b, 0x3f25bbd1, 0xbf3dcd78, 0x3f37a58d, 0x3ef4a25b}; + // Round 630, 221.0 points, length 38, 36% stopped, 64% died + const uint32_t _weights[36] = {0xbd25943f, 0xbf279d81, 0x3e25d128, 0x3ec62438, 0x3f0e719c, 0x3eefbea9, 0x3e947ed4, 0xbe5323c1, 0xbf2d4796, 0xbf3f0a75, 0x3f0e45d9, 0xbf0253ca, 0xbedca2f2, 0xbd79073c, 0x3ede80ec, 0xbd4b97b6, 0x3f69a6be, 0xbe4b8bf2, 0x3eccf87d, 0xbf301113, 0xbf62b6e8, 0xbf71daf6, 0xbf204130, 0xbf222609, 0x3e26c03c, 0xbf497837, 0xbee4d175, 0x3ec601de, 0x3e4954eb, 0x3eef2619, 0xbe849370, 0xbf18fb2b, 0x3f25bbd1, 0xbf3b4e44, 0x3f484d59, 0x3edd6a8a}; + + const uint8_t _net_layout[3] = {6, 4, 3}; + const uint8_t _net_layers = 3; + const uint8_t _net_total_size = 36; + + uint16_t _xy2i(uint8_t x, uint8_t y); uint16_t _xy2i(Coords c); Coords _i2xy(uint16_t i); diff --git a/src/effect_snake.cpp b/src/effect_snake.cpp index 6fa4e4f..a7e87f7 100644 --- a/src/effect_snake.cpp +++ b/src/effect_snake.cpp @@ -10,6 +10,7 @@ SnakeEffect::SnakeEffect() { void SnakeEffect::_init() { _dying = 0; + _round = 0; _last_apple_at = millis(); _last_move_at = millis(); _dir = SNAKE_DIR_NORTH; @@ -17,6 +18,9 @@ void SnakeEffect::_init() { _pos = {(uint8_t)(window->width/2), (uint8_t)(window->height/2)}; for (int i=0; i<_pixels; i++) _map[i]=0; 
_map[_xy2i(_pos)]=1; + _map[_xy2i(_pos)+window->width*1]=2; + _map[_xy2i(_pos)+window->width*2]=3; + _map[_xy2i(_pos)+window->width*3]=4; _place_apple(); } @@ -26,6 +30,10 @@ SnakeEffect::~SnakeEffect() { } void SnakeEffect::_place_apple() { + if (SNAKE_DEBUG) { + _apple = {3, 3}; + return; + } if (_length < _pixels) { uint8_t start = random8(_pixels); for (int i=0; i<_pixels; i++) { @@ -45,27 +53,50 @@ void SnakeEffect::_decide() { uint8_t a_s = _to_apple(_dir); uint8_t a_r = _to_apple(_dir + 1); - uint8_t inputs[6] = {f_l, f_s, f_r, a_l, a_s, a_r}; - - float outputs[3] = {0.0, 0.0, 0.0}; - - for (int i=0; i<18; i++) { - uint8_t out = i/6; - uint8_t in = i%6; - outputs[out] += _weights[i] * inputs[in]; + float* inputs = new float[6]; + inputs[0] = f_l; + inputs[1] = f_s; + inputs[2] = f_r; + inputs[3] = a_l; + inputs[4] = a_s; + inputs[5] = a_r; + if (SNAKE_DEBUG) LOGln("SnakeEffect * Position: %d, %d - Inputs: %3.1f %3.1f %3.1f %3.1f %3.1f %3.1f", _pos.x, _pos.y, inputs[0], inputs[1], inputs[2], inputs[3], inputs[4], inputs[5]); + float* outputs = NULL; + uint8_t i=0; + for (uint8_t layer=1; layer<_net_layers; layer++) { + outputs = new float[_net_layout[layer]]; + for (uint8_t j=0; j<_net_layout[layer]; j++) { + outputs[j] = 0.0; + } + for (uint8_t idx_out=0; idx_out<_net_layout[layer]; idx_out++) { + for (uint8_t idx_in=0; idx_in<_net_layout[layer-1]; idx_in++) { + float weight; + memcpy(&weight, &(_weights[i]), sizeof(weight)); + outputs[idx_out] += weight * inputs[idx_in]; + //outputs[idx_out] += (*(float*)&(_weights[i])) * inputs[idx_in]; + i++; + } + } + delete inputs; + inputs = outputs; } int8_t decision = 0; - if (outputs[0]>=outputs[1] && outputs[0]>=outputs[2]) { - decision = -1; - } else if (outputs[1]>=outputs[2]) { - decision = 0; - } else { - decision = 1; + float last; + for (uint8_t i=0; i<_net_layout[_net_layers - 1]; i++) { + if (i==0 || outputs[i]>last) { + last = outputs[i]; + decision = i; + } } + decision = decision - 1; + delete outputs; + 
+ if (SNAKE_DEBUG) LOGln("SnakeEffect * Decision: %d", decision); _dir += decision; if (_dir < 0) _dir += 4; + if (_dir > 3) _dir -= 4; } /** @@ -94,8 +125,7 @@ int8_t SnakeEffect::_manual_decision() { }*/ bool SnakeEffect::_is_free(uint8_t dir) { - Coords np = _new_pos(dir); - return np.x>=0 && np.xwidth && np.y>=0 && np.yheight && _map[_xy2i(np)]==0; + return _free_spaces(dir)!=0; } uint8_t SnakeEffect::_free_spaces(uint8_t dir) { @@ -109,15 +139,15 @@ uint8_t SnakeEffect::_free_spaces(uint8_t dir) { case SNAKE_DIR_WEST: x=-1; break; } Coords p(_pos); - uint8_t i; - for(i=0; iwidth || iheight; i++) { + uint8_t i=0; + while (true) { p.x += x; p.y += y; if (p.x<0 || p.x>=window->width || p.y<0 || p.y>=window->height || _map[_xy2i(p)]!=0) { - break; + return i; } + i++; } - return i; } uint8_t SnakeEffect::_to_apple(uint8_t dir) { @@ -159,10 +189,7 @@ Coords SnakeEffect::_i2xy(uint16_t i) { } void SnakeEffect::_move() { - if (_dying==0 && !_is_free(_dir)) { - _dying = 150; - return; - } + if (_dying > 0) { _dying--; @@ -175,11 +202,21 @@ void SnakeEffect::_move() { } unsigned long now = millis(); - if (_last_move_at < now && now - _last_move_at < 100) { + if (_last_move_at < now && now - _last_move_at < 0) { return; } + _round++; _last_move_at = now; + _decide(); + + if (_dying==0 && !_is_free(_dir)) { + _dying = 150; + return; + } + _pos = _new_pos(_dir); + if (SNAKE_DEBUG) LOGln("SnakeEffect * new_pos: %d, %d", _pos.x, _pos.y); + if (SNAKE_DEBUG) LOGln("SnakeEffect * apple: %d, %d", _apple.x, _apple.y); if (_pos.x==_apple.x && _pos.y==_apple.y) { _last_apple_at = millis(); _length++; @@ -215,12 +252,9 @@ void SnakeEffect::loop(uint16_t ms) { //CRGB color(CHSV(hue, 200, 255)); //window->setPixel(this->coords.x, this->coords.y, &color); //hue++; - if (millis() < _last_apple_at || millis() - _last_apple_at > 30000) { + if (_dying==0 && (millis() < _last_apple_at || millis() - _last_apple_at > 30000)) { _dying = 150; } - if (_dying==0) { - _decide(); - } _move(); 
_draw(); } diff --git a/src/tools/snakenet/plot b/src/tools/snakenet/plot index 05d6159..9207c99 100755 --- a/src/tools/snakenet/plot +++ b/src/tools/snakenet/plot @@ -1,2 +1,6 @@ #!/usr/bin/gnuplot -c -set term dumb 79 49; plot 'data_set.dat' notitle +set term dumb 79 49 +plot 'data_set.dat' using 1:2 title 'Points', \ +# 'data_set.dat' using 1:3 title 'Length' axes x1y2, \ +# 'data_set.dat' using 1:4 title 'Stopped' axes x1y2, \ +# 'data_set.dat' using 1:5 title 'Dead' axes x1y2 diff --git a/src/tools/snakenet/snakenet.rb b/src/tools/snakenet/snakenet.rb index 6165a5d..8bcdf75 100644 --- a/src/tools/snakenet/snakenet.rb +++ b/src/tools/snakenet/snakenet.rb @@ -14,8 +14,10 @@ class Game POINTS_MOVING_FAR = -1.5 attr_reader :points, :dead, :ai, :length + attr_accessor :apple - def initialize(a) + def initialize(a, debug=false) + @debug = debug @ai = a @data = [0]*(WIDTH*HEIGHT) @dir = 0 @@ -29,6 +31,7 @@ class Game @dead = false @round = 0 @last_apple_at = 0 + @count_left = @count_right = 0 place_apple() end @@ -63,6 +66,8 @@ class Game return if @dead decision = @ai.decide(free?(@dir-1), free?(@dir), free?(@dir+1), apple?(@dir-1), apple?(@dir), apple?(@dir+1)) #puts "Decision: #{decision}" + @count_left += 1 if decision==-1 + @count_right += 1 if decision==1 @dir = (@dir + decision) % 4 if (free?(@dir)==0) #puts "Dead." @@ -73,11 +78,11 @@ class Game move end - def ranking; @length*10 - (@dead ? 200 : 0) - (since_last_apple >= 160 ? 100 : 0); end + def ranking; @length*10 - (@dead ? 200 : 0) - (stopped? ? 
100 : 0) - (@count_right - @count_left).abs * 0.05; end def move newpos = calc_new_pos(@pos, @dir) - #puts "Newpos: #{newpos}" + puts "Newpos: #{newpos}" if @debug if newpos==@apple @length+=1 @points += POINTS_APPLE @@ -156,23 +161,41 @@ class Game @dead = true end - def stopped?; since_last_apple >= WIDTH*HEIGHT*1.5; end + def stopped?; since_last_apple >= WIDTH*HEIGHT*2; end def ai_ranking; ai.ranking; end end class AI - attr_reader :weights + NETWORK_LAYOUT = [6, 4, 3] + attr_reader :weights, :id attr_accessor :ranking, :rounds, :count_dead, :count_stopped, :sum_length - def initialize(w=nil) + def initialize(w=nil, debug=false) + @debug = debug reset() @rounds = 1 + @id = rand(0xFFFFFF) if w==nil - @weights = Array.new(18) { rand() * 2.0 - 1.0 } + @weights = Array.new(network_size()) { rand() * 2.0 - 1.0 } + puts "Initialized with random values: #{@weights}" if @debug else - @weights = w + if w[0].is_a? Integer + @weights = w.map{|s| s.to_s(16).rjust(8, "0").split("").each_slice(2).to_a.map(&:join).map{|s| s.to_i(16).chr}.join.unpack("g")}.flatten + else + @weights = w + end + puts "Initialized with given values: #{@weights}" if @debug end + + end + + def network_size + s = 0 + (0...(NETWORK_LAYOUT.count-1)).each do |i| + s += NETWORK_LAYOUT[i] * NETWORK_LAYOUT[i+1] + end + return s end def reset @@ -191,43 +214,59 @@ class AI def decide(left_free, straight_free, right_free, apple_left, apple_straight, apple_right) inputs = [left_free, straight_free, right_free, apple_left, apple_straight, apple_right] - #pp inputs - outputs = [0, 0, 0] - (0...18).each do |x| - o = x/6 - i = x%6 - outputs[o] += inputs[i] * @weights[x] + puts "Inputs: #{inputs}" if @debug + outputs = nil + x = 0 + (1...(NETWORK_LAYOUT.count)).each do |i| + c_in = NETWORK_LAYOUT[i-1] + c_out = NETWORK_LAYOUT[i] + outputs = Array.new(c_out){0.0} + (0...c_out).each do |o| + (0...c_in).each do |i| + outputs[o] += inputs[i] * @weights[x] + x+=1 + end + end + inputs = outputs end + max = 0 take = 0 - 
(0...3).each do |x| + (0...(NETWORK_LAYOUT.last)).each do |x| if outputs[x]>max max = outputs[x] take = x end end + puts "Decision: #{take-1}" if @debug return take-1 end def evolve w = @weights.dup - action = rand(4) + action = rand(5) if action==0 #swap - i1 = rand(18) - i2 = rand(18) + i1 = rand(network_size()) + i2 = rand(network_size()) temp = w[i1] w[i1] = w[i2] w[i2] = temp elsif action==1 #change single value - i = rand(18) + i = rand(network_size()) w[i] = rand() * 2 - 1.0 elsif action==2 #invert single value - i = rand(18) + i = rand(network_size()) w[i] *= -1.0 + elsif action==3 + (0...network_size()).each do |i| + w[i] = rand() * 2 - 1.0 if rand(5)==0 + end else #change multiple values - (0...18).each do |i| + (0...network_size()).each do |i| if (rand(5)==0) - w[i] = rand() * 2 - 1 + w[i] += rand() / 5.0 - 0.1 + w[i] = 1.0 if w[i]>1.0 + w[i] = -1.0 if w[i]<-1.0 end end end @@ -238,7 +277,7 @@ class AI def merge(ai) w = @weights.dup w2 = ai.weights.dup - (0...18).each do |i| + (0...network_size()).each do |i| if rand(2)==0 w[i] = w2[i] end @@ -249,19 +288,33 @@ class AI def average(ai) w = @weights.dup w2 = ai.weights - (0...18).each do |i| + (0...network_size()).each do |i| w[i] = (w[i] + w2[i]) / 2.0 end return AI.new(w) end def dump - puts "Data:" - puts "float _weights[18] = {#{@weights.join(", ")}};" + puts "const uint32_t _weights[#{network_size()}] = {#{@weights.map{|x| "0x" + [x].pack('g').split("").map(&:ord).map{|i| i.to_s(16).rjust(2, '0')}.join}.join(", ")}};" #puts "Simplified: #{simplified}" end end +## Simulate +=begin +ai = AI.new([0xbd547c6d, 0xbedc84a5, 0x3e750239, 0x3ec5ae8a, 0xbcc9a683, 0x3f18715a, 0x3e947ed4, 0xbe4b8bf2, 0xbf2ee4ec, 0xbf3f0a75, +0x3f5392dc, 0xbf06687b, 0xbedca2f2, 0xbcde3698, 0x3edd6a8a, 0xbd7284ca, 0x3ea7bac9, 0xbe5323c1, 0x3eccf87d, +0xbf2d4796, 0xbf62b6e8, 0xbf71daf6, 0xbeff40aa, 0xbf207014, 0x3e26c03c, 0xbf497837, 0xbee4d175, 0x3ec601de, 0x3e4e0695, 0x3eef2619, +0xbe849370, 0xbf18fb2b, 0x3f128e17, 0xbf3dcd78, 
0x3f517299, 0x3eef3270], true) +g = Game.new(ai, true) +g.apple = [3, 3] +10.times do + g.loop +end +exit +=end + + graph = File.open(File.dirname(__FILE__) + "/data_set.dat", "w") graph.puts("# Round - Points - Length - Stopped - Dead") @@ -272,6 +325,7 @@ games = [] ais[x] = AI.new#(SEEDS.sample) end +best_old_game = nil best_old_ai = nil begin loop do @@ -297,8 +351,14 @@ loop do games_sorted = games.sort_by(&:ai_ranking).reverse.take(5) g = games_sorted[0] + + if (round-1)%50==0 + puts "----------------------------------------------------" + puts "Round | Points | Length | Stopped | Dead | ID " + puts "----------------------------------------------------" + end - puts "Round %5d: %7.1f points, length %3.0f, %3.0f%% stopped, %3.0f%% dead - {%s}" % [round, g.ai_ranking / GAMES_PER_ROUND, g.ai.sum_length.to_f / GAMES_PER_ROUND, g.ai.count_stopped.to_f / GAMES_PER_ROUND * 100, g.ai.count_dead.to_f / GAMES_PER_ROUND * 100, g.ai.weights.map{|v| v.truncate(1).to_s.rjust(4)}.join(", ")] + puts "%5d | %7.1f | %6.0f | %6.0f%% | %3.0f%% | 0x%06x" % [round, g.ai_ranking / GAMES_PER_ROUND, g.ai.sum_length.to_f / GAMES_PER_ROUND, g.ai.count_stopped.to_f / GAMES_PER_ROUND * 100, g.ai.count_dead.to_f / GAMES_PER_ROUND * 100, g.ai.id] graph.puts("%d %f %f %f %f" % [round, g.ai_ranking / GAMES_PER_ROUND, g.ai.sum_length.to_f / GAMES_PER_ROUND, g.ai.count_stopped.to_f / GAMES_PER_ROUND * 100, g.ai.count_dead.to_f / GAMES_PER_ROUND * 100]) graph.flush @@ -306,7 +366,8 @@ loop do g.ai.dump end - best_old_ai = g.ai + best_old_game = g + best_old_ai = g.ai.dup ais = [] games_sorted.each do |g| @@ -331,6 +392,9 @@ loop do round+=1 end rescue SystemExit, Interrupt + puts + puts + puts "// Round %d, %5.1f points, length %3d, %2.0f%% stopped, %2.0f%% died" % [round-1, best_old_game.ai_ranking / GAMES_PER_ROUND, best_old_ai.sum_length.to_f / GAMES_PER_ROUND, best_old_ai.count_stopped.to_f / GAMES_PER_ROUND * 100, best_old_ai.count_dead.to_f / GAMES_PER_ROUND * 100] best_old_ai.dump 
graph.close end