Snake effect now uses a neural net with a hidden layer; a bug in _is_free() that led to lots of snakes committing suicide was fixed; neural net weights are now given as the binary representation of a float to prevent rounding errors.

This commit is contained in:
Fabian Schlenz 2019-10-18 06:40:09 +02:00
parent 306f72d838
commit 54925dfc0e
4 changed files with 178 additions and 62 deletions

View File

@ -8,6 +8,8 @@
#define SNAKE_DIR_SOUTH 2 #define SNAKE_DIR_SOUTH 2
#define SNAKE_DIR_WEST 3 #define SNAKE_DIR_WEST 3
#define SNAKE_DEBUG false
class SnakeEffect : public Effect { class SnakeEffect : public Effect {
private: private:
Coords _pos; Coords _pos;
@ -18,10 +20,22 @@ private:
uint8_t _length; uint8_t _length;
unsigned long _last_apple_at; unsigned long _last_apple_at;
unsigned long _last_move_at; unsigned long _last_move_at;
// 204.6 points, length 35, 58% stopped, 42% dead uint16_t _round;
// float _weights[18] = {0.8613356309729567, 0.1010670216231977, -0.03801953620401166, 0.7556556498067926, -0.9925124063206012, 0.6375227768823608, 0.7216340201735381, 0.5557451907997892, 0.03496949604344035, 0.7238725631217913, 0.9070987343528141, 0.6518810721526125, -0.17322587217593544, -0.7726195238221361, 0.8044226332955624, 0.8434782354002677, 0.8508760698750302, 0.47735675603010397};
// Round 2077: 208.6 points, length 36, 50% stopped, 50% dead // Neural net config
float _weights[18] = {-0.1648448727142625, -0.7505284618312464, 0.0037384390323656203, -0.6678743938665241, 0.917231716139375, 0.05960885292612439, -0.7585782758281971, 0.8275111343144115, 0.7821852602229209, 0.29970244548911523, 0.9737979047604144, -0.2384723067003974, -0.7854491847031548, 0.44652781127984964, 0.9127919336231882, 0.3309096816699824, -0.9071832356948208, -0.23802066581485848}; // These are actually float values. But in order to prevent rounding errors and stuff, they are provided
// in form of the raw binary data of the IEEE 754 floating point numbers.
// In _decide() there's code to memcpy()-convert them to a float.
// Round 340, 223.4 points, length 39, 36% stopped, 64% died
// const uint32_t _weights[36] = {0xbd8e626e, 0xbee2cd2c, 0x3e4d5cab, 0x3eceb8c3, 0xbed0a514, 0x3ec62438, 0x3e947ed4, 0xbe4b8bf2, 0xbf301113, 0xbf3f0a75, 0x3f1868f7, 0xbf0253ca, 0xbedca2f2, 0xbd547c6d, 0x3edd6a8a, 0xbd4b97b6, 0x3f64ec26, 0xbe5323c1, 0x3eccf87d, 0xbf2d4796, 0xbf62b6e8, 0xbf71daf6, 0xbf03f08e, 0xbf222609, 0x3e26c03c, 0xbf497837, 0xbee4d175, 0x3ec601de, 0x3e4e0695, 0x3eef2619, 0xbe849370, 0xbf18fb2b, 0x3f25bbd1, 0xbf3dcd78, 0x3f37a58d, 0x3ef4a25b};
// Round 630, 221.0 points, length 38, 36% stopped, 64% died
const uint32_t _weights[36] = {0xbd25943f, 0xbf279d81, 0x3e25d128, 0x3ec62438, 0x3f0e719c, 0x3eefbea9, 0x3e947ed4, 0xbe5323c1, 0xbf2d4796, 0xbf3f0a75, 0x3f0e45d9, 0xbf0253ca, 0xbedca2f2, 0xbd79073c, 0x3ede80ec, 0xbd4b97b6, 0x3f69a6be, 0xbe4b8bf2, 0x3eccf87d, 0xbf301113, 0xbf62b6e8, 0xbf71daf6, 0xbf204130, 0xbf222609, 0x3e26c03c, 0xbf497837, 0xbee4d175, 0x3ec601de, 0x3e4954eb, 0x3eef2619, 0xbe849370, 0xbf18fb2b, 0x3f25bbd1, 0xbf3b4e44, 0x3f484d59, 0x3edd6a8a};
const uint8_t _net_layout[3] = {6, 4, 3};
const uint8_t _net_layers = 3;
const uint8_t _net_total_size = 36;
uint16_t _xy2i(uint8_t x, uint8_t y); uint16_t _xy2i(uint8_t x, uint8_t y);
uint16_t _xy2i(Coords c); uint16_t _xy2i(Coords c);
Coords _i2xy(uint16_t i); Coords _i2xy(uint16_t i);

View File

@ -10,6 +10,7 @@ SnakeEffect::SnakeEffect() {
void SnakeEffect::_init() { void SnakeEffect::_init() {
_dying = 0; _dying = 0;
_round = 0;
_last_apple_at = millis(); _last_apple_at = millis();
_last_move_at = millis(); _last_move_at = millis();
_dir = SNAKE_DIR_NORTH; _dir = SNAKE_DIR_NORTH;
@ -17,6 +18,9 @@ void SnakeEffect::_init() {
_pos = {(uint8_t)(window->width/2), (uint8_t)(window->height/2)}; _pos = {(uint8_t)(window->width/2), (uint8_t)(window->height/2)};
for (int i=0; i<_pixels; i++) _map[i]=0; for (int i=0; i<_pixels; i++) _map[i]=0;
_map[_xy2i(_pos)]=1; _map[_xy2i(_pos)]=1;
_map[_xy2i(_pos)+window->width*1]=2;
_map[_xy2i(_pos)+window->width*2]=3;
_map[_xy2i(_pos)+window->width*3]=4;
_place_apple(); _place_apple();
} }
@ -26,6 +30,10 @@ SnakeEffect::~SnakeEffect() {
} }
void SnakeEffect::_place_apple() { void SnakeEffect::_place_apple() {
if (SNAKE_DEBUG) {
_apple = {3, 3};
return;
}
if (_length < _pixels) { if (_length < _pixels) {
uint8_t start = random8(_pixels); uint8_t start = random8(_pixels);
for (int i=0; i<_pixels; i++) { for (int i=0; i<_pixels; i++) {
@ -45,27 +53,50 @@ void SnakeEffect::_decide() {
uint8_t a_s = _to_apple(_dir); uint8_t a_s = _to_apple(_dir);
uint8_t a_r = _to_apple(_dir + 1); uint8_t a_r = _to_apple(_dir + 1);
uint8_t inputs[6] = {f_l, f_s, f_r, a_l, a_s, a_r}; float* inputs = new float[6];
inputs[0] = f_l;
float outputs[3] = {0.0, 0.0, 0.0}; inputs[1] = f_s;
inputs[2] = f_r;
for (int i=0; i<18; i++) { inputs[3] = a_l;
uint8_t out = i/6; inputs[4] = a_s;
uint8_t in = i%6; inputs[5] = a_r;
outputs[out] += _weights[i] * inputs[in]; if (SNAKE_DEBUG) LOGln("SnakeEffect * Position: %d, %d - Inputs: %3.1f %3.1f %3.1f %3.1f %3.1f %3.1f", _pos.x, _pos.y, inputs[0], inputs[1], inputs[2], inputs[3], inputs[4], inputs[5]);
float* outputs = NULL;
uint8_t i=0;
for (uint8_t layer=1; layer<_net_layers; layer++) {
outputs = new float[_net_layout[layer]];
for (uint8_t j=0; j<_net_layout[layer]; j++) {
outputs[j] = 0.0;
}
for (uint8_t idx_out=0; idx_out<_net_layout[layer]; idx_out++) {
for (uint8_t idx_in=0; idx_in<_net_layout[layer-1]; idx_in++) {
float weight;
memcpy(&weight, &(_weights[i]), sizeof(weight));
outputs[idx_out] += weight * inputs[idx_in];
//outputs[idx_out] += (*(float*)&(_weights[i])) * inputs[idx_in];
i++;
}
}
delete inputs;
inputs = outputs;
} }
int8_t decision = 0; int8_t decision = 0;
if (outputs[0]>=outputs[1] && outputs[0]>=outputs[2]) { float last;
decision = -1; for (uint8_t i=0; i<_net_layout[_net_layers - 1]; i++) {
} else if (outputs[1]>=outputs[2]) { if (i==0 || outputs[i]>last) {
decision = 0; last = outputs[i];
} else { decision = i;
decision = 1; }
} }
decision = decision - 1;
delete outputs;
if (SNAKE_DEBUG) LOGln("SnakeEffect * Decision: %d", decision);
_dir += decision; _dir += decision;
if (_dir < 0) _dir += 4; if (_dir < 0) _dir += 4;
if (_dir > 3) _dir -= 4;
} }
/** /**
@ -94,8 +125,7 @@ int8_t SnakeEffect::_manual_decision() {
}*/ }*/
bool SnakeEffect::_is_free(uint8_t dir) { bool SnakeEffect::_is_free(uint8_t dir) {
Coords np = _new_pos(dir); return _free_spaces(dir)!=0;
return np.x>=0 && np.x<window->width && np.y>=0 && np.y<window->height && _map[_xy2i(np)]==0;
} }
uint8_t SnakeEffect::_free_spaces(uint8_t dir) { uint8_t SnakeEffect::_free_spaces(uint8_t dir) {
@ -109,15 +139,15 @@ uint8_t SnakeEffect::_free_spaces(uint8_t dir) {
case SNAKE_DIR_WEST: x=-1; break; case SNAKE_DIR_WEST: x=-1; break;
} }
Coords p(_pos); Coords p(_pos);
uint8_t i; uint8_t i=0;
for(i=0; i<window->width || i<window->height; i++) { while (true) {
p.x += x; p.x += x;
p.y += y; p.y += y;
if (p.x<0 || p.x>=window->width || p.y<0 || p.y>=window->height || _map[_xy2i(p)]!=0) { if (p.x<0 || p.x>=window->width || p.y<0 || p.y>=window->height || _map[_xy2i(p)]!=0) {
break; return i;
} }
i++;
} }
return i;
} }
uint8_t SnakeEffect::_to_apple(uint8_t dir) { uint8_t SnakeEffect::_to_apple(uint8_t dir) {
@ -159,10 +189,7 @@ Coords SnakeEffect::_i2xy(uint16_t i) {
} }
void SnakeEffect::_move() { void SnakeEffect::_move() {
if (_dying==0 && !_is_free(_dir)) {
_dying = 150;
return;
}
if (_dying > 0) { if (_dying > 0) {
_dying--; _dying--;
@ -175,11 +202,21 @@ void SnakeEffect::_move() {
} }
unsigned long now = millis(); unsigned long now = millis();
if (_last_move_at < now && now - _last_move_at < 100) { if (_last_move_at < now && now - _last_move_at < 0) {
return; return;
} }
_round++;
_last_move_at = now; _last_move_at = now;
_decide();
if (_dying==0 && !_is_free(_dir)) {
_dying = 150;
return;
}
_pos = _new_pos(_dir); _pos = _new_pos(_dir);
if (SNAKE_DEBUG) LOGln("SnakeEffect * new_pos: %d, %d", _pos.x, _pos.y);
if (SNAKE_DEBUG) LOGln("SnakeEffect * apple: %d, %d", _apple.x, _apple.y);
if (_pos.x==_apple.x && _pos.y==_apple.y) { if (_pos.x==_apple.x && _pos.y==_apple.y) {
_last_apple_at = millis(); _last_apple_at = millis();
_length++; _length++;
@ -215,12 +252,9 @@ void SnakeEffect::loop(uint16_t ms) {
//CRGB color(CHSV(hue, 200, 255)); //CRGB color(CHSV(hue, 200, 255));
//window->setPixel(this->coords.x, this->coords.y, &color); //window->setPixel(this->coords.x, this->coords.y, &color);
//hue++; //hue++;
if (millis() < _last_apple_at || millis() - _last_apple_at > 30000) { if (_dying==0 && (millis() < _last_apple_at || millis() - _last_apple_at > 30000)) {
_dying = 150; _dying = 150;
} }
if (_dying==0) {
_decide();
}
_move(); _move();
_draw(); _draw();
} }

View File

@ -1,2 +1,6 @@
#!/usr/bin/gnuplot -c #!/usr/bin/gnuplot -c
set term dumb 79 49; plot 'data_set.dat' notitle set term dumb 79 49
plot 'data_set.dat' using 1:2 title 'Points', \
# 'data_set.dat' using 1:3 title 'Length' axes x1y2, \
# 'data_set.dat' using 1:4 title 'Stopped' axes x1y2, \
# 'data_set.dat' using 1:5 title 'Dead' axes x1y2

View File

@ -14,8 +14,10 @@ class Game
POINTS_MOVING_FAR = -1.5 POINTS_MOVING_FAR = -1.5
attr_reader :points, :dead, :ai, :length attr_reader :points, :dead, :ai, :length
attr_accessor :apple
def initialize(a) def initialize(a, debug=false)
@debug = debug
@ai = a @ai = a
@data = [0]*(WIDTH*HEIGHT) @data = [0]*(WIDTH*HEIGHT)
@dir = 0 @dir = 0
@ -29,6 +31,7 @@ class Game
@dead = false @dead = false
@round = 0 @round = 0
@last_apple_at = 0 @last_apple_at = 0
@count_left = @count_right = 0
place_apple() place_apple()
end end
@ -63,6 +66,8 @@ class Game
return if @dead return if @dead
decision = @ai.decide(free?(@dir-1), free?(@dir), free?(@dir+1), apple?(@dir-1), apple?(@dir), apple?(@dir+1)) decision = @ai.decide(free?(@dir-1), free?(@dir), free?(@dir+1), apple?(@dir-1), apple?(@dir), apple?(@dir+1))
#puts "Decision: #{decision}" #puts "Decision: #{decision}"
@count_left += 1 if decision==-1
@count_right += 1 if decision==1
@dir = (@dir + decision) % 4 @dir = (@dir + decision) % 4
if (free?(@dir)==0) if (free?(@dir)==0)
#puts "Dead." #puts "Dead."
@ -73,11 +78,11 @@ class Game
move move
end end
def ranking; @length*10 - (@dead ? 200 : 0) - (since_last_apple >= 160 ? 100 : 0); end def ranking; @length*10 - (@dead ? 200 : 0) - (stopped? ? 100 : 0) - (@count_right - @count_left).abs * 0.05; end
def move def move
newpos = calc_new_pos(@pos, @dir) newpos = calc_new_pos(@pos, @dir)
#puts "Newpos: #{newpos}" puts "Newpos: #{newpos}" if @debug
if newpos==@apple if newpos==@apple
@length+=1 @length+=1
@points += POINTS_APPLE @points += POINTS_APPLE
@ -156,23 +161,41 @@ class Game
@dead = true @dead = true
end end
def stopped?; since_last_apple >= WIDTH*HEIGHT*1.5; end def stopped?; since_last_apple >= WIDTH*HEIGHT*2; end
def ai_ranking; ai.ranking; end def ai_ranking; ai.ranking; end
end end
class AI class AI
attr_reader :weights NETWORK_LAYOUT = [6, 4, 3]
attr_reader :weights, :id
attr_accessor :ranking, :rounds, :count_dead, :count_stopped, :sum_length attr_accessor :ranking, :rounds, :count_dead, :count_stopped, :sum_length
def initialize(w=nil) def initialize(w=nil, debug=false)
@debug = debug
reset() reset()
@rounds = 1 @rounds = 1
@id = rand(0xFFFFFF)
if w==nil if w==nil
@weights = Array.new(18) { rand() * 2.0 - 1.0 } @weights = Array.new(network_size()) { rand() * 2.0 - 1.0 }
puts "Initialized with random values: #{@weights}" if @debug
else else
@weights = w if w[0].is_a? Integer
@weights = w.map{|s| s.to_s(16).rjust(8, "0").split("").each_slice(2).to_a.map(&:join).map{|s| s.to_i(16).chr}.join.unpack("g")}.flatten
else
@weights = w
end
puts "Initialized with given values: #{@weights}" if @debug
end end
end
def network_size
s = 0
(0...(NETWORK_LAYOUT.count-1)).each do |i|
s += NETWORK_LAYOUT[i] * NETWORK_LAYOUT[i+1]
end
return s
end end
def reset def reset
@ -191,43 +214,59 @@ class AI
def decide(left_free, straight_free, right_free, apple_left, apple_straight, apple_right) def decide(left_free, straight_free, right_free, apple_left, apple_straight, apple_right)
inputs = [left_free, straight_free, right_free, apple_left, apple_straight, apple_right] inputs = [left_free, straight_free, right_free, apple_left, apple_straight, apple_right]
#pp inputs puts "Inputs: #{inputs}" if @debug
outputs = [0, 0, 0] outputs = nil
(0...18).each do |x| x = 0
o = x/6 (1...(NETWORK_LAYOUT.count)).each do |i|
i = x%6 c_in = NETWORK_LAYOUT[i-1]
outputs[o] += inputs[i] * @weights[x] c_out = NETWORK_LAYOUT[i]
outputs = Array.new(c_out){0.0}
(0...c_out).each do |o|
(0...c_in).each do |i|
outputs[o] += inputs[i] * @weights[x]
x+=1
end
end
inputs = outputs
end end
max = 0 max = 0
take = 0 take = 0
(0...3).each do |x| (0...(NETWORK_LAYOUT.last)).each do |x|
if outputs[x]>max if outputs[x]>max
max = outputs[x] max = outputs[x]
take = x take = x
end end
end end
puts "Decision: #{take-1}" if @debug
return take-1 return take-1
end end
def evolve def evolve
w = @weights.dup w = @weights.dup
action = rand(4) action = rand(5)
if action==0 #swap if action==0 #swap
i1 = rand(18) i1 = rand(network_size())
i2 = rand(18) i2 = rand(network_size())
temp = w[i1] temp = w[i1]
w[i1] = w[i2] w[i1] = w[i2]
w[i2] = temp w[i2] = temp
elsif action==1 #change single value elsif action==1 #change single value
i = rand(18) i = rand(network_size())
w[i] = rand() * 2 - 1.0 w[i] = rand() * 2 - 1.0
elsif action==2 #invert single value elsif action==2 #invert single value
i = rand(18) i = rand(network_size())
w[i] *= -1.0 w[i] *= -1.0
elsif action==3
(0...network_size()).each do |i|
w[i] = rand() * 2 - 1.0 if rand(5)==0
end
else #change multiple values else #change multiple values
(0...18).each do |i| (0...network_size()).each do |i|
if (rand(5)==0) if (rand(5)==0)
w[i] = rand() * 2 - 1 w[i] += rand() / 5.0 - 0.1
w[i] = 1.0 if w[i]>1.0
w[i] = -1.0 if w[i]<-1.0
end end
end end
end end
@ -238,7 +277,7 @@ class AI
def merge(ai) def merge(ai)
w = @weights.dup w = @weights.dup
w2 = ai.weights.dup w2 = ai.weights.dup
(0...18).each do |i| (0...network_size()).each do |i|
if rand(2)==0 if rand(2)==0
w[i] = w2[i] w[i] = w2[i]
end end
@ -249,19 +288,33 @@ class AI
def average(ai) def average(ai)
w = @weights.dup w = @weights.dup
w2 = ai.weights w2 = ai.weights
(0...18).each do |i| (0...network_size()).each do |i|
w[i] = (w[i] + w2[i]) / 2.0 w[i] = (w[i] + w2[i]) / 2.0
end end
return AI.new(w) return AI.new(w)
end end
def dump def dump
puts "Data:" puts "const uint32_t _weights[#{network_size()}] = {#{@weights.map{|x| "0x" + [x].pack('g').split("").map(&:ord).map{|i| i.to_s(16).rjust(2, '0')}.join}.join(", ")}};"
puts "float _weights[18] = {#{@weights.join(", ")}};"
#puts "Simplified: #{simplified}" #puts "Simplified: #{simplified}"
end end
end end
## Simulate
=begin
ai = AI.new([0xbd547c6d, 0xbedc84a5, 0x3e750239, 0x3ec5ae8a, 0xbcc9a683, 0x3f18715a, 0x3e947ed4, 0xbe4b8bf2, 0xbf2ee4ec, 0xbf3f0a75,
0x3f5392dc, 0xbf06687b, 0xbedca2f2, 0xbcde3698, 0x3edd6a8a, 0xbd7284ca, 0x3ea7bac9, 0xbe5323c1, 0x3eccf87d,
0xbf2d4796, 0xbf62b6e8, 0xbf71daf6, 0xbeff40aa, 0xbf207014, 0x3e26c03c, 0xbf497837, 0xbee4d175, 0x3ec601de, 0x3e4e0695, 0x3eef2619,
0xbe849370, 0xbf18fb2b, 0x3f128e17, 0xbf3dcd78, 0x3f517299, 0x3eef3270], true)
g = Game.new(ai, true)
g.apple = [3, 3]
10.times do
g.loop
end
exit
=end
graph = File.open(File.dirname(__FILE__) + "/data_set.dat", "w") graph = File.open(File.dirname(__FILE__) + "/data_set.dat", "w")
graph.puts("# Round - Points - Length - Stopped - Dead") graph.puts("# Round - Points - Length - Stopped - Dead")
@ -272,6 +325,7 @@ games = []
ais[x] = AI.new#(SEEDS.sample) ais[x] = AI.new#(SEEDS.sample)
end end
best_old_game = nil
best_old_ai = nil best_old_ai = nil
begin begin
loop do loop do
@ -297,8 +351,14 @@ loop do
games_sorted = games.sort_by(&:ai_ranking).reverse.take(5) games_sorted = games.sort_by(&:ai_ranking).reverse.take(5)
g = games_sorted[0] g = games_sorted[0]
if (round-1)%50==0
puts "----------------------------------------------------"
puts "Round | Points | Length | Stopped | Dead | ID "
puts "----------------------------------------------------"
end
puts "Round %5d: %7.1f points, length %3.0f, %3.0f%% stopped, %3.0f%% dead - {%s}" % [round, g.ai_ranking / GAMES_PER_ROUND, g.ai.sum_length.to_f / GAMES_PER_ROUND, g.ai.count_stopped.to_f / GAMES_PER_ROUND * 100, g.ai.count_dead.to_f / GAMES_PER_ROUND * 100, g.ai.weights.map{|v| v.truncate(1).to_s.rjust(4)}.join(", ")] puts "%5d | %7.1f | %6.0f | %6.0f%% | %3.0f%% | 0x%06x" % [round, g.ai_ranking / GAMES_PER_ROUND, g.ai.sum_length.to_f / GAMES_PER_ROUND, g.ai.count_stopped.to_f / GAMES_PER_ROUND * 100, g.ai.count_dead.to_f / GAMES_PER_ROUND * 100, g.ai.id]
graph.puts("%d %f %f %f %f" % [round, g.ai_ranking / GAMES_PER_ROUND, g.ai.sum_length.to_f / GAMES_PER_ROUND, g.ai.count_stopped.to_f / GAMES_PER_ROUND * 100, g.ai.count_dead.to_f / GAMES_PER_ROUND * 100]) graph.puts("%d %f %f %f %f" % [round, g.ai_ranking / GAMES_PER_ROUND, g.ai.sum_length.to_f / GAMES_PER_ROUND, g.ai.count_stopped.to_f / GAMES_PER_ROUND * 100, g.ai.count_dead.to_f / GAMES_PER_ROUND * 100])
graph.flush graph.flush
@ -306,7 +366,8 @@ loop do
g.ai.dump g.ai.dump
end end
best_old_ai = g.ai best_old_game = g
best_old_ai = g.ai.dup
ais = [] ais = []
games_sorted.each do |g| games_sorted.each do |g|
@ -331,6 +392,9 @@ loop do
round+=1 round+=1
end end
rescue SystemExit, Interrupt rescue SystemExit, Interrupt
puts
puts
puts "// Round %d, %5.1f points, length %3d, %2.0f%% stopped, %2.0f%% died" % [round-1, best_old_game.ai_ranking / GAMES_PER_ROUND, best_old_ai.sum_length.to_f / GAMES_PER_ROUND, best_old_ai.count_stopped.to_f / GAMES_PER_ROUND * 100, best_old_ai.count_dead.to_f / GAMES_PER_ROUND * 100]
best_old_ai.dump best_old_ai.dump
graph.close graph.close
end end