Snake effect now uses a neural net with a hidden layer; a bug in _is_free() that led to lots of snakes committing suicide was fixed; neural net weights are now given as the binary representation of a float to prevent rounding errors.

This commit is contained in:
Fabian Schlenz 2019-10-18 06:40:09 +02:00
parent 306f72d838
commit 54925dfc0e
4 changed files with 178 additions and 62 deletions

View File

@ -8,6 +8,8 @@
#define SNAKE_DIR_SOUTH 2
#define SNAKE_DIR_WEST 3
#define SNAKE_DEBUG false
class SnakeEffect : public Effect {
private:
Coords _pos;
@ -18,10 +20,22 @@ private:
uint8_t _length;
unsigned long _last_apple_at;
unsigned long _last_move_at;
// 204.6 points, length 35, 58% stopped, 42% dead
// float _weights[18] = {0.8613356309729567, 0.1010670216231977, -0.03801953620401166, 0.7556556498067926, -0.9925124063206012, 0.6375227768823608, 0.7216340201735381, 0.5557451907997892, 0.03496949604344035, 0.7238725631217913, 0.9070987343528141, 0.6518810721526125, -0.17322587217593544, -0.7726195238221361, 0.8044226332955624, 0.8434782354002677, 0.8508760698750302, 0.47735675603010397};
// Round 2077: 208.6 points, length 36, 50% stopped, 50% dead
float _weights[18] = {-0.1648448727142625, -0.7505284618312464, 0.0037384390323656203, -0.6678743938665241, 0.917231716139375, 0.05960885292612439, -0.7585782758281971, 0.8275111343144115, 0.7821852602229209, 0.29970244548911523, 0.9737979047604144, -0.2384723067003974, -0.7854491847031548, 0.44652781127984964, 0.9127919336231882, 0.3309096816699824, -0.9071832356948208, -0.23802066581485848};
uint16_t _round;
// Neural net config
// These are actually float values. To avoid rounding errors from decimal text representations, they are provided
// as the raw binary data of the IEEE 754 floating point numbers.
// In _decide() there's code to memcpy()-convert them to a float.
// Round 340, 223.4 points, length 39, 36% stopped, 64% died
// const uint32_t _weights[36] = {0xbd8e626e, 0xbee2cd2c, 0x3e4d5cab, 0x3eceb8c3, 0xbed0a514, 0x3ec62438, 0x3e947ed4, 0xbe4b8bf2, 0xbf301113, 0xbf3f0a75, 0x3f1868f7, 0xbf0253ca, 0xbedca2f2, 0xbd547c6d, 0x3edd6a8a, 0xbd4b97b6, 0x3f64ec26, 0xbe5323c1, 0x3eccf87d, 0xbf2d4796, 0xbf62b6e8, 0xbf71daf6, 0xbf03f08e, 0xbf222609, 0x3e26c03c, 0xbf497837, 0xbee4d175, 0x3ec601de, 0x3e4e0695, 0x3eef2619, 0xbe849370, 0xbf18fb2b, 0x3f25bbd1, 0xbf3dcd78, 0x3f37a58d, 0x3ef4a25b};
// Round 630, 221.0 points, length 38, 36% stopped, 64% died
const uint32_t _weights[36] = {0xbd25943f, 0xbf279d81, 0x3e25d128, 0x3ec62438, 0x3f0e719c, 0x3eefbea9, 0x3e947ed4, 0xbe5323c1, 0xbf2d4796, 0xbf3f0a75, 0x3f0e45d9, 0xbf0253ca, 0xbedca2f2, 0xbd79073c, 0x3ede80ec, 0xbd4b97b6, 0x3f69a6be, 0xbe4b8bf2, 0x3eccf87d, 0xbf301113, 0xbf62b6e8, 0xbf71daf6, 0xbf204130, 0xbf222609, 0x3e26c03c, 0xbf497837, 0xbee4d175, 0x3ec601de, 0x3e4954eb, 0x3eef2619, 0xbe849370, 0xbf18fb2b, 0x3f25bbd1, 0xbf3b4e44, 0x3f484d59, 0x3edd6a8a};
const uint8_t _net_layout[3] = {6, 4, 3};
const uint8_t _net_layers = 3;
const uint8_t _net_total_size = 36;
uint16_t _xy2i(uint8_t x, uint8_t y);
uint16_t _xy2i(Coords c);
Coords _i2xy(uint16_t i);

View File

@ -10,6 +10,7 @@ SnakeEffect::SnakeEffect() {
void SnakeEffect::_init() {
_dying = 0;
_round = 0;
_last_apple_at = millis();
_last_move_at = millis();
_dir = SNAKE_DIR_NORTH;
@ -17,6 +18,9 @@ void SnakeEffect::_init() {
_pos = {(uint8_t)(window->width/2), (uint8_t)(window->height/2)};
for (int i=0; i<_pixels; i++) _map[i]=0;
_map[_xy2i(_pos)]=1;
_map[_xy2i(_pos)+window->width*1]=2;
_map[_xy2i(_pos)+window->width*2]=3;
_map[_xy2i(_pos)+window->width*3]=4;
_place_apple();
}
@ -26,6 +30,10 @@ SnakeEffect::~SnakeEffect() {
}
void SnakeEffect::_place_apple() {
if (SNAKE_DEBUG) {
_apple = {3, 3};
return;
}
if (_length < _pixels) {
uint8_t start = random8(_pixels);
for (int i=0; i<_pixels; i++) {
@ -45,27 +53,50 @@ void SnakeEffect::_decide() {
uint8_t a_s = _to_apple(_dir);
uint8_t a_r = _to_apple(_dir + 1);
uint8_t inputs[6] = {f_l, f_s, f_r, a_l, a_s, a_r};
float outputs[3] = {0.0, 0.0, 0.0};
for (int i=0; i<18; i++) {
uint8_t out = i/6;
uint8_t in = i%6;
outputs[out] += _weights[i] * inputs[in];
float* inputs = new float[6];
inputs[0] = f_l;
inputs[1] = f_s;
inputs[2] = f_r;
inputs[3] = a_l;
inputs[4] = a_s;
inputs[5] = a_r;
if (SNAKE_DEBUG) LOGln("SnakeEffect * Position: %d, %d - Inputs: %3.1f %3.1f %3.1f %3.1f %3.1f %3.1f", _pos.x, _pos.y, inputs[0], inputs[1], inputs[2], inputs[3], inputs[4], inputs[5]);
float* outputs = NULL;
uint8_t i=0;
for (uint8_t layer=1; layer<_net_layers; layer++) {
outputs = new float[_net_layout[layer]];
for (uint8_t j=0; j<_net_layout[layer]; j++) {
outputs[j] = 0.0;
}
for (uint8_t idx_out=0; idx_out<_net_layout[layer]; idx_out++) {
for (uint8_t idx_in=0; idx_in<_net_layout[layer-1]; idx_in++) {
float weight;
memcpy(&weight, &(_weights[i]), sizeof(weight));
outputs[idx_out] += weight * inputs[idx_in];
//outputs[idx_out] += (*(float*)&(_weights[i])) * inputs[idx_in];
i++;
}
}
delete inputs;
inputs = outputs;
}
int8_t decision = 0;
if (outputs[0]>=outputs[1] && outputs[0]>=outputs[2]) {
decision = -1;
} else if (outputs[1]>=outputs[2]) {
decision = 0;
} else {
decision = 1;
float last;
for (uint8_t i=0; i<_net_layout[_net_layers - 1]; i++) {
if (i==0 || outputs[i]>last) {
last = outputs[i];
decision = i;
}
}
decision = decision - 1;
delete outputs;
if (SNAKE_DEBUG) LOGln("SnakeEffect * Decision: %d", decision);
_dir += decision;
if (_dir < 0) _dir += 4;
if (_dir > 3) _dir -= 4;
}
/**
@ -94,8 +125,7 @@ int8_t SnakeEffect::_manual_decision() {
}*/
bool SnakeEffect::_is_free(uint8_t dir) {
Coords np = _new_pos(dir);
return np.x>=0 && np.x<window->width && np.y>=0 && np.y<window->height && _map[_xy2i(np)]==0;
return _free_spaces(dir)!=0;
}
uint8_t SnakeEffect::_free_spaces(uint8_t dir) {
@ -109,16 +139,16 @@ uint8_t SnakeEffect::_free_spaces(uint8_t dir) {
case SNAKE_DIR_WEST: x=-1; break;
}
Coords p(_pos);
uint8_t i;
for(i=0; i<window->width || i<window->height; i++) {
uint8_t i=0;
while (true) {
p.x += x;
p.y += y;
if (p.x<0 || p.x>=window->width || p.y<0 || p.y>=window->height || _map[_xy2i(p)]!=0) {
break;
}
}
return i;
}
i++;
}
}
uint8_t SnakeEffect::_to_apple(uint8_t dir) {
uint8_t d = dir % 4;
@ -159,10 +189,7 @@ Coords SnakeEffect::_i2xy(uint16_t i) {
}
void SnakeEffect::_move() {
if (_dying==0 && !_is_free(_dir)) {
_dying = 150;
return;
}
if (_dying > 0) {
_dying--;
@ -175,11 +202,21 @@ void SnakeEffect::_move() {
}
unsigned long now = millis();
if (_last_move_at < now && now - _last_move_at < 100) {
if (_last_move_at < now && now - _last_move_at < 0) {
return;
}
_round++;
_last_move_at = now;
_decide();
if (_dying==0 && !_is_free(_dir)) {
_dying = 150;
return;
}
_pos = _new_pos(_dir);
if (SNAKE_DEBUG) LOGln("SnakeEffect * new_pos: %d, %d", _pos.x, _pos.y);
if (SNAKE_DEBUG) LOGln("SnakeEffect * apple: %d, %d", _apple.x, _apple.y);
if (_pos.x==_apple.x && _pos.y==_apple.y) {
_last_apple_at = millis();
_length++;
@ -215,12 +252,9 @@ void SnakeEffect::loop(uint16_t ms) {
//CRGB color(CHSV(hue, 200, 255));
//window->setPixel(this->coords.x, this->coords.y, &color);
//hue++;
if (millis() < _last_apple_at || millis() - _last_apple_at > 30000) {
if (_dying==0 && (millis() < _last_apple_at || millis() - _last_apple_at > 30000)) {
_dying = 150;
}
if (_dying==0) {
_decide();
}
_move();
_draw();
}

View File

@ -1,2 +1,6 @@
#!/usr/bin/gnuplot -c
set term dumb 79 49; plot 'data_set.dat' notitle
set term dumb 79 49
plot 'data_set.dat' using 1:2 title 'Points', \
# 'data_set.dat' using 1:3 title 'Length' axes x1y2, \
# 'data_set.dat' using 1:4 title 'Stopped' axes x1y2, \
# 'data_set.dat' using 1:5 title 'Dead' axes x1y2

View File

@ -14,8 +14,10 @@ class Game
POINTS_MOVING_FAR = -1.5
attr_reader :points, :dead, :ai, :length
attr_accessor :apple
def initialize(a)
def initialize(a, debug=false)
@debug = debug
@ai = a
@data = [0]*(WIDTH*HEIGHT)
@dir = 0
@ -29,6 +31,7 @@ class Game
@dead = false
@round = 0
@last_apple_at = 0
@count_left = @count_right = 0
place_apple()
end
@ -63,6 +66,8 @@ class Game
return if @dead
decision = @ai.decide(free?(@dir-1), free?(@dir), free?(@dir+1), apple?(@dir-1), apple?(@dir), apple?(@dir+1))
#puts "Decision: #{decision}"
@count_left += 1 if decision==-1
@count_right += 1 if decision==1
@dir = (@dir + decision) % 4
if (free?(@dir)==0)
#puts "Dead."
@ -73,11 +78,11 @@ class Game
move
end
def ranking; @length*10 - (@dead ? 200 : 0) - (since_last_apple >= 160 ? 100 : 0); end
def ranking; @length*10 - (@dead ? 200 : 0) - (stopped? ? 100 : 0) - (@count_right - @count_left).abs * 0.05; end
def move
newpos = calc_new_pos(@pos, @dir)
#puts "Newpos: #{newpos}"
puts "Newpos: #{newpos}" if @debug
if newpos==@apple
@length+=1
@points += POINTS_APPLE
@ -156,23 +161,41 @@ class Game
@dead = true
end
def stopped?; since_last_apple >= WIDTH*HEIGHT*1.5; end
def stopped?; since_last_apple >= WIDTH*HEIGHT*2; end
# Convenience accessor: returns the ranking stored on this game's AI.
def ai_ranking
  ai.ranking
end
end
class AI
attr_reader :weights
NETWORK_LAYOUT = [6, 4, 3]
attr_reader :weights, :id
attr_accessor :ranking, :rounds, :count_dead, :count_stopped, :sum_length
def initialize(w=nil)
def initialize(w=nil, debug=false)
@debug = debug
reset()
@rounds = 1
@id = rand(0xFFFFFF)
if w==nil
@weights = Array.new(18) { rand() * 2.0 - 1.0 }
@weights = Array.new(network_size()) { rand() * 2.0 - 1.0 }
puts "Initialized with random values: #{@weights}" if @debug
else
if w[0].is_a? Integer
@weights = w.map{|s| s.to_s(16).rjust(8, "0").split("").each_slice(2).to_a.map(&:join).map{|s| s.to_i(16).chr}.join.unpack("g")}.flatten
else
@weights = w
end
puts "Initialized with given values: #{@weights}" if @debug
end
end
# Total number of weights the network needs: for every pair of adjacent
# layers in NETWORK_LAYOUT, the product of their sizes, summed up.
# Returns 0 when the layout has fewer than two layers.
def network_size
  NETWORK_LAYOUT.each_cons(2).sum { |from, to| from * to }
end
def reset
@ -191,43 +214,59 @@ class AI
def decide(left_free, straight_free, right_free, apple_left, apple_straight, apple_right)
inputs = [left_free, straight_free, right_free, apple_left, apple_straight, apple_right]
#pp inputs
outputs = [0, 0, 0]
(0...18).each do |x|
o = x/6
i = x%6
puts "Inputs: #{inputs}" if @debug
outputs = nil
x = 0
(1...(NETWORK_LAYOUT.count)).each do |i|
c_in = NETWORK_LAYOUT[i-1]
c_out = NETWORK_LAYOUT[i]
outputs = Array.new(c_out){0.0}
(0...c_out).each do |o|
(0...c_in).each do |i|
outputs[o] += inputs[i] * @weights[x]
x+=1
end
end
inputs = outputs
end
max = 0
take = 0
(0...3).each do |x|
(0...(NETWORK_LAYOUT.last)).each do |x|
if outputs[x]>max
max = outputs[x]
take = x
end
end
puts "Decision: #{take-1}" if @debug
return take-1
end
def evolve
w = @weights.dup
action = rand(4)
action = rand(5)
if action==0 #swap
i1 = rand(18)
i2 = rand(18)
i1 = rand(network_size())
i2 = rand(network_size())
temp = w[i1]
w[i1] = w[i2]
w[i2] = temp
elsif action==1 #change single value
i = rand(18)
i = rand(network_size())
w[i] = rand() * 2 - 1.0
elsif action==2 #invert single value
i = rand(18)
i = rand(network_size())
w[i] *= -1.0
elsif action==3
(0...network_size()).each do |i|
w[i] = rand() * 2 - 1.0 if rand(5)==0
end
else #change multiple values
(0...18).each do |i|
(0...network_size()).each do |i|
if (rand(5)==0)
w[i] = rand() * 2 - 1
w[i] += rand() / 5.0 - 0.1
w[i] = 1.0 if w[i]>1.0
w[i] = -1.0 if w[i]<-1.0
end
end
end
@ -238,7 +277,7 @@ class AI
def merge(ai)
w = @weights.dup
w2 = ai.weights.dup
(0...18).each do |i|
(0...network_size()).each do |i|
if rand(2)==0
w[i] = w2[i]
end
@ -249,19 +288,33 @@ class AI
def average(ai)
w = @weights.dup
w2 = ai.weights
(0...18).each do |i|
(0...network_size()).each do |i|
w[i] = (w[i] + w2[i]) / 2.0
end
return AI.new(w)
end
def dump
puts "Data:"
puts "float _weights[18] = {#{@weights.join(", ")}};"
puts "const uint32_t _weights[#{network_size()}] = {#{@weights.map{|x| "0x" + [x].pack('g').split("").map(&:ord).map{|i| i.to_s(16).rjust(2, '0')}.join}.join(", ")}};"
#puts "Simplified: #{simplified}"
end
end
## Simulate
=begin
ai = AI.new([0xbd547c6d, 0xbedc84a5, 0x3e750239, 0x3ec5ae8a, 0xbcc9a683, 0x3f18715a, 0x3e947ed4, 0xbe4b8bf2, 0xbf2ee4ec, 0xbf3f0a75,
0x3f5392dc, 0xbf06687b, 0xbedca2f2, 0xbcde3698, 0x3edd6a8a, 0xbd7284ca, 0x3ea7bac9, 0xbe5323c1, 0x3eccf87d,
0xbf2d4796, 0xbf62b6e8, 0xbf71daf6, 0xbeff40aa, 0xbf207014, 0x3e26c03c, 0xbf497837, 0xbee4d175, 0x3ec601de, 0x3e4e0695, 0x3eef2619,
0xbe849370, 0xbf18fb2b, 0x3f128e17, 0xbf3dcd78, 0x3f517299, 0x3eef3270], true)
g = Game.new(ai, true)
g.apple = [3, 3]
10.times do
g.loop
end
exit
=end
graph = File.open(File.dirname(__FILE__) + "/data_set.dat", "w")
graph.puts("# Round - Points - Length - Stopped - Dead")
@ -272,6 +325,7 @@ games = []
ais[x] = AI.new#(SEEDS.sample)
end
best_old_game = nil
best_old_ai = nil
begin
loop do
@ -298,7 +352,13 @@ loop do
games_sorted = games.sort_by(&:ai_ranking).reverse.take(5)
g = games_sorted[0]
puts "Round %5d: %7.1f points, length %3.0f, %3.0f%% stopped, %3.0f%% dead - {%s}" % [round, g.ai_ranking / GAMES_PER_ROUND, g.ai.sum_length.to_f / GAMES_PER_ROUND, g.ai.count_stopped.to_f / GAMES_PER_ROUND * 100, g.ai.count_dead.to_f / GAMES_PER_ROUND * 100, g.ai.weights.map{|v| v.truncate(1).to_s.rjust(4)}.join(", ")]
if (round-1)%50==0
puts "----------------------------------------------------"
puts "Round | Points | Length | Stopped | Dead | ID "
puts "----------------------------------------------------"
end
puts "%5d | %7.1f | %6.0f | %6.0f%% | %3.0f%% | 0x%06x" % [round, g.ai_ranking / GAMES_PER_ROUND, g.ai.sum_length.to_f / GAMES_PER_ROUND, g.ai.count_stopped.to_f / GAMES_PER_ROUND * 100, g.ai.count_dead.to_f / GAMES_PER_ROUND * 100, g.ai.id]
graph.puts("%d %f %f %f %f" % [round, g.ai_ranking / GAMES_PER_ROUND, g.ai.sum_length.to_f / GAMES_PER_ROUND, g.ai.count_stopped.to_f / GAMES_PER_ROUND * 100, g.ai.count_dead.to_f / GAMES_PER_ROUND * 100])
graph.flush
@ -306,7 +366,8 @@ loop do
g.ai.dump
end
best_old_ai = g.ai
best_old_game = g
best_old_ai = g.ai.dup
ais = []
games_sorted.each do |g|
@ -331,6 +392,9 @@ loop do
round+=1
end
rescue SystemExit, Interrupt
puts
puts
puts "// Round %d, %5.1f points, length %3d, %2.0f%% stopped, %2.0f%% died" % [round-1, best_old_game.ai_ranking / GAMES_PER_ROUND, best_old_ai.sum_length.to_f / GAMES_PER_ROUND, best_old_ai.count_stopped.to_f / GAMES_PER_ROUND * 100, best_old_ai.count_dead.to_f / GAMES_PER_ROUND * 100]
best_old_ai.dump
graph.close
end