The snake effect now uses a "proper", albeit simple, neural network instead of a fixed decision lookup table.
This commit is contained in:
parent
bd0041619a
commit
230a1f1a91
1
.gitignore
vendored
1
.gitignore
vendored
@ -10,3 +10,4 @@ include/config.h
|
||||
.pioenvs
|
||||
.DS_Store
|
||||
.vscode
|
||||
src/tools/snakenet/data_set.dat
|
||||
|
@ -18,17 +18,16 @@ private:
|
||||
uint8_t _length;
|
||||
unsigned long _last_apple_at;
|
||||
unsigned long _last_move_at;
|
||||
// The following code is a handwritten "ai". Useful for testing and stuff.
|
||||
//int8_t _decisions[64] = {1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,-1,-1,-1,-1,-1,-1,-1,-1,-1,1,-1,1,-1,-1,-1,-1,0,0,0,0,-1,-1,0,0,0,1,0,0,-1,-1,0,0};
|
||||
int8_t _decisions[64] = {0, 1, 1, -1, 0, 1, 1, -1, 0, -1, 1, -1, -1, 0, 1, -1, -1, 0, 0, 1, -1, -1, 0, 0, -1, 0, 0, -1, 0, -1, 0, -1, -1, 0, 0, -1, -1, 0, 1, -1, -1, 1, 1, -1, -1, 1, -1, 0, 0, 1, 0, 1, -1, -1, 0, 0, 0, -1, 0, 1, 0, -1, 0, -1};
|
||||
//int8_t _decisions[64] = {1, 1, 0, 0, 1, 1, -1, -1, 0, -1, 0, -1, -1, 0, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 0, -1, 0, 1, 1, 1, -1, 0, 0, 0, -1, -1, -1, 1, -1, -1, 1, 1, -1, 0, 1, 1, 1, 0, 0, -1, -1, 0, -1, 1, 0, 0, 0, -1, 0, -1, 1, 1};
|
||||
// 204.6 points, length 35, 58% stopped, 42% dead
|
||||
float _weights[18] = {0.8613356309729567, 0.1010670216231977, -0.03801953620401166, 0.7556556498067926, -0.9925124063206012, 0.6375227768823608, 0.7216340201735381, 0.5557451907997892, 0.03496949604344035, 0.7238725631217913, 0.9070987343528141, 0.6518810721526125, -0.17322587217593544, -0.7726195238221361, 0.8044226332955624, 0.8434782354002677, 0.8508760698750302, 0.47735675603010397};
|
||||
uint16_t _xy2i(uint8_t x, uint8_t y);
|
||||
uint16_t _xy2i(Coords c);
|
||||
Coords _i2xy(uint16_t i);
|
||||
Coords _new_pos(uint8_t dir);
|
||||
uint8_t _dying = 0;
|
||||
bool _is_free(uint8_t dir);
|
||||
bool _to_apple(uint8_t dir);
|
||||
uint8_t _free_spaces(uint8_t dir);
|
||||
uint8_t _to_apple(uint8_t dir);
|
||||
void _place_apple();
|
||||
void _init();
|
||||
void _decide();
|
||||
|
@ -38,15 +38,33 @@ void SnakeEffect::_place_apple() {
|
||||
}
|
||||
|
||||
void SnakeEffect::_decide() {
|
||||
uint8_t input = 0;
|
||||
if (_is_free(_dir - 1)) input |= 1<<5;
|
||||
if (_is_free(_dir)) input |= 1<<4;
|
||||
if (_is_free(_dir + 1)) input |= 1<<3;
|
||||
if (_to_apple(_dir - 1)) input |= 1<<2;
|
||||
if (_to_apple(_dir)) input |= 1<<1;
|
||||
if (_to_apple(_dir + 1)) input |= 1;
|
||||
uint8_t f_l = _free_spaces(_dir - 1);
|
||||
uint8_t f_s = _free_spaces(_dir);
|
||||
uint8_t f_r = _free_spaces(_dir + 1);
|
||||
uint8_t a_l = _to_apple(_dir - 1);
|
||||
uint8_t a_s = _to_apple(_dir);
|
||||
uint8_t a_r = _to_apple(_dir + 1);
|
||||
|
||||
_dir += _decisions[input];
|
||||
uint8_t inputs[6] = {f_l, f_s, f_r, a_l, a_s, a_r};
|
||||
|
||||
float outputs[3] = {0.0, 0.0, 0.0};
|
||||
|
||||
for (int i=0; i<18; i++) {
|
||||
uint8_t out = i/6;
|
||||
uint8_t in = i%6;
|
||||
outputs[out] += _weights[i] * inputs[in];
|
||||
}
|
||||
|
||||
int8_t decision = 0;
|
||||
if (outputs[0]>=outputs[1] && outputs[0]>=outputs[2]) {
|
||||
decision = -1;
|
||||
} else if (outputs[1]>=outputs[2]) {
|
||||
decision = 0;
|
||||
} else {
|
||||
decision = 1;
|
||||
}
|
||||
|
||||
_dir += decision;
|
||||
if (_dir < 0) _dir += 4;
|
||||
}
|
||||
|
||||
@ -80,15 +98,40 @@ bool SnakeEffect::_is_free(uint8_t dir) {
|
||||
return np.x>=0 && np.x<window->width && np.y>=0 && np.y<window->height && _map[_xy2i(np)]==0;
|
||||
}
|
||||
|
||||
bool SnakeEffect::_to_apple(uint8_t dir) {
|
||||
uint8_t SnakeEffect::_free_spaces(uint8_t dir) {
|
||||
int8_t x=0;
|
||||
int8_t y=0;
|
||||
uint8_t d = dir % 4;
|
||||
switch(d) {
|
||||
case SNAKE_DIR_NORTH: return _apple.y<_pos.y;
|
||||
case SNAKE_DIR_EAST: return _apple.x>_pos.x;
|
||||
case SNAKE_DIR_SOUTH: return _apple.y>_pos.y;
|
||||
case SNAKE_DIR_WEST: return _apple.x<_pos.x;
|
||||
case SNAKE_DIR_NORTH: y=-1; break;
|
||||
case SNAKE_DIR_EAST: x=1; break;
|
||||
case SNAKE_DIR_SOUTH: y=1; break;
|
||||
case SNAKE_DIR_WEST: x=-1; break;
|
||||
}
|
||||
return true;
|
||||
Coords p(_pos);
|
||||
uint8_t i;
|
||||
for(i=0; i<window->width || i<window->height; i++) {
|
||||
p.x += x;
|
||||
p.y += y;
|
||||
if (p.x<0 || p.x>=window->width || p.y<0 || p.y>=window->height || _map[_xy2i(p)]!=0) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
return i;
|
||||
}
|
||||
|
||||
uint8_t SnakeEffect::_to_apple(uint8_t dir) {
|
||||
uint8_t d = dir % 4;
|
||||
int8_t d_x = _apple.x - _pos.x;
|
||||
int8_t d_y = _apple.y - _pos.y;
|
||||
|
||||
switch(d) {
|
||||
case SNAKE_DIR_NORTH: return d_y < 0 ? -d_y : 0;
|
||||
case SNAKE_DIR_EAST: return d_x > 0 ? d_x : 0;
|
||||
case SNAKE_DIR_SOUTH: return d_y > 0 ? d_y : 0;
|
||||
case SNAKE_DIR_WEST: return d_x < 0 ? -d_x : 0;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
Coords SnakeEffect::_new_pos(uint8_t dir) {
|
||||
|
2
src/tools/snakenet/plot
Executable file
2
src/tools/snakenet/plot
Executable file
@ -0,0 +1,2 @@
|
||||
#!/usr/bin/gnuplot -c
|
||||
set term dumb 79 49; plot 'data_set.dat' notitle
|
@ -3,14 +3,6 @@ require 'rubygems'
|
||||
require 'pp'
|
||||
require 'thread/pool'
|
||||
|
||||
SEEDS = [
|
||||
#[0, 1, -1, 0, -1, 0, 1, -1, 1, -1, 1, 1, 1, 0, -1, 0, 0, 1, 0, -1, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, -1, -1, 0, 1, -1, 0, 0, 0, -1, -1, -1, 0, -1, 1, 0, -1, -1, 0, -1, 1, 0, 1, 1, 0, -1, -1, 0, -1, 1, 0, -1, 0],
|
||||
#[-1, 1, -1, 0, -1, 1, -1, -1, 1, 1, -1, 1, 1, -1, 1, 0, 0, -1, 0, 1, 0, -1, -1, 1, 0, 0, 0, -1, 0, 0, 1, -1, -1, -1, -1, -1, -1, 0, 0, -1, -1, 0, -1, 0, -1, 1, 1, 1, -1, 0, -1, 1, 0, 0, 0, -1, -1, -1, 0, -1, 1, 1, -1, 1],
|
||||
#[1, 1, -1, 0, 1, 1, 0, -1, 1, 1, 1, 1, 1, -1, -1, -1, 0, -1, 0, 1, 0, -1, 0, 1, 0, 0, 0, -1, 0, 1, 1, 1, -1, -1, -1, 1, -1, -1, 0, -1, -1, -1, -1, 0, -1, -1, 1, 1, 0, 0, -1, 1, 0, 0, 0, 1, -1, -1, 0, 0, 1, 1, 0, 1],
|
||||
[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,-1,-1,-1,-1,-1,-1,-1,-1,-1,1,-1,1,-1,-1,-1,-1,0,0,0,0,0,0,-1,-1,0,0,0,1,0,0,-1,-1]
|
||||
|
||||
]
|
||||
|
||||
GAMES_PER_ROUND = 50
|
||||
|
||||
class Game
|
||||
@ -67,10 +59,13 @@ class Game
|
||||
def draw; puts to_s; puts; end
|
||||
|
||||
def loop
|
||||
#puts "Loop. Position: #{@pos}"
|
||||
return if @dead
|
||||
decision = @ai.decide(free?(@dir-1), free?(@dir), free?(@dir+1), apple?(@dir-1), apple?(@dir), apple?(@dir+1))
|
||||
#puts "Decision: #{decision}"
|
||||
@dir = (@dir + decision) % 4
|
||||
if (!free?(@dir))
|
||||
if (free?(@dir)==0)
|
||||
#puts "Dead."
|
||||
die()
|
||||
return
|
||||
end
|
||||
@ -82,6 +77,7 @@ class Game
|
||||
|
||||
def move
|
||||
newpos = calc_new_pos(@pos, @dir)
|
||||
#puts "Newpos: #{newpos}"
|
||||
if newpos==@apple
|
||||
@length+=1
|
||||
@points += POINTS_APPLE
|
||||
@ -119,8 +115,25 @@ class Game
|
||||
end
|
||||
|
||||
# Counts the free fields in a straight line from @pos in direction +dir+.
#
# dir:: heading, taken modulo 4. The step per heading is
#       0 => y-1, 1 => x+1, 2 => y+1, 3 => x-1.
#
# Returns the number of consecutive fields, starting with the one adjacent to
# @pos, that are inside the WIDTH x HEIGHT board and whose @data entry is 0.
# A result of 0 means the adjacent field is blocked.
def free?(dir)
  dx, dy = [[0, -1], [1, 0], [0, 1], [-1, 0]][dir % 4]
  x = @pos[0]
  y = @pos[1]
  count = 0

  # A straight line cannot be longer than the larger board dimension.
  [WIDTH, HEIGHT].max.times do
    x += dx
    y += dy
    break if x<0 || x>=WIDTH || y<0 || y>=HEIGHT || @data[y*WIDTH + x]!=0
    count += 1
  end
  return count
end
|
||||
|
||||
def apple?(dir)
|
||||
@ -128,10 +141,10 @@ class Game
|
||||
d_x = @apple[0] - @pos[0]
|
||||
d_y = @apple[1] - @pos[1]
|
||||
case dir
|
||||
when 0 then return @apple[1]<@pos[1]
|
||||
when 1 then return @apple[0]>@pos[0]
|
||||
when 2 then return @apple[1]>@pos[1]
|
||||
when 3 then return @apple[0]<@pos[0]
|
||||
when 0 then return @apple[1]<@pos[1] ? -d_y : 0
|
||||
when 1 then return @apple[0]>@pos[0] ? d_x : 0
|
||||
when 2 then return @apple[1]>@pos[1] ? d_y : 0
|
||||
when 3 then return @apple[0]<@pos[0] ? -d_x : 0
|
||||
#when 0 then return d_y<0 && d_x.abs<d_y.abs
|
||||
#when 1 then return d_x>0 && d_x.abs>d_y.abs
|
||||
#when 2 then return d_y>0 && d_x.abs<d_y.abs
|
||||
@ -156,7 +169,7 @@ class AI
|
||||
reset()
|
||||
@rounds = 1
|
||||
if w==nil
|
||||
@weights = Array.new(2**6){ rand(3)-1 }
|
||||
@weights = Array.new(18) { rand() * 2.0 - 1.0 }
|
||||
else
|
||||
@weights = w
|
||||
end
|
||||
@ -177,16 +190,13 @@ class AI
|
||||
end
|
||||
|
||||
def decide(left_free, straight_free, right_free, apple_left, apple_straight, apple_right)
|
||||
input = apple_right ? 1 : 0 | (apple_straight ? 1<<1 : 0) | (apple_left ? 1<<2 : 0) | (right_free ? 1<<3 : 0) | (straight_free ? 1<<4 : 0) | (left_free ? 1<<5 : 0)
|
||||
w = @weights[input]
|
||||
return w
|
||||
=begin
|
||||
inputs = [left_free, straight_free, right_free, apple_left, apple_straight, apple_right]
|
||||
#pp inputs
|
||||
outputs = [0, 0, 0]
|
||||
(0...18).each do |x|
|
||||
o = x/6
|
||||
i = x%6
|
||||
outputs[o] += (inputs[i] ? 1.0 : 0.0) * @weights[x]
|
||||
outputs[o] += inputs[i] * @weights[x]
|
||||
end
|
||||
max = 0
|
||||
take = 0
|
||||
@ -197,91 +207,64 @@ class AI
|
||||
end
|
||||
end
|
||||
return take-1
|
||||
=end
|
||||
end
|
||||
|
||||
# Returns a new AI whose weights are a mutated copy of this one's.
#
# One of four mutations is picked with equal probability:
# 0. swap two randomly chosen weights (may pick the same index twice, which
#    leaves the weights unchanged),
# 1. replace one weight with a fresh random value in [-1, 1),
# 2. flip the sign of one weight,
# 3. replace each weight independently with probability 1/5.
#
# @weights itself is not modified.
def evolve
  w = @weights.dup
  n = w.size

  case rand(4)
  when 0 # swap
    i1 = rand(n)
    i2 = rand(n)
    w[i1], w[i2] = w[i2], w[i1]
  when 1 # change single value
    i = rand(n)
    w[i] = rand() * 2 - 1.0
  when 2 # invert single value
    i = rand(n)
    w[i] *= -1.0
  else # change multiple values
    w.each_index do |i|
      w[i] = rand() * 2 - 1 if rand(5)==0
    end
  end

  return AI.new(w)
end
|
||||
|
||||
# Returns a new AI built by uniform crossover with +ai+: each weight is taken
# from +ai+ with probability 1/2, otherwise kept from this AI.
#
# ai:: the other parent; only its +weights+ are read.
#
# Neither parent's weights are modified.
def merge(ai)
  w = @weights.dup
  w2 = ai.weights
  w.each_index do |i|
    w[i] = w2[i] if rand(2)==0
  end
  return AI.new(w)
end
|
||||
|
||||
# Returns a new AI whose weights are the element-wise mean of this AI's
# weights and those of +ai+.
#
# ai:: the other parent; only its +weights+ are read.
#
# Neither parent's weights are modified.
def average(ai)
  w2 = ai.weights
  w = @weights.each_with_index.map do |v, i|
    (v + w2[i]) / 2.0
  end
  return AI.new(w)
end
|
||||
|
||||
# Prints the weights to stdout as a C++ member initialiser
# (<tt>float _weights[N] = {...};</tt>); N is the number of weights.
def dump
  puts "Data:"
  puts "float _weights[#{@weights.size}] = {#{@weights.join(", ")}};"
  #puts "Simplified: #{simplified}"
end
|
||||
end
|
||||
|
||||
graph = File.open(File.dirname(__FILE__) + "/data_set.dat", "w")
|
||||
graph.puts("# Round - Points - Length - Stopped - Dead")
|
||||
|
||||
ais = []
|
||||
round = 1
|
||||
games = []
|
||||
@ -297,10 +280,10 @@ loop do
|
||||
games[x] = Game.new(ais[x])
|
||||
end
|
||||
|
||||
#pool = Thread.pool(16)
|
||||
pool = Thread.pool(16)
|
||||
games.each do |g|
|
||||
|
||||
# pool.process do
|
||||
pool.process do
|
||||
15_000.times do
|
||||
g.loop
|
||||
break if g.dead || g.stopped?
|
||||
@ -308,16 +291,20 @@ loop do
|
||||
g.ai.add_ranking(g)
|
||||
|
||||
end
|
||||
#end
|
||||
#pool.shutdown
|
||||
end
|
||||
pool.shutdown
|
||||
end
|
||||
|
||||
games_sorted = games.sort_by(&:ai_ranking).reverse.take(5)
|
||||
g = games_sorted[0]
|
||||
|
||||
if true#round%10==0
|
||||
puts "Round %5d: %7.1f points, length %3.0f, %3.0f%% stopped, %3.0f%% dead" % [round, g.ai_ranking / GAMES_PER_ROUND, g.ai.sum_length.to_f / GAMES_PER_ROUND, g.ai.count_stopped.to_f / GAMES_PER_ROUND * 100, g.ai.count_dead.to_f / GAMES_PER_ROUND * 100]
|
||||
end
|
||||
puts "Round %5d: %7.1f points, length %3.0f, %3.0f%% stopped, %3.0f%% dead - {%s}" % [round, g.ai_ranking / GAMES_PER_ROUND, g.ai.sum_length.to_f / GAMES_PER_ROUND, g.ai.count_stopped.to_f / GAMES_PER_ROUND * 100, g.ai.count_dead.to_f / GAMES_PER_ROUND * 100, g.ai.weights.map{|v| v.truncate(1).to_s.rjust(4)}.join(", ")]
|
||||
graph.puts("%d %f %f %f %f" % [round, g.ai_ranking / GAMES_PER_ROUND, g.ai.sum_length.to_f / GAMES_PER_ROUND, g.ai.count_stopped.to_f / GAMES_PER_ROUND * 100, g.ai.count_dead.to_f / GAMES_PER_ROUND * 100])
|
||||
graph.flush
|
||||
|
||||
if round%10==0
|
||||
g.ai.dump
|
||||
end
|
||||
|
||||
best_old_ai = g.ai
|
||||
|
||||
@ -337,8 +324,13 @@ loop do
|
||||
5.times do
|
||||
ais << games_sorted[1].ai.merge(games_sorted[0].ai)
|
||||
end
|
||||
ais << games_sorted[0].ai.average(games_sorted[1].ai)
|
||||
10.times do
|
||||
ais << AI.new
|
||||
end
|
||||
round+=1
|
||||
end
|
||||
rescue SystemExit, Interrupt
|
||||
best_old_ai.dump
|
||||
graph.close
|
||||
end
|
||||
|
Loading…
Reference in New Issue
Block a user