Snake effect now uses a neural net with a hidden layer; a bug in _is_free() that led to lots of snakes committing suicide was fixed; neural net weights are now given as the binary representation of a float to prevent rounding errors.

This commit is contained in:
2019-10-18 06:40:09 +02:00
parent 306f72d838
commit 54925dfc0e
4 changed files with 178 additions and 62 deletions

View File

@ -1,2 +1,6 @@
#!/usr/bin/gnuplot -c
set term dumb 79 49; plot 'data_set.dat' notitle
set term dumb 79 49
plot 'data_set.dat' using 1:2 title 'Points', \
# 'data_set.dat' using 1:3 title 'Length' axes x1y2, \
# 'data_set.dat' using 1:4 title 'Stopped' axes x1y2, \
# 'data_set.dat' using 1:5 title 'Dead' axes x1y2

View File

@ -14,8 +14,10 @@ class Game
POINTS_MOVING_FAR = -1.5
attr_reader :points, :dead, :ai, :length
attr_accessor :apple
def initialize(a)
def initialize(a, debug=false)
@debug = debug
@ai = a
@data = [0]*(WIDTH*HEIGHT)
@dir = 0
@ -29,6 +31,7 @@ class Game
@dead = false
@round = 0
@last_apple_at = 0
@count_left = @count_right = 0
place_apple()
end
@ -63,6 +66,8 @@ class Game
return if @dead
decision = @ai.decide(free?(@dir-1), free?(@dir), free?(@dir+1), apple?(@dir-1), apple?(@dir), apple?(@dir+1))
#puts "Decision: #{decision}"
@count_left += 1 if decision==-1
@count_right += 1 if decision==1
@dir = (@dir + decision) % 4
if (free?(@dir)==0)
#puts "Dead."
@ -73,11 +78,11 @@ class Game
move
end
def ranking; @length*10 - (@dead ? 200 : 0) - (since_last_apple >= 160 ? 100 : 0); end
def ranking; @length*10 - (@dead ? 200 : 0) - (stopped? ? 100 : 0) - (@count_right - @count_left).abs * 0.05; end
def move
newpos = calc_new_pos(@pos, @dir)
#puts "Newpos: #{newpos}"
puts "Newpos: #{newpos}" if @debug
if newpos==@apple
@length+=1
@points += POINTS_APPLE
@ -156,23 +161,41 @@ class Game
@dead = true
end
def stopped?; since_last_apple >= WIDTH*HEIGHT*1.5; end
def stopped?; since_last_apple >= WIDTH*HEIGHT*2; end
def ai_ranking; ai.ranking; end
end
class AI
attr_reader :weights
NETWORK_LAYOUT = [6, 4, 3]
attr_reader :weights, :id
attr_accessor :ranking, :rounds, :count_dead, :count_stopped, :sum_length
def initialize(w=nil)
def initialize(w=nil, debug=false)
@debug = debug
reset()
@rounds = 1
@id = rand(0xFFFFFF)
if w==nil
@weights = Array.new(18) { rand() * 2.0 - 1.0 }
@weights = Array.new(network_size()) { rand() * 2.0 - 1.0 }
puts "Initialized with random values: #{@weights}" if @debug
else
@weights = w
if w[0].is_a? Integer
@weights = w.map{|s| s.to_s(16).rjust(8, "0").split("").each_slice(2).to_a.map(&:join).map{|s| s.to_i(16).chr}.join.unpack("g")}.flatten
else
@weights = w
end
puts "Initialized with given values: #{@weights}" if @debug
end
end
# Number of weights the network needs: for every pair of adjacent layers in
# NETWORK_LAYOUT, the product of their sizes (one weight per connection),
# summed over all pairs. Yields 0 when the layout has fewer than two layers.
def network_size
NETWORK_LAYOUT.each_cons(2).sum { |fan_in, fan_out| fan_in * fan_out }
end
def reset
@ -191,43 +214,59 @@ class AI
def decide(left_free, straight_free, right_free, apple_left, apple_straight, apple_right)
inputs = [left_free, straight_free, right_free, apple_left, apple_straight, apple_right]
#pp inputs
outputs = [0, 0, 0]
(0...18).each do |x|
o = x/6
i = x%6
outputs[o] += inputs[i] * @weights[x]
puts "Inputs: #{inputs}" if @debug
outputs = nil
x = 0
(1...(NETWORK_LAYOUT.count)).each do |i|
c_in = NETWORK_LAYOUT[i-1]
c_out = NETWORK_LAYOUT[i]
outputs = Array.new(c_out){0.0}
(0...c_out).each do |o|
(0...c_in).each do |i|
outputs[o] += inputs[i] * @weights[x]
x+=1
end
end
inputs = outputs
end
max = 0
take = 0
(0...3).each do |x|
(0...(NETWORK_LAYOUT.last)).each do |x|
if outputs[x]>max
max = outputs[x]
take = x
end
end
puts "Decision: #{take-1}" if @debug
return take-1
end
def evolve
w = @weights.dup
action = rand(4)
action = rand(5)
if action==0 #swap
i1 = rand(18)
i2 = rand(18)
i1 = rand(network_size())
i2 = rand(network_size())
temp = w[i1]
w[i1] = w[i2]
w[i2] = temp
elsif action==1 #change single value
i = rand(18)
i = rand(network_size())
w[i] = rand() * 2 - 1.0
elsif action==2 #invert single value
i = rand(18)
i = rand(network_size())
w[i] *= -1.0
elsif action==3
(0...network_size()).each do |i|
w[i] = rand() * 2 - 1.0 if rand(5)==0
end
else #change multiple values
(0...18).each do |i|
(0...network_size()).each do |i|
if (rand(5)==0)
w[i] = rand() * 2 - 1
w[i] += rand() / 5.0 - 0.1
w[i] = 1.0 if w[i]>1.0
w[i] = -1.0 if w[i]<-1.0
end
end
end
@ -238,7 +277,7 @@ class AI
def merge(ai)
w = @weights.dup
w2 = ai.weights.dup
(0...18).each do |i|
(0...network_size()).each do |i|
if rand(2)==0
w[i] = w2[i]
end
@ -249,19 +288,33 @@ class AI
def average(ai)
w = @weights.dup
w2 = ai.weights
(0...18).each do |i|
(0...network_size()).each do |i|
w[i] = (w[i] + w2[i]) / 2.0
end
return AI.new(w)
end
def dump
puts "Data:"
puts "float _weights[18] = {#{@weights.join(", ")}};"
puts "const uint32_t _weights[#{network_size()}] = {#{@weights.map{|x| "0x" + [x].pack('g').split("").map(&:ord).map{|i| i.to_s(16).rjust(2, '0')}.join}.join(", ")}};"
#puts "Simplified: #{simplified}"
end
end
## Simulate
=begin
ai = AI.new([0xbd547c6d, 0xbedc84a5, 0x3e750239, 0x3ec5ae8a, 0xbcc9a683, 0x3f18715a, 0x3e947ed4, 0xbe4b8bf2, 0xbf2ee4ec, 0xbf3f0a75,
0x3f5392dc, 0xbf06687b, 0xbedca2f2, 0xbcde3698, 0x3edd6a8a, 0xbd7284ca, 0x3ea7bac9, 0xbe5323c1, 0x3eccf87d,
0xbf2d4796, 0xbf62b6e8, 0xbf71daf6, 0xbeff40aa, 0xbf207014, 0x3e26c03c, 0xbf497837, 0xbee4d175, 0x3ec601de, 0x3e4e0695, 0x3eef2619,
0xbe849370, 0xbf18fb2b, 0x3f128e17, 0xbf3dcd78, 0x3f517299, 0x3eef3270], true)
g = Game.new(ai, true)
g.apple = [3, 3]
10.times do
g.loop
end
exit
=end
graph = File.open(File.dirname(__FILE__) + "/data_set.dat", "w")
graph.puts("# Round - Points - Length - Stopped - Dead")
@ -272,6 +325,7 @@ games = []
ais[x] = AI.new#(SEEDS.sample)
end
best_old_game = nil
best_old_ai = nil
begin
loop do
@ -297,8 +351,14 @@ loop do
games_sorted = games.sort_by(&:ai_ranking).reverse.take(5)
g = games_sorted[0]
if (round-1)%50==0
puts "----------------------------------------------------"
puts "Round | Points | Length | Stopped | Dead | ID "
puts "----------------------------------------------------"
end
puts "Round %5d: %7.1f points, length %3.0f, %3.0f%% stopped, %3.0f%% dead - {%s}" % [round, g.ai_ranking / GAMES_PER_ROUND, g.ai.sum_length.to_f / GAMES_PER_ROUND, g.ai.count_stopped.to_f / GAMES_PER_ROUND * 100, g.ai.count_dead.to_f / GAMES_PER_ROUND * 100, g.ai.weights.map{|v| v.truncate(1).to_s.rjust(4)}.join(", ")]
puts "%5d | %7.1f | %6.0f | %6.0f%% | %3.0f%% | 0x%06x" % [round, g.ai_ranking / GAMES_PER_ROUND, g.ai.sum_length.to_f / GAMES_PER_ROUND, g.ai.count_stopped.to_f / GAMES_PER_ROUND * 100, g.ai.count_dead.to_f / GAMES_PER_ROUND * 100, g.ai.id]
graph.puts("%d %f %f %f %f" % [round, g.ai_ranking / GAMES_PER_ROUND, g.ai.sum_length.to_f / GAMES_PER_ROUND, g.ai.count_stopped.to_f / GAMES_PER_ROUND * 100, g.ai.count_dead.to_f / GAMES_PER_ROUND * 100])
graph.flush
@ -306,7 +366,8 @@ loop do
g.ai.dump
end
best_old_ai = g.ai
best_old_game = g
best_old_ai = g.ai.dup
ais = []
games_sorted.each do |g|
@ -331,6 +392,9 @@ loop do
round+=1
end
rescue SystemExit, Interrupt
puts
puts
puts "// Round %d, %5.1f points, length %3d, %2.0f%% stopped, %2.0f%% died" % [round-1, best_old_game.ai_ranking / GAMES_PER_ROUND, best_old_ai.sum_length.to_f / GAMES_PER_ROUND, best_old_ai.count_stopped.to_f / GAMES_PER_ROUND * 100, best_old_ai.count_dead.to_f / GAMES_PER_ROUND * 100]
best_old_ai.dump
graph.close
end