The snake effect now uses a neural net with a hidden layer; a bug in _is_free() that led to lots of snakes committing suicide was fixed; neural net weights are now given as the binary representation of a float to prevent rounding errors.
This commit is contained in:
@ -1,2 +1,6 @@
|
||||
#!/usr/bin/gnuplot -c
|
||||
set term dumb 79 49; plot 'data_set.dat' notitle
|
||||
set term dumb 79 49
|
||||
plot 'data_set.dat' using 1:2 title 'Points', \
|
||||
# 'data_set.dat' using 1:3 title 'Length' axes x1y2, \
|
||||
# 'data_set.dat' using 1:4 title 'Stopped' axes x1y2, \
|
||||
# 'data_set.dat' using 1:5 title 'Dead' axes x1y2
|
||||
|
@ -14,8 +14,10 @@ class Game
|
||||
POINTS_MOVING_FAR = -1.5
|
||||
|
||||
attr_reader :points, :dead, :ai, :length
|
||||
attr_accessor :apple
|
||||
|
||||
def initialize(a)
|
||||
def initialize(a, debug=false)
|
||||
@debug = debug
|
||||
@ai = a
|
||||
@data = [0]*(WIDTH*HEIGHT)
|
||||
@dir = 0
|
||||
@ -29,6 +31,7 @@ class Game
|
||||
@dead = false
|
||||
@round = 0
|
||||
@last_apple_at = 0
|
||||
@count_left = @count_right = 0
|
||||
place_apple()
|
||||
end
|
||||
|
||||
@ -63,6 +66,8 @@ class Game
|
||||
return if @dead
|
||||
decision = @ai.decide(free?(@dir-1), free?(@dir), free?(@dir+1), apple?(@dir-1), apple?(@dir), apple?(@dir+1))
|
||||
#puts "Decision: #{decision}"
|
||||
@count_left += 1 if decision==-1
|
||||
@count_right += 1 if decision==1
|
||||
@dir = (@dir + decision) % 4
|
||||
if (free?(@dir)==0)
|
||||
#puts "Dead."
|
||||
@ -73,11 +78,11 @@ class Game
|
||||
move
|
||||
end
|
||||
|
||||
def ranking; @length*10 - (@dead ? 200 : 0) - (since_last_apple >= 160 ? 100 : 0); end
|
||||
def ranking; @length*10 - (@dead ? 200 : 0) - (stopped? ? 100 : 0) - (@count_right - @count_left).abs * 0.05; end
|
||||
|
||||
def move
|
||||
newpos = calc_new_pos(@pos, @dir)
|
||||
#puts "Newpos: #{newpos}"
|
||||
puts "Newpos: #{newpos}" if @debug
|
||||
if newpos==@apple
|
||||
@length+=1
|
||||
@points += POINTS_APPLE
|
||||
@ -156,23 +161,41 @@ class Game
|
||||
@dead = true
|
||||
end
|
||||
|
||||
def stopped?; since_last_apple >= WIDTH*HEIGHT*1.5; end
|
||||
def stopped?; since_last_apple >= WIDTH*HEIGHT*2; end
|
||||
|
||||
def ai_ranking; ai.ranking; end
|
||||
end
|
||||
|
||||
class AI
|
||||
attr_reader :weights
|
||||
NETWORK_LAYOUT = [6, 4, 3]
|
||||
attr_reader :weights, :id
|
||||
attr_accessor :ranking, :rounds, :count_dead, :count_stopped, :sum_length
|
||||
|
||||
def initialize(w=nil)
|
||||
def initialize(w=nil, debug=false)
|
||||
@debug = debug
|
||||
reset()
|
||||
@rounds = 1
|
||||
@id = rand(0xFFFFFF)
|
||||
if w==nil
|
||||
@weights = Array.new(18) { rand() * 2.0 - 1.0 }
|
||||
@weights = Array.new(network_size()) { rand() * 2.0 - 1.0 }
|
||||
puts "Initialized with random values: #{@weights}" if @debug
|
||||
else
|
||||
@weights = w
|
||||
if w[0].is_a? Integer
|
||||
@weights = w.map{|s| s.to_s(16).rjust(8, "0").split("").each_slice(2).to_a.map(&:join).map{|s| s.to_i(16).chr}.join.unpack("g")}.flatten
|
||||
else
|
||||
@weights = w
|
||||
end
|
||||
puts "Initialized with given values: #{@weights}" if @debug
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
# Number of weights the network needs: for every pair of adjacent layers in
# NETWORK_LAYOUT, multiply the two layer sizes and add the products together.
# Yields 0 when the layout has fewer than two layers.
def network_size
  NETWORK_LAYOUT.each_cons(2).inject(0) do |total, (fan_in, fan_out)|
    total + fan_in * fan_out
  end
end
|
||||
|
||||
def reset
|
||||
@ -191,43 +214,59 @@ class AI
|
||||
|
||||
def decide(left_free, straight_free, right_free, apple_left, apple_straight, apple_right)
|
||||
inputs = [left_free, straight_free, right_free, apple_left, apple_straight, apple_right]
|
||||
#pp inputs
|
||||
outputs = [0, 0, 0]
|
||||
(0...18).each do |x|
|
||||
o = x/6
|
||||
i = x%6
|
||||
outputs[o] += inputs[i] * @weights[x]
|
||||
puts "Inputs: #{inputs}" if @debug
|
||||
outputs = nil
|
||||
x = 0
|
||||
(1...(NETWORK_LAYOUT.count)).each do |i|
|
||||
c_in = NETWORK_LAYOUT[i-1]
|
||||
c_out = NETWORK_LAYOUT[i]
|
||||
outputs = Array.new(c_out){0.0}
|
||||
(0...c_out).each do |o|
|
||||
(0...c_in).each do |i|
|
||||
outputs[o] += inputs[i] * @weights[x]
|
||||
x+=1
|
||||
end
|
||||
end
|
||||
inputs = outputs
|
||||
end
|
||||
|
||||
max = 0
|
||||
take = 0
|
||||
(0...3).each do |x|
|
||||
(0...(NETWORK_LAYOUT.last)).each do |x|
|
||||
if outputs[x]>max
|
||||
max = outputs[x]
|
||||
take = x
|
||||
end
|
||||
end
|
||||
puts "Decision: #{take-1}" if @debug
|
||||
return take-1
|
||||
end
|
||||
|
||||
def evolve
|
||||
w = @weights.dup
|
||||
action = rand(4)
|
||||
action = rand(5)
|
||||
if action==0 #swap
|
||||
i1 = rand(18)
|
||||
i2 = rand(18)
|
||||
i1 = rand(network_size())
|
||||
i2 = rand(network_size())
|
||||
temp = w[i1]
|
||||
w[i1] = w[i2]
|
||||
w[i2] = temp
|
||||
elsif action==1 #change single value
|
||||
i = rand(18)
|
||||
i = rand(network_size())
|
||||
w[i] = rand() * 2 - 1.0
|
||||
elsif action==2 #invert single value
|
||||
i = rand(18)
|
||||
i = rand(network_size())
|
||||
w[i] *= -1.0
|
||||
elsif action==3
|
||||
(0...network_size()).each do |i|
|
||||
w[i] = rand() * 2 - 1.0 if rand(5)==0
|
||||
end
|
||||
else #change multiple values
|
||||
(0...18).each do |i|
|
||||
(0...network_size()).each do |i|
|
||||
if (rand(5)==0)
|
||||
w[i] = rand() * 2 - 1
|
||||
w[i] += rand() / 5.0 - 0.1
|
||||
w[i] = 1.0 if w[i]>1.0
|
||||
w[i] = -1.0 if w[i]<-1.0
|
||||
end
|
||||
end
|
||||
end
|
||||
@ -238,7 +277,7 @@ class AI
|
||||
def merge(ai)
|
||||
w = @weights.dup
|
||||
w2 = ai.weights.dup
|
||||
(0...18).each do |i|
|
||||
(0...network_size()).each do |i|
|
||||
if rand(2)==0
|
||||
w[i] = w2[i]
|
||||
end
|
||||
@ -249,19 +288,33 @@ class AI
|
||||
def average(ai)
|
||||
w = @weights.dup
|
||||
w2 = ai.weights
|
||||
(0...18).each do |i|
|
||||
(0...network_size()).each do |i|
|
||||
w[i] = (w[i] + w2[i]) / 2.0
|
||||
end
|
||||
return AI.new(w)
|
||||
end
|
||||
|
||||
def dump
|
||||
puts "Data:"
|
||||
puts "float _weights[18] = {#{@weights.join(", ")}};"
|
||||
puts "const uint32_t _weights[#{network_size()}] = {#{@weights.map{|x| "0x" + [x].pack('g').split("").map(&:ord).map{|i| i.to_s(16).rjust(2, '0')}.join}.join(", ")}};"
|
||||
#puts "Simplified: #{simplified}"
|
||||
end
|
||||
end
|
||||
|
||||
## Simulate
|
||||
=begin
|
||||
ai = AI.new([0xbd547c6d, 0xbedc84a5, 0x3e750239, 0x3ec5ae8a, 0xbcc9a683, 0x3f18715a, 0x3e947ed4, 0xbe4b8bf2, 0xbf2ee4ec, 0xbf3f0a75,
|
||||
0x3f5392dc, 0xbf06687b, 0xbedca2f2, 0xbcde3698, 0x3edd6a8a, 0xbd7284ca, 0x3ea7bac9, 0xbe5323c1, 0x3eccf87d,
|
||||
0xbf2d4796, 0xbf62b6e8, 0xbf71daf6, 0xbeff40aa, 0xbf207014, 0x3e26c03c, 0xbf497837, 0xbee4d175, 0x3ec601de, 0x3e4e0695, 0x3eef2619,
|
||||
0xbe849370, 0xbf18fb2b, 0x3f128e17, 0xbf3dcd78, 0x3f517299, 0x3eef3270], true)
|
||||
g = Game.new(ai, true)
|
||||
g.apple = [3, 3]
|
||||
10.times do
|
||||
g.loop
|
||||
end
|
||||
exit
|
||||
=end
|
||||
|
||||
|
||||
graph = File.open(File.dirname(__FILE__) + "/data_set.dat", "w")
|
||||
graph.puts("# Round - Points - Length - Stopped - Dead")
|
||||
|
||||
@ -272,6 +325,7 @@ games = []
|
||||
ais[x] = AI.new#(SEEDS.sample)
|
||||
end
|
||||
|
||||
best_old_game = nil
|
||||
best_old_ai = nil
|
||||
begin
|
||||
loop do
|
||||
@ -297,8 +351,14 @@ loop do
|
||||
|
||||
games_sorted = games.sort_by(&:ai_ranking).reverse.take(5)
|
||||
g = games_sorted[0]
|
||||
|
||||
if (round-1)%50==0
|
||||
puts "----------------------------------------------------"
|
||||
puts "Round | Points | Length | Stopped | Dead | ID "
|
||||
puts "----------------------------------------------------"
|
||||
end
|
||||
|
||||
puts "Round %5d: %7.1f points, length %3.0f, %3.0f%% stopped, %3.0f%% dead - {%s}" % [round, g.ai_ranking / GAMES_PER_ROUND, g.ai.sum_length.to_f / GAMES_PER_ROUND, g.ai.count_stopped.to_f / GAMES_PER_ROUND * 100, g.ai.count_dead.to_f / GAMES_PER_ROUND * 100, g.ai.weights.map{|v| v.truncate(1).to_s.rjust(4)}.join(", ")]
|
||||
puts "%5d | %7.1f | %6.0f | %6.0f%% | %3.0f%% | 0x%06x" % [round, g.ai_ranking / GAMES_PER_ROUND, g.ai.sum_length.to_f / GAMES_PER_ROUND, g.ai.count_stopped.to_f / GAMES_PER_ROUND * 100, g.ai.count_dead.to_f / GAMES_PER_ROUND * 100, g.ai.id]
|
||||
graph.puts("%d %f %f %f %f" % [round, g.ai_ranking / GAMES_PER_ROUND, g.ai.sum_length.to_f / GAMES_PER_ROUND, g.ai.count_stopped.to_f / GAMES_PER_ROUND * 100, g.ai.count_dead.to_f / GAMES_PER_ROUND * 100])
|
||||
graph.flush
|
||||
|
||||
@ -306,7 +366,8 @@ loop do
|
||||
g.ai.dump
|
||||
end
|
||||
|
||||
best_old_ai = g.ai
|
||||
best_old_game = g
|
||||
best_old_ai = g.ai.dup
|
||||
|
||||
ais = []
|
||||
games_sorted.each do |g|
|
||||
@ -331,6 +392,9 @@ loop do
|
||||
round+=1
|
||||
end
|
||||
rescue SystemExit, Interrupt
|
||||
puts
|
||||
puts
|
||||
puts "// Round %d, %5.1f points, length %3d, %2.0f%% stopped, %2.0f%% died" % [round-1, best_old_game.ai_ranking / GAMES_PER_ROUND, best_old_ai.sum_length.to_f / GAMES_PER_ROUND, best_old_ai.count_stopped.to_f / GAMES_PER_ROUND * 100, best_old_ai.count_dead.to_f / GAMES_PER_ROUND * 100]
|
||||
best_old_ai.dump
|
||||
graph.close
|
||||
end
|
||||
|
Reference in New Issue
Block a user