416 lines
9.4 KiB
Ruby
416 lines
9.4 KiB
Ruby
#!/usr/bin/env ruby
|
|
require 'rubygems'
|
|
require 'pp'
|
|
require 'thread/pool'
|
|
|
|
# Number of games every AI plays per training round; the per-round statistics
# printed by the training driver are divided by this value to get averages.
GAMES_PER_ROUND = 50
|
|
|
|
# Snake game simulation on a WIDTH x HEIGHT board, steered by an AI object.
#
# The board is kept in @data as a flat, row-major array: 0 is an empty cell,
# 1 is the snake's head and larger numbers are body segments (the number is
# the segment's age in moves). The AI handed to the constructor must respond
# to #decide (see #loop) and, for #ai_ranking, to #ranking.
class Game
  WIDTH = 16
  HEIGHT = 10

  POINTS_APPLE = 10
  POINTS_MOVING_CLOSER = 1
  POINTS_MOVING_FAR = -1.5

  # [dx, dy] per direction index: 0 = up, 1 = right, 2 = down, 3 = left.
  DIRECTIONS = [[0, -1], [1, 0], [0, 1], [-1, 0]].freeze

  attr_reader :points, :dead, :ai, :length
  attr_accessor :apple

  # a     - the AI that steers this game.
  # debug - when true, #move prints the new head position.
  def initialize(a, debug=false)
    @debug = debug
    @ai = a
    @data = [0] * (WIDTH * HEIGHT)
    @dir = 0
    @pos = [WIDTH / 2, HEIGHT / 2]
    # Head at the centre plus three segments trailing downwards.
    4.times { |i| @data[(@pos[1] + i) * WIDTH + @pos[0]] = i + 1 }
    @length = 4
    @points = 0.0
    @dead = false
    @round = 0
    @last_apple_at = 0
    @count_left = @count_right = 0
    place_apple
  end

  # Puts the apple on a uniformly chosen empty cell and resets the distance
  # baseline used for the closer/farther scoring in #move. If the board has
  # no empty cell left, the apple is left where it is (the original retry
  # loop would never terminate in that situation).
  def place_apple
    free_cells = @data.each_index.select { |i| @data[i].zero? }
    return if free_cells.empty?

    cell = free_cells.sample
    @apple = [cell % WIDTH, cell / WIDTH]
    @old_distance = apple_distance
  end

  # Manhattan distance between the head and the apple.
  def apple_distance
    (@pos[0] - @apple[0]).abs + (@pos[1] - @apple[1]).abs
  end

  # Renders the board as ASCII art with the score in the top border.
  # Every cell is exactly one character wide: segment ages 1-9 are shown as
  # digits, older segments as "#", so rows stay aligned for long snakes.
  def to_s
    cells = @data.map do |age|
      if age.zero? then " "
      elsif age < 10 then age.to_s
      else "#"
      end
    end
    cells[@apple[1] * WIDTH + @apple[0]] = "*"

    score = @points.to_s
    # Never ask for a negative repeat count if the score is very wide.
    dashes = [WIDTH - score.length - 1, 0].max
    rows = cells.each_slice(WIDTH).map { |row| "|#{row.join}|\n" }
    "+" + "-" * dashes + " " + score + "+\n" + rows.join + "+" + "-" * WIDTH + "+\n"
  end

  def draw; puts to_s; puts; end

  # Plays one step: asks the AI for a turn (-1 left, 0 straight, +1 right),
  # applies it, and either moves or dies if the cell ahead is blocked.
  # Does nothing once the snake is dead.
  def loop
    return if @dead

    decision = @ai.decide(free?(@dir - 1), free?(@dir), free?(@dir + 1),
                          apple?(@dir - 1), apple?(@dir), apple?(@dir + 1))
    @count_left += 1 if decision == -1
    @count_right += 1 if decision == 1
    @dir = (@dir + decision) % 4
    if free?(@dir) == 0
      die
      return
    end

    move
  end

  # Fitness of this game: length counts positively, dying and stalling are
  # penalised, as is an imbalance between left and right turns.
  def ranking; @length*10 - (@dead ? 200 : 0) - (stopped? ? 100 : 0) - (@count_right - @count_left).abs * 0.05; end

  # Advances the head one cell in the current direction, growing when the
  # apple is eaten and scoring the change in distance to the apple.
  def move
    newpos = calc_new_pos(@pos, @dir)
    puts "Newpos: #{newpos}" if @debug

    ate = newpos == @apple
    if ate
      @length += 1
      @points += POINTS_APPLE
      @last_apple_at = @round
    end

    # Age every segment; segments that reached the snake's length fall off.
    @data.map! do |age|
      next 0 if age.zero? || age >= @length
      age + 1
    end
    @data[newpos[1] * WIDTH + newpos[0]] = 1
    @pos = newpos

    # The new apple is placed only after the head occupies its cell, so it
    # can never spawn underneath the head. place_apple also resets the
    # distance baseline, so the eating move earns POINTS_APPLE only.
    place_apple if ate

    distance = apple_distance
    delta = distance - @old_distance
    @old_distance = distance
    if delta < 0
      @points += POINTS_MOVING_CLOSER
    elsif delta > 0
      @points += POINTS_MOVING_FAR
    end
    @round += 1
  end

  # Number of moves made since the last apple was eaten.
  def since_last_apple; @round - @last_apple_at; end

  # Returns the position one step from p in direction d (taken modulo 4).
  def calc_new_pos(p, d)
    dx, dy = DIRECTIONS[d % 4]
    [p[0] + dx, p[1] + dy]
  end

  # Counts the empty cells from the head in direction dir (modulo 4) up to
  # the first wall or snake segment. Returns an Integer, 0 meaning blocked.
  def free?(dir)
    dx, dy = DIRECTIONS[dir % 4]
    x, y = @pos
    count = 0
    [WIDTH, HEIGHT].max.times do
      x += dx
      y += dy
      break unless x.between?(0, WIDTH - 1) && y.between?(0, HEIGHT - 1)
      break unless @data[y * WIDTH + x].zero?
      count += 1
    end
    count
  end

  # Distance to the apple along the axis of direction dir (modulo 4), or 0
  # when the apple does not lie on that side of the head.
  def apple?(dir)
    d_x = @apple[0] - @pos[0]
    d_y = @apple[1] - @pos[1]
    case dir % 4
    when 0 then [-d_y, 0].max
    when 1 then [d_x, 0].max
    when 2 then [d_y, 0].max
    when 3 then [-d_x, 0].max
    end
  end

  def die
    @dead = true
  end

  # True when no apple has been eaten for two full boards' worth of moves.
  def stopped?; since_last_apple >= WIDTH*HEIGHT*2; end

  def ai_ranking; ai.ranking; end
end
|
|
|
|
# A small fully-connected network without bias terms or activation functions
# that steers the snake, plus the mutation/crossover operators used to breed
# new candidates. Weights are stored flat: for each layer transition, the
# weights of output 0 come first (one per input), then output 1, and so on.
class AI
  NETWORK_LAYOUT = [6, 4, 3]

  attr_reader :weights, :id
  attr_accessor :ranking, :rounds, :count_dead, :count_stopped, :sum_length

  # w     - nil for random weights in [-1, 1), an Array of Floats, or an
  #         Array of Integers holding big-endian IEEE-754 single-precision
  #         bit patterns (the format written by #dump).
  # debug - when true, prints the weights, the inputs and each decision.
  def initialize(w=nil, debug=false)
    @debug = debug
    reset
    @rounds = 1
    @id = rand(0xFFFFFF)
    if w.nil?
      @weights = Array.new(network_size) { rand * 2.0 - 1.0 }
      puts "Initialized with random values: #{@weights}" if @debug
    else
      # 'N' writes each integer as 4 big-endian bytes, 'g' reads them back
      # as big-endian single-precision floats.
      @weights = w[0].is_a?(Integer) ? w.pack("N*").unpack("g*") : w
      puts "Initialized with given values: #{@weights}" if @debug
    end
  end

  # Total number of weights required by NETWORK_LAYOUT.
  def network_size
    NETWORK_LAYOUT.each_cons(2).inject(0) { |total, (c_in, c_out)| total + c_in * c_out }
  end

  # Clears the statistics accumulated by #add_ranking.
  def reset
    @ranking = 0.0
    @count_dead = 0
    @count_stopped = 0
    @sum_length = 0
  end

  # Adds the outcome of a finished game g to this AI's statistics.
  def add_ranking(g)
    @ranking += g.ranking
    @count_dead += 1 if g.dead
    @count_stopped += 1 if g.stopped?
    @sum_length += g.length
  end

  # Feeds the six inputs through the network and returns the turn belonging
  # to the largest output: -1 (left), 0 (straight) or +1 (right). On a tie
  # the lowest index wins.
  #
  # The maximum is taken over the actual outputs; previously it started from
  # 0, so whenever every output was <= 0 the result was always -1 no matter
  # which output was largest.
  # NOTE(review): any external consumer of the dumped weights should use the
  # same argmax rule - confirm.
  def decide(left_free, straight_free, right_free, apple_left, apple_straight, apple_right)
    inputs = [left_free, straight_free, right_free, apple_left, apple_straight, apple_right]
    puts "Inputs: #{inputs}" if @debug

    offset = 0
    outputs = NETWORK_LAYOUT.each_cons(2).inject(inputs) do |layer_in, (c_in, c_out)|
      Array.new(c_out) do
        acc = 0.0
        c_in.times { |k| acc += layer_in[k] * @weights[offset + k] }
        offset += c_in
        acc
      end
    end

    take = outputs.index(outputs.max)
    puts "Decision: #{take-1}" if @debug
    take - 1
  end

  # Produces mutated offspring. Depending on a random choice this returns
  # either a single AI or an Array of two AIs (the +diff and -diff variants
  # of the same mutation); callers are expected to flatten the result.
  def evolve
    w = @weights.dup
    case rand(4)
    when 0 # nudge a single weight
      i = rand(network_size)
      diff = rand * 0.2 - 0.1
      w2 = w.dup
      w[i] = (w[i] + diff).clamp(-1.0, 1.0)
      w2[i] = (w2[i] - diff).clamp(-1.0, 1.0)
      [AI.new(w), AI.new(w2)]
    when 1 # invert a single weight
      w[rand(network_size)] *= -1.0
      AI.new(w)
    when 2 # re-randomise roughly every fifth weight
      network_size.times do |i|
        w[i] = rand * 2 - 1.0 if rand(5) == 0
      end
      AI.new(w)
    else # nudge roughly every fifth weight
      w2 = w.dup
      network_size.times do |i|
        next unless rand(5) == 0
        diff = rand * 0.2 - 0.1
        w[i] = (w[i] + diff).clamp(-1.0, 1.0)
        w2[i] = (w2[i] - diff).clamp(-1.0, 1.0)
      end
      [AI.new(w), AI.new(w2)]
    end
  end

  # Uniform crossover: each weight is taken from self or from ai with equal
  # probability. Returns a new AI.
  def merge(ai)
    w = @weights.dup
    theirs = ai.weights
    network_size.times { |i| w[i] = theirs[i] if rand(2) == 0 }
    AI.new(w)
  end

  # Returns a new AI whose weights are the element-wise mean of both parents.
  def average(ai)
    w = @weights.dup
    theirs = ai.weights
    network_size.times { |i| w[i] = (w[i] + theirs[i]) / 2.0 }
    AI.new(w)
  end

  # Prints the weights as a C array of big-endian single-precision bit
  # patterns, the same encoding #initialize accepts as Integers.
  def dump
    words = @weights.map { |x| "0x%08x" % [x].pack("g").unpack("N").first }
    puts "const uint32_t _weights[#{network_size}] = {#{words.join(", ")}};"
  end
end
|
|
|
|
## Simulate
|
|
=begin
|
|
ai = AI.new([0xbd547c6d, 0xbedc84a5, 0x3e750239, 0x3ec5ae8a, 0xbcc9a683, 0x3f18715a, 0x3e947ed4, 0xbe4b8bf2, 0xbf2ee4ec, 0xbf3f0a75,
|
|
0x3f5392dc, 0xbf06687b, 0xbedca2f2, 0xbcde3698, 0x3edd6a8a, 0xbd7284ca, 0x3ea7bac9, 0xbe5323c1, 0x3eccf87d,
|
|
0xbf2d4796, 0xbf62b6e8, 0xbf71daf6, 0xbeff40aa, 0xbf207014, 0x3e26c03c, 0xbf497837, 0xbee4d175, 0x3ec601de, 0x3e4e0695, 0x3eef2619,
|
|
0xbe849370, 0xbf18fb2b, 0x3f128e17, 0xbf3dcd78, 0x3f517299, 0x3eef3270], true)
|
|
g = Game.new(ai, true)
|
|
g.apple = [3, 3]
|
|
10.times do
|
|
g.loop
|
|
end
|
|
exit
|
|
=end
|
|
|
|
|
|
# Training driver: repeatedly lets every candidate AI play GAMES_PER_ROUND
# games, logs the best candidate of the round, and breeds the next
# generation from the five best. Runs until interrupted; the interrupt
# handler prints the best AI of the last completed round.
graph = File.open(File.dirname(__FILE__) + "/data_set.dat", "w")
graph.puts("# Round - Points - Length - Stopped - Dead")

# Per-game averages of an AI's accumulated round statistics:
# [ranking, length, stopped %, dead %].
averages = lambda do |ai|
  [ai.ranking / GAMES_PER_ROUND,
   ai.sum_length.to_f / GAMES_PER_ROUND,
   ai.count_stopped.to_f / GAMES_PER_ROUND * 100,
   ai.count_dead.to_f / GAMES_PER_ROUND * 100]
end

round = 1
ais = Array.new(50) { AI.new } #(SEEDS.sample)

# Snapshot (taken before the statistics are reset) of the best AI of the
# last completed round; nil until the first round has finished.
best_old_ai = nil

begin
  loop do
    GAMES_PER_ROUND.times do
      # Every candidate gets a game. Playing only the first 50 entries would
      # never evaluate the merged, averaged and fresh random AIs, which are
      # appended after at least 50 elite/mutated candidates.
      games = ais.map { |candidate| Game.new(candidate) }

      pool = Thread.pool(16)
      games.each do |game|
        pool.process do
          15_000.times do
            game.loop
            break if game.dead || game.stopped?
          end
          game.ai.add_ranking(game)
        end
      end
      pool.shutdown
    end

    elite = ais.sort_by(&:ranking).reverse.take(5)
    best = elite[0]
    stats = averages.call(best)

    if (round-1)%50==0
      puts "----------------------------------------------------"
      puts "Round | Points | Length | Stopped | Dead | ID "
      puts "----------------------------------------------------"
    end

    puts "%5d | %7.1f | %6.0f | %6.0f%% | %3.0f%% | 0x%06x" % [round, *stats, best.id]
    graph.puts("%d %f %f %f %f" % [round, *stats])
    graph.flush

    best.dump if round%10==0

    # Copy before reset so the interrupt summary still sees this round's
    # statistics rather than the partially accumulated next round.
    best_old_ai = best.dup

    ais = []
    elite.each do |parent|
      parent.reset
      parent.rounds += 1
      ais << parent
      # evolve returns one AI or a pair; flattened below.
      9.times { ais << parent.evolve }
    end

    5.times { ais << elite[0].merge(elite[1]) }
    5.times { ais << elite[1].merge(elite[0]) }
    ais << elite[0].average(elite[1])
    10.times { ais << AI.new }
    ais = ais.flatten
    round+=1
  end
rescue SystemExit, Interrupt
  puts
  puts
  # Nothing to report when interrupted before the first round completed.
  if best_old_ai
    puts "// Round %d, %5.1f points, length %3d, %2.0f%% stopped, %2.0f%% died" % [round-1, *averages.call(best_old_ai)]
    best_old_ai.dump
  end
ensure
  graph.close
end
|