#!/usr/bin/env ruby
#
# Evolves the weights of a small feed-forward network that plays snake.
# Each round every AI plays GAMES_PER_ROUND games; the five best AIs survive
# and are mutated, merged and averaged to form the next population.
# Per-round statistics are written to data_set.dat next to this script and the
# best weights are periodically dumped as a C array literal.

require 'rubygems'
require 'pp'
require 'thread/pool'

GAMES_PER_ROUND = 50
# true: weights are floats in [-1.0, 1.0]; false: integers in [-128, 127].
FLOAT = true

# One game of snake on a WIDTH x HEIGHT board, steered by an AI.
#
# The board is a flat array; 0 is a free cell, any positive value is a snake
# segment whose value is its distance from the head (1 = head).
# Directions are numbered 0 = up, 1 = right, 2 = down, 3 = left.
class Game
  WIDTH = 16
  HEIGHT = 10
  POINTS_APPLE = 10
  POINTS_MOVING_CLOSER = 1
  POINTS_MOVING_FAR = -1.5

  # [dx, dy] step for each direction index.
  DIRECTION_DELTAS = [[0, -1], [1, 0], [0, 1], [-1, 0]].freeze

  attr_reader :points, :dead, :ai, :length
  attr_accessor :apple

  # @param a [AI] the AI asked for a decision on every step
  # @param debug [Boolean] print the new head position on every move
  def initialize(a, debug=false)
    @debug = debug
    @ai = a
    @data = [0] * (WIDTH * HEIGHT)
    @dir = 0
    @pos = [WIDTH / 2, HEIGHT / 2]
    # Initial snake: head at @pos, three more segments directly below it.
    4.times do |offset|
      @data[(@pos[1] + offset) * WIDTH + @pos[0]] = offset + 1
    end
    @length = 4
    @points = 0.0
    @dead = false
    @round = 0
    @last_apple_at = 0
    @count_left = @count_right = 0
    place_apple
  end

  # Puts the apple on a random free cell and remembers the current distance
  # to it as the baseline for the closer/farther scoring in #move.
  def place_apple
    # NOTE: Kernel#loop is shadowed by Game#loop inside this class, so the
    # retry is written with `until`.
    cell = rand(WIDTH * HEIGHT)
    cell = rand(WIDTH * HEIGHT) until @data[cell] == 0
    @apple = [cell % WIDTH, cell / WIDTH]
    @old_distance = apple_distance
  end

  # @return [Integer] Manhattan distance between the head and the apple
  def apple_distance
    (@pos[0] - @apple[0]).abs + (@pos[1] - @apple[1]).abs
  end

  # Renders the board as ASCII art with the points in the top border.
  # Every cell is exactly one character wide: " " for free cells, the digit
  # for segment values 1-9, "#" for segment values of 10 and above, "*" for
  # the apple.
  #
  # @return [String]
  def to_s
    cells = @data.map do |value|
      if value == 0
        " "
      elsif value < 10
        value.to_s
      else
        "#"
      end
    end
    cells[@apple[1] * WIDTH + @apple[0]] = "*"

    score = @points.to_s
    # Never ask for a negative number of dashes if the score gets very long.
    dashes = [WIDTH - score.length - 1, 0].max
    rows = cells.each_slice(WIDTH).map { |row| "|" + row.join + "|\n" }

    "+" + "-" * dashes + " " + score + "+\n" + rows.join + "+" + "-" * WIDTH + "+\n"
  end

  # Prints the board followed by an empty line.
  def draw
    puts to_s
    puts
  end

  # Advances the game by one step: asks the AI for a decision (-1 = turn
  # left, 0 = straight, 1 = turn right), then either dies (no free cell
  # directly ahead) or moves. Does nothing once the snake is dead.
  def loop
    #puts "Loop. Position: #{@pos}"
    return if @dead

    decision = @ai.decide(free?(@dir - 1), free?(@dir), free?(@dir + 1),
                          apple?(@dir - 1), apple?(@dir), apple?(@dir + 1))
    #puts "Decision: #{decision}"
    @count_left += 1 if decision == -1
    @count_right += 1 if decision == 1
    @dir = (@dir + decision) % 4

    if free?(@dir) == 0
      #puts "Dead."
      die
      return
    end
    move
  end

  # Fitness of this game: 10 per unit of length, minus 200 when dead, minus
  # 100 when stopped, minus a small penalty for unbalanced left/right turns.
  def ranking
    @length * 10 - (@dead ? 200 : 0) - (stopped? ? 100 : 0) - (@count_right - @count_left).abs * 0.05
  end

  # Moves the head one cell in the current direction, eating the apple if it
  # is there, and awards points for getting closer to / farther from the apple.
  def move
    newpos = calc_new_pos(@pos, @dir)
    puts "Newpos: #{newpos}" if @debug

    if newpos == @apple
      @length += 1
      @points += POINTS_APPLE
      @last_apple_at = @round
      place_apple
    end

    # Age every segment by one; segments that reached the snake length fall
    # off the tail.
    @data.map! { |age| (age == 0 || age >= @length) ? 0 : age + 1 }
    @data[newpos[1] * WIDTH + newpos[0]] = 1
    @pos = newpos

    distance = apple_distance
    ad_d = distance - @old_distance
    @old_distance = distance
    if ad_d < 0
      @points += POINTS_MOVING_CLOSER
    elsif ad_d > 0
      @points += POINTS_MOVING_FAR
    end

    @round += 1
  end

  # @return [Integer] number of moves since the last apple was eaten
  def since_last_apple
    @round - @last_apple_at
  end

  # @param p [Array(Integer, Integer)] position as [x, y]
  # @param d [Integer] direction (taken modulo 4)
  # @return [Array(Integer, Integer)] the neighbouring position in direction d
  def calc_new_pos(p, d)
    dx, dy = DIRECTION_DELTAS[d % 4]
    np = p.dup
    np[0] += dx
    np[1] += dy
    np
  end

  # Counts the free cells from the head in the given direction until a wall
  # or a snake segment is hit.
  #
  # @param dir [Integer] direction (taken modulo 4)
  # @return [Integer] number of free cells; 0 means the next cell is blocked
  def free?(dir)
    dx, dy = DIRECTION_DELTAS[dir % 4]
    x, y = @pos
    count = 0
    [WIDTH, HEIGHT].max.times do
      x += dx
      y += dy
      break if x < 0 || x >= WIDTH || y < 0 || y >= HEIGHT || @data[y * WIDTH + x] != 0
      count += 1
    end
    count
  end

  # Distance to the apple along the axis of the given direction, or 0 when
  # the apple does not lie on that side of the head.
  #
  # @param dir [Integer] direction (taken modulo 4)
  # @return [Integer]
  def apple?(dir)
    d_x = @apple[0] - @pos[0]
    d_y = @apple[1] - @pos[1]
    # NOTE(review): the original source carried a commented-out boolean
    # variant of this check here; it was damaged in extraction and is omitted.
    case dir % 4
    when 0 then d_y < 0 ? -d_y : 0
    when 1 then d_x > 0 ? d_x : 0
    when 2 then d_y > 0 ? d_y : 0
    when 3 then d_x < 0 ? -d_x : 0
    end
  end

  def die
    @dead = true
  end

  # @return [Boolean] true when no apple was eaten for 2 * WIDTH * HEIGHT moves
  def stopped?
    since_last_apple >= WIDTH * HEIGHT * 2
  end

  # @return [Numeric] accumulated ranking of the AI playing this game
  def ai_ranking
    ai.ranking
  end
end

# A fully connected network without biases or activation functions, laid out
# as NETWORK_LAYOUT (inputs, hidden, outputs), plus the bookkeeping used to
# rank it over several games.
class AI
  NETWORK_LAYOUT = [6, 4, 3]

  attr_reader :weights, :id
  attr_accessor :ranking, :rounds, :count_dead, :count_stopped, :sum_length

  # @param w [Array<Numeric>, nil] weights to use; random weights when nil.
  #   When FLOAT is set and the first element is an Integer, every element is
  #   decoded as the 32-bit big-endian bit pattern of a single-precision
  #   float (the format written by #dump).
  # @param debug [Boolean] print weights, inputs and decisions
  def initialize(w=nil, debug=false)
    @debug = debug
    reset
    @rounds = 1
    @id = rand(0xFFFFFF)
    if w.nil?
      @weights = Array.new(network_size) { random_weight }
      puts "Initialized with random values: #{@weights}" if @debug
    else
      @weights = (w[0].is_a?(Integer) && FLOAT) ? w.pack("N*").unpack("g*") : w
      puts "Initialized with given values: #{@weights}" if @debug
    end
  end

  # @return [Integer] total number of weights between all adjacent layers
  def network_size
    NETWORK_LAYOUT.each_cons(2).inject(0) { |sum, (c_in, c_out)| sum + c_in * c_out }
  end

  # Clears the statistics accumulated by #add_ranking.
  def reset
    @ranking = 0.0
    @count_dead = 0
    @count_stopped = 0
    @sum_length = 0
  end

  # Adds the outcome of a finished game to this AI's statistics.
  #
  # @param g [Game]
  def add_ranking(g)
    @ranking += g.ranking
    @count_dead += 1 if g.dead
    @count_stopped += 1 if g.stopped?
    @sum_length += g.length
  end

  # Feeds the six inputs through the network and picks the output with the
  # largest value.
  #
  # The search starts at max = 0, so an output only wins when it is strictly
  # positive; when no output is positive, index 0 ("turn left") is taken.
  #
  # @return [Integer] -1 (turn left), 0 (straight) or 1 (turn right)
  def decide(left_free, straight_free, right_free, apple_left, apple_straight, apple_right)
    inputs = [left_free, straight_free, right_free, apple_left, apple_straight, apple_right]
    puts "Inputs: #{inputs}" if @debug

    outputs = nil
    weight_index = 0
    NETWORK_LAYOUT.each_cons(2) do |c_in, c_out|
      outputs = Array.new(c_out) do
        sum = FLOAT ? 0.0 : 0
        c_in.times do |input_index|
          sum += inputs[input_index] * @weights[weight_index]
          weight_index += 1
        end
        sum
      end
      inputs = outputs
    end

    max = 0
    take = 0
    outputs.each_with_index do |value, index|
      if value > max
        max = value
        take = index
      end
    end

    puts "Decision: #{take-1}" if @debug
    take - 1
  end

  # Creates mutated offspring using one of four randomly chosen actions.
  #
  # @return [AI, Array<AI>] a single AI for the "invert" and "randomize"
  #   actions; a pair (weights moved by +diff and by -diff) for the two
  #   "change" actions. Callers are expected to flatten.
  def evolve
    w = @weights.dup
    action = rand(4)
    #if action==0 #swap
    #  i1 = rand(network_size())
    #  i2 = rand(network_size())
    #  temp = w[i1]
    #  w[i1] = w[i2]
    #  w[i2] = temp
    case action
    when 0 # change single value
      i = rand(network_size)
      diff = random_delta
      w2 = w.dup
      w[i] = clamp_weight(w[i] + diff)
      w2[i] = clamp_weight(w2[i] - diff)
      return [AI.new(w), AI.new(w2)]
    when 1 # invert single value
      i = rand(network_size)
      w[i] *= FLOAT ? -1.0 : -1
    when 2 # replace roughly every fifth value with a fresh random one
      network_size.times do |i|
        w[i] = random_weight if rand(5) == 0
      end
    else # change multiple values
      w2 = w.dup
      network_size.times do |i|
        next unless rand(5) == 0
        diff = random_delta
        w[i] = clamp_weight(w[i] + diff)
        w2[i] = clamp_weight(w2[i] - diff)
      end
      return [AI.new(w), AI.new(w2)]
    end
    AI.new(w)
  end

  # Uniform crossover: every weight is taken from either parent with equal
  # probability.
  #
  # @param ai [AI]
  # @return [AI]
  def merge(ai)
    w = @weights.dup
    other = ai.weights
    network_size.times do |i|
      w[i] = other[i] if rand(2) == 0
    end
    AI.new(w)
  end

  # @param ai [AI]
  # @return [AI] a new AI whose weights are the element-wise mean of both
  def average(ai)
    w = @weights.dup
    other = ai.weights
    network_size.times do |i|
      w[i] = (w[i] + other[i]) / (FLOAT ? 2.0 : 2)
    end
    AI.new(w)
  end

  # Prints the weights as a C array literal. In FLOAT mode every weight is
  # written as the hex bit pattern of its big-endian single-precision value.
  def dump
    if FLOAT
      hex = @weights.map { |x| "0x" + [x].pack('g').unpack('H*').first }
      puts "const uint32_t _weights[#{network_size()}] = {#{hex.join(", ")}};"
    else
      puts "const int8_t _weights[#{network_size()}] = {#{@weights.join(", ")}};"
    end
    #puts "Simplified: #{simplified}"
  end

  private

  # A fresh random weight covering the whole allowed range.
  def random_weight
    FLOAT ? rand() * 2.0 - 1.0 : rand(256) - 128
  end

  # A random mutation step.
  def random_delta
    FLOAT ? rand() * 0.2 - 0.1 : rand(256) - 128
  end

  # Limits a weight to the allowed range for the current FLOAT mode.
  def clamp_weight(value)
    low, high = FLOAT ? [-1.0, 1.0] : [-128, 127]
    [[value, low].max, high].min
  end
end

## Simulate
=begin
ai = AI.new([0xbd547c6d, 0xbedc84a5, 0x3e750239, 0x3ec5ae8a, 0xbcc9a683, 0x3f18715a, 0x3e947ed4, 0xbe4b8bf2, 0xbf2ee4ec, 0xbf3f0a75, 0x3f5392dc, 0xbf06687b, 0xbedca2f2, 0xbcde3698, 0x3edd6a8a, 0xbd7284ca, 0x3ea7bac9, 0xbe5323c1, 0x3eccf87d, 0xbf2d4796, 0xbf62b6e8, 0xbf71daf6, 0xbeff40aa, 0xbf207014, 0x3e26c03c, 0xbf497837, 0xbee4d175, 0x3ec601de, 0x3e4e0695, 0x3eef2619, 0xbe849370, 0xbf18fb2b, 0x3f128e17, 0xbf3dcd78, 0x3f517299, 0x3eef3270], true)
g = Game.new(ai, true)
g.apple = [3, 3]
10.times do
  g.loop
end
exit
=end

graph = File.open(File.join(File.dirname(__FILE__), "data_set.dat"), "w")
graph.puts("# Round - Points - Length - Stopped - Dead")

ais = Array.new(50) { AI.new } #(SEEDS.sample)
round = 1
games = []
# Snapshot (and round number) of the best AI of the last completed round.
# The live AI object is reset for the next round, so the summary printed on
# interrupt has to come from this copy.
best_old_ai = nil
best_old_round = nil

begin
  loop do
    GAMES_PER_ROUND.times do
      # Every AI of the population gets a game. The population grows beyond
      # its initial 50 after the first round because some mutations yield two
      # offspring; nobody may be silently left out.
      games = ais.map { |ai| Game.new(ai) }

      pool = Thread.pool(16)
      games.each do |game|
        pool.process do
          15_000.times do
            game.loop
            break if game.dead || game.stopped?
          end
          game.ai.add_ranking(game)
        end
      end
      pool.shutdown
    end

    games_sorted = games.sort_by(&:ai_ranking).reverse.take(5)
    g = games_sorted[0]

    if (round - 1) % 50 == 0
      # Column widths match the row format below.
      puts "----------------------------------------------------"
      puts "Round |  Points | Length | Stopped | Dead | ID      "
      puts "----------------------------------------------------"
    end
    puts "%5d | %7.1f | %6.0f | %6.0f%% | %3.0f%% | 0x%06x" % [round, g.ai_ranking / GAMES_PER_ROUND, g.ai.sum_length.to_f / GAMES_PER_ROUND, g.ai.count_stopped.to_f / GAMES_PER_ROUND * 100, g.ai.count_dead.to_f / GAMES_PER_ROUND * 100, g.ai.id]
    graph.puts("%d %f %f %f %f" % [round, g.ai_ranking / GAMES_PER_ROUND, g.ai.sum_length.to_f / GAMES_PER_ROUND, g.ai.count_stopped.to_f / GAMES_PER_ROUND * 100, g.ai.count_dead.to_f / GAMES_PER_ROUND * 100])
    graph.flush

    g.ai.dump if round % 10 == 0

    best_old_ai = g.ai.dup
    best_old_round = round

    # Next generation: the five survivors, nine mutation rounds of each, ten
    # crossovers and one average of the two best, and ten fresh random AIs.
    ais = []
    games_sorted.each do |survivor|
      parent = survivor.ai
      parent.reset
      parent.rounds += 1
      ais << parent
      9.times { ais << parent.evolve }
    end
    5.times { ais << games_sorted[0].ai.merge(games_sorted[1].ai) }
    5.times { ais << games_sorted[1].ai.merge(games_sorted[0].ai) }
    ais << games_sorted[0].ai.average(games_sorted[1].ai)
    10.times { ais << AI.new }
    # evolve may return pairs, so flatten into a plain list of AIs.
    ais = ais.flatten

    round += 1
  end
rescue SystemExit, Interrupt
  puts
  puts
  # Nothing to report when interrupted before the first round completed.
  if best_old_ai
    puts "// Round %d, %5.1f points, length %3d, %2.0f%% stopped, %2.0f%% died" % [best_old_round, best_old_ai.ranking / GAMES_PER_ROUND, best_old_ai.sum_length.to_f / GAMES_PER_ROUND, best_old_ai.count_stopped.to_f / GAMES_PER_ROUND * 100, best_old_ai.count_dead.to_f / GAMES_PER_ROUND * 100]
    best_old_ai.dump
  end
ensure
  graph.close
end