#!/usr/bin/env ruby
# frozen_string_literal: true

# snakenet.rb - the tool used to train the AI for the snake effect.
#
# Provenance: added as src/tools/snakenet/snakenet.rb by commit
# bd0041619a283332271ae3bb9ab8b898a40f7f46 (Fabian Schlenz, 2019-10-16,
# "Added snakenet.rb, the tool used to train the AI for snake effect.").
#
# The script evolves a 64-entry lookup table. Each entry is -1 (turn left),
# 0 (go straight) or 1 (turn right) and is selected by six boolean inputs.
# Every round each candidate table plays GAMES_PER_ROUND games of snake; the
# best candidates survive, get mutated and merged, and the next round starts.
# Training runs until interrupted (Ctrl-C), then the best table is printed as
# a C array. Set the environment variable SNAKENET_ROUNDS to an integer to
# stop automatically after that many rounds.

require 'rubygems'
require 'pp'

# thread/pool comes from the third-party `thread` gem. Only the thread-pool
# variant of the training loop (disabled in the original script) needs it, so
# a missing gem must not stop the single-threaded trainer from running.
begin
  require 'thread/pool'
rescue LoadError
  nil
end

# Previously trained tables that can be used as a starting population via
# AI.new(SEEDS.sample). Currently unused: training starts from random tables.
SEEDS = [
  #[0, 1, -1, 0, -1, 0, 1, -1, 1, -1, 1, 1, 1, 0, -1, 0, 0, 1, 0, -1, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, -1, -1, 0, 1, -1, 0, 0, 0, -1, -1, -1, 0, -1, 1, 0, -1, -1, 0, -1, 1, 0, 1, 1, 0, -1, -1, 0, -1, 1, 0, -1, 0],
  #[-1, 1, -1, 0, -1, 1, -1, -1, 1, 1, -1, 1, 1, -1, 1, 0, 0, -1, 0, 1, 0, -1, -1, 1, 0, 0, 0, -1, 0, 0, 1, -1, -1, -1, -1, -1, -1, 0, 0, -1, -1, 0, -1, 0, -1, 1, 1, 1, -1, 0, -1, 1, 0, 0, 0, -1, -1, -1, 0, -1, 1, 1, -1, 1],
  #[1, 1, -1, 0, 1, 1, 0, -1, 1, 1, 1, 1, 1, -1, -1, -1, 0, -1, 0, 1, 0, -1, 0, 1, 0, 0, 0, -1, 0, 1, 1, 1, -1, -1, -1, 1, -1, -1, 0, -1, -1, -1, -1, 0, -1, -1, 1, 1, 0, 0, -1, 1, 0, 0, 0, 1, -1, -1, 0, 0, 1, 1, 0, 1],
  [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,-1,-1,-1,-1,-1,-1,-1,-1,-1,1,-1,1,-1,-1,-1,-1,0,0,0,0,0,0,-1,-1,0,0,0,1,0,0,-1,-1]
].freeze

# Number of games every AI plays per training round.
GAMES_PER_ROUND = 50

# Size of the initial, random population.
POPULATION = 50

# Number of top-ranked AIs carried over into the next generation.
SURVIVORS = 5

# Number of mutated copies created from every survivor.
MUTANTS_PER_SURVIVOR = 9

# Number of merged children created for each ordering of the two best AIs.
MERGES_PER_PAIR = 5

# Hard cap on the number of steps simulated for a single game.
MAX_STEPS_PER_GAME = 15_000

# One game of snake on a WIDTH x HEIGHT board, steered by an AI-like object
# that responds to `decide` (see AI#decide for the argument order).
#
# The board is a flat array: 0 marks an empty cell, 1 the head, and larger
# numbers the body segments in order towards the tail.
#
# Note: this class defines an instance method named `loop`, which shadows
# Kernel#loop inside Game. Do not use `loop do ... end` in Game's methods.
class Game
  WIDTH = 16
  HEIGHT = 10

  POINTS_APPLE = 10
  POINTS_MOVING_CLOSER = 1
  POINTS_MOVING_FAR = -1.5

  # Steps without an apple after which #ranking applies its "starving"
  # penalty (one step per board cell; 160 for the 16x10 board).
  STARVING_AFTER = WIDTH * HEIGHT

  attr_reader :points, :dead, :ai, :length

  # @param a [#decide] the controller steering this game
  def initialize(a)
    @ai = a
    @data = [0] * (WIDTH * HEIGHT)
    @dir = 0
    @pos = [WIDTH / 2, HEIGHT / 2]
    # Initial snake: head in the board centre, three segments below it.
    4.times { |i| @data[(@pos[1] + i) * WIDTH + @pos[0]] = i + 1 }
    @length = 4
    @points = 0.0
    @dead = false
    @round = 0
    @last_apple_at = 0
    place_apple
  end

  # Puts the apple on a random empty cell and re-bases the distance used for
  # the "moving closer / moving away" scoring.
  #
  # Picks from the list of empty cells instead of retrying random cells, so
  # it always terminates. If no empty cell is left the game is ended.
  def place_apple
    cell = @data.each_index.select { |i| @data[i].zero? }.sample
    return die if cell.nil?

    @apple = [cell % WIDTH, cell / WIDTH]
    @old_distance = apple_distance
  end

  # @return [Integer] Manhattan distance between head and apple
  def apple_distance
    (@pos[0] - @apple[0]).abs + (@pos[1] - @apple[1]).abs
  end

  # Renders the board as ASCII art with the score in the top border.
  #
  # Every cell is rendered as exactly one character (blank, '*' for the
  # apple, or the last digit of the segment index), so rows stay aligned even
  # when the snake is longer than nine segments.
  #
  # @return [String]
  def to_s
    cells = @data.map { |value| value.zero? ? ' ' : (value % 10).to_s }
    cells[@apple[1] * WIDTH + @apple[0]] = '*'
    score = @points.to_s
    dashes = '-' * [WIDTH - score.length - 1, 0].max
    rows = cells.each_slice(WIDTH).map { |row| "|#{row.join}|\n" }
    "+#{dashes} #{score}+\n#{rows.join}+#{'-' * WIDTH}+\n"
  end

  # Prints the board followed by an empty line.
  def draw
    puts to_s
    puts
  end

  # Advances the game by one step: asks the AI for a relative turn
  # (-1 left, 0 straight, 1 right), then either moves or dies if the chosen
  # cell is blocked. Does nothing once the snake is dead.
  def loop
    return if @dead

    decision = @ai.decide(free?(@dir - 1), free?(@dir), free?(@dir + 1),
                          apple?(@dir - 1), apple?(@dir), apple?(@dir + 1))
    @dir = (@dir + decision) % 4
    unless free?(@dir)
      die
      return
    end

    move
  end

  # Fitness of this game: 10 per segment, minus 200 for dying, minus 100 for
  # going STARVING_AFTER or more steps without eating.
  def ranking
    @length * 10 - (@dead ? 200 : 0) - (since_last_apple >= STARVING_AFTER ? 100 : 0)
  end

  # Moves the head one cell in the current direction, growing if it lands on
  # the apple.
  #
  # The replacement apple is placed only after the head occupies its new
  # cell. Placing it earlier (while that cell still counted as empty) allowed
  # the new apple to spawn underneath the head. On an eating step the
  # distance reward is skipped because the distance is re-based against the
  # new apple.
  def move
    newpos = calc_new_pos(@pos, @dir)
    ate = newpos == @apple
    if ate
      @length += 1
      @points += POINTS_APPLE
      @last_apple_at = @round
    end

    # Age every segment by one; whatever was already the last segment
    # (index >= length) falls off the tail.
    @data.map! { |value| value.zero? || value >= @length ? 0 : value + 1 }
    @data[newpos[1] * WIDTH + newpos[0]] = 1
    @pos = newpos

    if ate
      place_apple
    else
      score_distance_change
    end
    @round += 1
  end

  # @return [Integer] number of steps since the last apple was eaten
  def since_last_apple
    @round - @last_apple_at
  end

  # @param p [Array(Integer, Integer)] position as [x, y]
  # @param d [Integer] direction; taken modulo 4 (0 up, 1 right, 2 down, 3 left)
  # @return [Array(Integer, Integer)] the neighbouring position (a new array)
  def calc_new_pos(p, d)
    np = p.dup
    case d % 4
    when 0 then np[1] -= 1
    when 1 then np[0] += 1
    when 2 then np[1] += 1
    when 3 then np[0] -= 1
    end
    np
  end

  # Whether the neighbouring cell in direction +dir+ can be entered.
  #
  # NOTE(review): part of this method was lost in the copy of the source this
  # file was restored from. Only the lower-bound checks survived; the upper
  # bounds and the "cell is empty" check are reconstructed and should be
  # confirmed against the repository.
  def free?(dir)
    x, y = calc_new_pos(@pos, dir)
    x.between?(0, WIDTH - 1) && y.between?(0, HEIGHT - 1) && @data[y * WIDTH + x].zero?
  end

  # Whether the apple lies somewhere in direction +dir+ relative to the head
  # (half-plane test, so two directions can be true at once).
  #
  # NOTE(review): the branch for direction 0 was lost together with the end
  # of free? and is reconstructed by symmetry with direction 2. The original
  # also carried a commented-out variant working on d_x/d_y magnitudes; it was
  # garbled beyond recovery and is not reproduced here.
  def apple?(dir)
    case dir % 4
    when 0 then @apple[1] < @pos[1]
    when 1 then @apple[0] > @pos[0]
    when 2 then @apple[1] > @pos[1]
    when 3 then @apple[0] < @pos[0]
    end
  end

  # Marks the snake as dead; #loop becomes a no-op afterwards.
  def die
    @dead = true
  end

  # Whether the game should be abandoned because the snake has not eaten for
  # 1.5 times the number of board cells.
  def stopped?
    since_last_apple >= WIDTH * HEIGHT * 1.5
  end

  # @return [Numeric] accumulated ranking of the AI playing this game
  def ai_ranking
    ai.ranking
  end

  private

  # Rewards moving towards the apple and penalises moving away from it.
  def score_distance_change
    distance = apple_distance
    delta = distance - @old_distance
    @old_distance = distance
    if delta.negative?
      @points += POINTS_MOVING_CLOSER
    elsif delta.positive?
      @points += POINTS_MOVING_FAR
    end
  end
end

# A candidate controller: a lookup table with one decision (-1, 0 or 1) for
# each of the 2**6 combinations of the six boolean inputs, plus the statistics
# accumulated over the games of the current round.
class AI
  # Number of table entries (six boolean inputs).
  WEIGHT_COUNT = 2**6

  attr_reader :weights
  attr_accessor :ranking, :rounds, :count_dead, :count_stopped, :sum_length

  # @param w [Array<Integer>, nil] decision table; random when nil
  def initialize(w = nil)
    reset
    @rounds = 1
    @weights = w || Array.new(WEIGHT_COUNT) { rand(3) - 1 }
  end

  # Clears the per-round statistics.
  def reset
    @ranking = 0.0
    @count_dead = 0
    @count_stopped = 0
    @sum_length = 0
  end

  # Adds the outcome of a finished game to the per-round statistics.
  #
  # @param g [Game]
  def add_ranking(g)
    @ranking += g.ranking
    @count_dead += 1 if g.dead
    @count_stopped += 1 if g.stopped?
    @sum_length += g.length
  end

  # Looks up the decision for the given situation.
  #
  # Table index bits: 0 apple right, 1 apple straight, 2 apple left,
  # 3 right free, 4 straight free, 5 left free.
  #
  # Every flag is parenthesised on purpose: `a ? 1 : 0 | b` parses as
  # `a ? 1 : (0 | b)` because `|` binds tighter than `?:`. Without the
  # parentheses every situation with the apple to the right used index 1.
  #
  # (An earlier experiment with 18 float weights feeding three outputs and an
  # argmax was disabled in the original script in favour of this table.)
  #
  # @return [Integer] -1 (turn left), 0 (straight) or 1 (turn right)
  def decide(left_free, straight_free, right_free, apple_left, apple_straight, apple_right)
    input = (apple_right ? 1 : 0) |
            (apple_straight ? 1 << 1 : 0) |
            (apple_left ? 1 << 2 : 0) |
            (right_free ? 1 << 3 : 0) |
            (straight_free ? 1 << 4 : 0) |
            (left_free ? 1 << 5 : 0)
    @weights[input]
  end

  # Returns a mutated copy: either two differing entries are swapped or one
  # entry is set to a different value. Afterwards entries that steer into a
  # blocked cell get one random re-roll each.
  #
  # @return [AI]
  def evolve
    w = @weights.dup
    loop do
      if rand(2).zero?
        # Swap two entries; retry when that would change nothing.
        i1 = rand(WEIGHT_COUNT)
        i2 = rand(WEIGHT_COUNT)
        next if w[i1] == w[i2]

        w[i1], w[i2] = w[i2], w[i1]
      else
        # Overwrite one entry; retry when that would change nothing.
        i = rand(WEIGHT_COUNT)
        v = rand(3) - 1
        next if w[i] == v

        w[i] = v
      end
      break
    end

    # Sanity checks: a decision that moves into a cell reported as blocked is
    # re-rolled once (the re-roll may produce the same value again).
    WEIGHT_COUNT.times do |i|
      w[i] = rand(3) - 1 if (i & (1 << 4)).zero? && w[i].zero?
      w[i] = rand(3) - 1 if (i & (1 << 3)).zero? && w[i] == 1
      w[i] = rand(3) - 1 if (i & (1 << 5)).zero? && w[i] == -1
    end
    AI.new(w)
  end

  # Returns a child that takes each entry from +ai+ with probability 1/5 and
  # from this AI otherwise.
  #
  # @param ai [AI]
  # @return [AI]
  def merge(ai)
    theirs = ai.weights
    AI.new(@weights.each_with_index.map { |own, i| rand(5).zero? ? theirs[i] : own })
  end

  # Packs the table into four 32-bit words, 16 entries per word and 2 bits
  # per entry, first entry in the most significant bits.
  #
  # Entries are masked to two bits, so -1 is stored as 0b11. Without the mask
  # OR-ing in -1 turned the whole word negative.
  # NOTE(review): two's-complement packing is assumed; confirm against the
  # consumer of this table before enabling the output in #dump.
  #
  # @return [String] a C array definition
  def simplified
    words = @weights.each_slice(16).map do |chunk|
      packed = chunk.reduce(0) { |acc, weight| (acc << 2) | (weight & 0b11) }
      "0x" + packed.to_s(16)
    end
    "uint32_t weights[4] = {" + words.join(", ") + "};"
  end

  # Prints the table as a C array definition.
  def dump
    puts "Data:"
    puts "int8_t _decisions[64] = {#{@weights.join(", ")}};"
    #puts "Simplified: #{simplified}"
  end
end

# Lets every AI play GAMES_PER_ROUND games and accumulate its statistics.
#
# Iterates over the whole list. The original indexed 0...50 only, although
# every generation after the first has 60 members, so the merged children at
# the end of the list were never played.
#
# @param ais [Array<AI>]
def play_round(ais)
  GAMES_PER_ROUND.times do
    ais.each do |ai|
      game = Game.new(ai)
      MAX_STEPS_PER_GAME.times do
        game.loop
        break if game.dead || game.stopped?
      end
      ai.add_ranking(game)
    end
  end
end

# Builds the next generation: every survivor (statistics reset) followed by
# its mutants, then merges of the two best survivors in both directions.
#
# @param survivors [Array<AI>] best AIs of the finished round, best first
# @return [Array<AI>]
def next_generation(survivors)
  ais = survivors.flat_map do |ai|
    ai.reset
    ai.rounds += 1
    [ai] + Array.new(MUTANTS_PER_SURVIVOR) { ai.evolve }
  end
  first, second = survivors
  MERGES_PER_PAIR.times { ais << first.merge(second) }
  MERGES_PER_PAIR.times { ais << second.merge(first) }
  ais
end

# Runs the training loop and prints the best table when it ends.
#
# @param max_rounds [Integer, nil] number of rounds to run; nil (the default)
#   trains until the process is interrupted
# @return [AI, nil] the best AI of the last completed round, or nil when no
#   round was completed
def train(max_rounds = nil)
  ais = Array.new(POPULATION) { AI.new } # or AI.new(SEEDS.sample)
  best = nil
  round = 1
  begin
    while max_rounds.nil? || round <= max_rounds
      play_round(ais)
      survivors = ais.max_by(SURVIVORS, &:ranking)
      best = survivors.first

      puts format("Round %5d: %7.1f points, length %3.0f, %3.0f%% stopped, %3.0f%% dead",
                  round,
                  best.ranking / GAMES_PER_ROUND,
                  best.sum_length.to_f / GAMES_PER_ROUND,
                  best.count_stopped.to_f / GAMES_PER_ROUND * 100,
                  best.count_dead.to_f / GAMES_PER_ROUND * 100)

      ais = next_generation(survivors)
      round += 1
    end
  rescue SystemExit, Interrupt
    # Ctrl-C (or exit) is the regular way to stop training; fall through and
    # print the best table found so far.
    nil
  end
  # Nothing to print when training was stopped before the first round ended.
  best&.dump
  best
end

if $PROGRAM_NAME == __FILE__
  round_limit = ENV['SNAKENET_ROUNDS']
  train(round_limit && Integer(round_limit))
end