Added snakenet.rb, the tool used to train the AI for the snake effect.

2019-10-16 05:56:42 +02:00
parent a3caaa1fef
commit bd0041619a
1 changed files with 344 additions and 0 deletions
--- a/src/tools/snakenet/snakenet.rb
+++ b/src/tools/snakenet/snakenet.rb
@@ -0,0 +1,344 @@
+#!/usr/bin/env ruby
+require 'rubygems'
+require 'pp'
+require 'thread/pool'
+
+# Weight tables that can be used to seed the initial population. Each entry is
+# an array of decisions (-1, 0 or 1), one per table index used by AI#decide.
+# Currently only referenced by the commented-out `AI.new#(SEEDS.sample)` call
+# in the training script below; the first three tables are disabled.
+# NOTE(review): presumably results of earlier training runs — confirm.
+SEEDS = [
+	#[0, 1, -1, 0, -1, 0, 1, -1, 1, -1, 1, 1, 1, 0, -1, 0, 0, 1, 0, -1, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, -1, -1, 0, 1, -1, 0, 0, 0, -1, -1, -1, 0, -1, 1, 0, -1, -1, 0, -1, 1, 0, 1, 1, 0, -1, -1, 0, -1, 1, 0, -1, 0],
+	#[-1, 1, -1, 0, -1, 1, -1, -1, 1, 1, -1, 1, 1, -1, 1, 0, 0, -1, 0, 1, 0, -1, -1, 1, 0, 0, 0, -1, 0, 0, 1, -1, -1, -1, -1, -1, -1, 0, 0, -1, -1, 0, -1, 0, -1, 1, 1, 1, -1, 0, -1, 1, 0, 0, 0, -1, -1, -1, 0, -1, 1, 1, -1, 1],
+	#[1, 1, -1, 0, 1, 1, 0, -1, 1, 1, 1, 1, 1, -1, -1, -1, 0, -1, 0, 1, 0, -1, 0, 1, 0, 0, 0, -1, 0, 1, 1, 1, -1, -1, -1, 1, -1, -1, 0, -1, -1, -1, -1, 0, -1, -1, 1, 1, 0, 0, -1, 1, 0, 0, 0, 1, -1, -1, 0, 0, 1, 1, 0, 1],
+	[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,-1,-1,-1,-1,-1,-1,-1,-1,-1,1,-1,1,-1,-1,-1,-1,0,0,0,0,0,0,-1,-1,0,0,0,1,0,0,-1,-1]
+	
+]
+
+# Number of games every AI plays per training round; the per-round report
+# divides the accumulated statistics by this count.
+GAMES_PER_ROUND = 50
+
+# Simulation of a single snake game on a WIDTH x HEIGHT board, steered by an
+# AI object that responds to #decide (and #ranking, for #ai_ranking).
+#
+# Board cells are stored row-major in @data: 0 is an empty cell, any positive
+# value is a snake segment whose number is its age (1 = head, @length = tail).
+# Directions are numbered 0 = up, 1 = right, 2 = down, 3 = left.
+#
+# Note: #loop shadows Kernel#loop inside this class, so instance methods here
+# must not use `loop do ... end` for iteration.
+class Game
+	WIDTH = 16
+	HEIGHT = 10
+
+	# Bookkeeping for @points. @points is only shown by #to_s; the fitness
+	# used by the training script comes from #ranking.
+	POINTS_APPLE = 10
+	POINTS_MOVING_CLOSER = 1
+	POINTS_MOVING_FAR = -1.5
+
+	attr_reader :points, :dead, :ai, :length
+
+	# a - the AI that steers this game.
+	def initialize(a)
+		@ai = a
+		@data = [0] * (WIDTH * HEIGHT)
+		@dir = 0
+		@pos = [WIDTH / 2, HEIGHT / 2]
+		@length = 4
+		# Head in the centre, the remaining segments directly below it.
+		@length.times do |offset|
+			@data[(@pos[1] + offset) * WIDTH + @pos[0]] = offset + 1
+		end
+		@points = 0.0
+		@dead = false
+		@round = 0
+		@last_apple_at = 0
+		place_apple
+	end
+
+	# Puts the apple on a random empty cell and restarts distance tracking.
+	# Uses rejection sampling, so at least one empty cell must exist.
+	def place_apple
+		cell = rand(WIDTH * HEIGHT)
+		cell = rand(WIDTH * HEIGHT) until @data[cell] == 0
+		@apple = [cell % WIDTH, cell / WIDTH]
+		@old_distance = apple_distance
+	end
+
+	# Manhattan distance between the head and the apple.
+	def apple_distance
+		(@pos[0] - @apple[0]).abs + (@pos[1] - @apple[1]).abs
+	end
+
+	# Renders the board as ASCII art with the score in the top border.
+	# Every cell is exactly one character wide: blank for empty, "*" for the
+	# apple and the last digit of the segment age for the snake, so snakes
+	# with ten or more segments no longer shift the grid.
+	def to_s
+		cells = @data.map { |age| age == 0 ? " " : (age % 10).to_s }
+		cells[@apple[1] * WIDTH + @apple[0]] = "*"
+		score = @points.to_s
+		# Never ask for a negative repeat count when the score gets long.
+		dashes = [WIDTH - score.length - 1, 0].max
+		lines = ["+" + "-" * dashes + " " + score + "+"]
+		cells.each_slice(WIDTH) { |row| lines << "|" + row.join + "|" }
+		lines << "+" + "-" * WIDTH + "+"
+		lines.join("\n") + "\n"
+	end
+
+	# Prints the board followed by an empty line.
+	def draw
+		puts to_s
+		puts
+	end
+
+	# Advances the game by one step: asks the AI for a decision (-1 left,
+	# 0 straight, 1 right, relative to the current heading), then either
+	# moves or dies if the chosen cell is blocked. Does nothing once dead.
+	def loop
+		return if @dead
+		decision = @ai.decide(
+			free?(@dir - 1), free?(@dir), free?(@dir + 1),
+			apple?(@dir - 1), apple?(@dir), apple?(@dir + 1)
+		)
+		@dir = (@dir + decision) % 4
+		if free?(@dir)
+			move
+		else
+			die
+		end
+	end
+
+	# Fitness of this game: 10 per segment, minus 200 for dying and minus 100
+	# for going 160 or more steps without an apple.
+	# NOTE(review): 160 equals WIDTH*HEIGHT for the current board, while
+	# #stopped? uses 1.5 times that — confirm the thresholds are meant to differ.
+	def ranking
+		@length * 10 - (@dead ? 200 : 0) - (since_last_apple >= 160 ? 100 : 0)
+	end
+
+	# Moves the head one cell in the current direction, growing when the
+	# apple is eaten. The caller is expected to have checked #free? first.
+	def move
+		newpos = calc_new_pos(@pos, @dir)
+		ate_apple = newpos == @apple
+		if ate_apple
+			@length += 1
+			@points += POINTS_APPLE
+			@last_apple_at = @round
+		end
+
+		# Age every segment and drop the ones that outlived the snake length.
+		@data.map! { |age| age > 0 && age < @length ? age + 1 : 0 }
+		@data[newpos[1] * WIDTH + newpos[0]] = 1
+		@pos = newpos
+
+		if ate_apple
+			# Place the next apple only after the board and head are up to
+			# date, so it cannot spawn underneath the head and the distance
+			# baseline is measured from the new head position.
+			place_apple
+		else
+			delta = apple_distance - @old_distance
+			@old_distance = apple_distance
+			if delta < 0
+				@points += POINTS_MOVING_CLOSER
+			elsif delta > 0
+				@points += POINTS_MOVING_FAR
+			end
+		end
+		@round += 1
+	end
+
+	# Number of steps taken since the last apple was eaten.
+	def since_last_apple
+		@round - @last_apple_at
+	end
+
+	# Returns a new [x, y] one step away from p in direction d (taken modulo 4).
+	def calc_new_pos(p, d)
+		np = p.dup
+		case d % 4
+		when 0 then np[1] -= 1
+		when 1 then np[0] += 1
+		when 2 then np[1] += 1
+		when 3 then np[0] -= 1
+		end
+		np
+	end
+
+	# True when the neighbouring cell in direction dir is on the board and empty.
+	def free?(dir)
+		x, y = calc_new_pos(@pos, dir)
+		x >= 0 && x < WIDTH && y >= 0 && y < HEIGHT && @data[y * WIDTH + x] == 0
+	end
+
+	# True when the apple lies anywhere in the half-plane in direction dir
+	# (taken modulo 4) as seen from the head.
+	def apple?(dir)
+		case dir % 4
+		when 0 then @apple[1] < @pos[1]
+		when 1 then @apple[0] > @pos[0]
+		when 2 then @apple[1] > @pos[1]
+		when 3 then @apple[0] < @pos[0]
+		end
+	end
+
+	# Marks the game as lost.
+	def die
+		@dead = true
+	end
+
+	# True when the snake went 1.5 board areas' worth of steps without eating.
+	def stopped?
+		since_last_apple >= WIDTH * HEIGHT * 1.5
+	end
+
+	# Accumulated ranking of the AI playing this game.
+	def ai_ranking
+		ai.ranking
+	end
+end
+
+# Table-driven snake controller plus the genetic operators used to train it.
+#
+# The six boolean sensor inputs are packed into a 6-bit index (see #decide);
+# the matching entry of the weights table is the steering decision:
+# -1 turns left, 0 keeps going straight, 1 turns right.
+class AI
+	INPUT_BITS = 6
+	TABLE_SIZE = 2**INPUT_BITS
+	MOVES = [-1, 0, 1].freeze
+	# For each move, the index bit that says the cell it leads to is free.
+	REQUIRED_FREE_BIT = { -1 => 1 << 5, 0 => 1 << 4, 1 => 1 << 3 }.freeze
+
+	attr_reader :weights
+	attr_accessor :ranking, :rounds, :count_dead, :count_stopped, :sum_length
+
+	# w - optional decision table (one entry per input combination); a random
+	#     table is generated when omitted. The array is stored as given.
+	def initialize(w = nil)
+		reset
+		@rounds = 1
+		@weights = w.nil? ? Array.new(TABLE_SIZE) { rand(3) - 1 } : w
+	end
+
+	# Clears the statistics accumulated by #add_ranking.
+	def reset
+		@ranking = 0.0
+		@count_dead = 0
+		@count_stopped = 0
+		@sum_length = 0
+	end
+
+	# Adds the outcome of a finished game g to this AI's statistics.
+	def add_ranking(g)
+		@ranking += g.ranking
+		@count_dead += 1 if g.dead
+		@count_stopped += 1 if g.stopped?
+		@sum_length += g.length
+	end
+
+	# Looks up the steering decision for the given sensor readings.
+	#
+	# Index layout (bit 0 upwards): apple_right, apple_straight, apple_left,
+	# right_free, straight_free, left_free.
+	#
+	# Every term is parenthesised: `?:` binds looser than `|`, so an
+	# unparenthesised leading ternary would swallow all the other bits.
+	def decide(left_free, straight_free, right_free, apple_left, apple_straight, apple_right)
+		input = (apple_right ? 1 : 0) |
+			(apple_straight ? 1 << 1 : 0) |
+			(apple_left ? 1 << 2 : 0) |
+			(right_free ? 1 << 3 : 0) |
+			(straight_free ? 1 << 4 : 0) |
+			(left_free ? 1 << 5 : 0)
+		@weights[input]
+	end
+
+	# Returns a new AI whose table differs from this one by a single mutation
+	# (two entries swapped, or one entry replaced), followed by a sanity pass
+	# that steers every entry away from cells the index marks as blocked.
+	def evolve
+		w = @weights.dup
+		# Retry until the mutation actually changes the table.
+		loop do
+			if rand(2) == 0
+				# swap
+				i1 = rand(TABLE_SIZE)
+				i2 = rand(TABLE_SIZE)
+				next if w[i1] == w[i2]
+				w[i1], w[i2] = w[i2], w[i1]
+			else
+				# set new
+				i = rand(TABLE_SIZE)
+				v = rand(3) - 1
+				next if w[i] == v
+				w[i] = v
+			end
+			break
+		end
+
+		# Sanity checks: a decision must never point at a blocked cell when
+		# a free one exists. Picking from the allowed moves guarantees this;
+		# a plain re-roll could land on the same blocked move again.
+		w.each_index do |i|
+			allowed = MOVES.select { |move| (i & REQUIRED_FREE_BIT[move]) != 0 }
+			next if allowed.empty? || allowed.include?(w[i])
+			w[i] = allowed.sample
+		end
+		AI.new(w)
+	end
+
+	# Returns a new AI that copies this table and takes each entry from ai
+	# instead with probability 1/5.
+	def merge(ai)
+		other = ai.weights
+		mixed = @weights.each_with_index.map { |own, i| rand(5) == 0 ? other[i] : own }
+		AI.new(mixed)
+	end
+
+	# Returns a C initialiser packing the table into 32-bit words, 16 entries
+	# per word and 2 bits per entry with the first entry in the highest bits.
+	# Entries are masked to 2 bits (so -1 becomes 0b11); without the mask a
+	# single negative entry would turn the whole word negative.
+	# NOTE(review): two's-complement 2-bit encoding is assumed — confirm
+	# against the consumer of this table.
+	def simplified
+		words = @weights.each_slice(16).map do |chunk|
+			packed = chunk.inject(0) { |acc, weight| (acc << 2) | (weight & 0b11) }
+			"0x" + packed.to_s(16)
+		end
+		"uint32_t weights[4] = {" + words.join(", ") + "};"
+	end
+
+	# Prints the table as a C array definition.
+	def dump
+		puts "Data:"
+		puts "int8_t _decisions[64] = {#{@weights.join(", ")}};"
+		#puts "Simplified: #{simplified}"
+	end
+end
+
+# Training driver: evolves a population of AIs generation by generation until
+# the process is interrupted (e.g. Ctrl-C), then prints the best table of the
+# last finished generation.
+round = 1
+ais = Array.new(50) { AI.new } # (SEEDS.sample)
+best_old_ai = nil
+
+begin
+	loop do
+		games = []
+		GAMES_PER_ROUND.times do
+			# Every AI of the current population plays, including the merged
+			# offspring appended at the end of the previous generation
+			# (a fixed 0...50 range would silently skip them).
+			games = ais.map { |ai| Game.new(ai) }
+			games.each do |game|
+				# Hard cap on steps; a game normally ends earlier by dying
+				# or by going too long without an apple.
+				15_000.times do
+					game.loop
+					break if game.dead || game.stopped?
+				end
+				game.ai.add_ranking(game)
+			end
+		end
+
+		# The five AIs with the highest accumulated ranking survive.
+		elite = games.sort_by(&:ai_ranking).reverse.take(5).map(&:ai)
+		best = elite[0]
+
+		puts "Round %5d: %7.1f points, length %3.0f, %3.0f%% stopped, %3.0f%% dead" % [round, best.ranking / GAMES_PER_ROUND, best.sum_length.to_f / GAMES_PER_ROUND, best.count_stopped.to_f / GAMES_PER_ROUND * 100, best.count_dead.to_f / GAMES_PER_ROUND * 100]
+
+		best_old_ai = best
+
+		# Next generation: each survivor plus nine mutants of it, then ten
+		# crossovers of the two best tables (five in each direction).
+		ais = []
+		elite.each do |parent|
+			parent.reset
+			parent.rounds += 1
+			ais << parent
+			9.times { ais << parent.evolve }
+		end
+		5.times { ais << elite[0].merge(elite[1]) }
+		5.times { ais << elite[1].merge(elite[0]) }
+		round += 1
+	end
+rescue SystemExit, Interrupt
+	# Nothing to report when interrupted before the first generation finished.
+	best_old_ai.dump if best_old_ai
+end