Snake effect now uses a neural net with a hidden layer; a bug in _is_free() that led to lots of snakes committing suicide was fixed; neural net weights are now given as the binary representation of a float to prevent rounding errors.

2019-10-18 06:40:09 +02:00
parent 306f72d838
commit 54925dfc0e
4 changed files with 178 additions and 62 deletions
--- a/src/tools/snakenet/plot
+++ b/src/tools/snakenet/plot
@@ -1,2 +1,6 @@
 #!/usr/bin/gnuplot -c
-set term dumb 79 49; plot 'data_set.dat' notitle
+set term dumb 79 49
+plot 'data_set.dat' using 1:2 title 'Points', \
+# 'data_set.dat' using 1:3 title 'Length' axes x1y2, \
+# 'data_set.dat' using 1:4 title 'Stopped' axes x1y2, \
+# 'data_set.dat' using 1:5 title 'Dead' axes x1y2
--- a/src/tools/snakenet/snakenet.rb
+++ b/src/tools/snakenet/snakenet.rb
@@ -14,8 +14,10 @@ class Game
 	POINTS_MOVING_FAR = -1.5
 	
 	attr_reader :points, :dead, :ai, :length
+	attr_accessor :apple
 	
-	def initialize(a)
+	def initialize(a, debug=false)
+		@debug = debug
 		@ai = a
 		@data = [0]*(WIDTH*HEIGHT)
 		@dir = 0
@@ -29,6 +31,7 @@ class Game
 		@dead = false
 		@round = 0
 		@last_apple_at = 0
+		@count_left = @count_right = 0
 		place_apple()
 	end
 	
@@ -63,6 +66,8 @@ class Game
 		return if @dead
 		decision = @ai.decide(free?(@dir-1), free?(@dir), free?(@dir+1), apple?(@dir-1), apple?(@dir), apple?(@dir+1))
 		#puts "Decision: #{decision}"
+		@count_left += 1 if decision==-1
+		@count_right += 1 if decision==1
 		@dir = (@dir + decision) % 4
 		if (free?(@dir)==0)
 			#puts "Dead."
@@ -73,11 +78,11 @@ class Game
 		move
 	end
 	
-	def ranking; @length*10 - (@dead ? 200 : 0) - (since_last_apple >= 160 ? 100 : 0); end
+	def ranking; @length*10 - (@dead ? 200 : 0) - (stopped? ? 100 : 0) - (@count_right - @count_left).abs * 0.05; end
 	
 	def move
 		newpos = calc_new_pos(@pos, @dir)
-		#puts "Newpos: #{newpos}"
+		puts "Newpos: #{newpos}" if @debug
 		if newpos==@apple
 			@length+=1
 			@points += POINTS_APPLE
@@ -156,23 +161,41 @@ class Game
 		@dead = true
 	end
 	
-	def stopped?; since_last_apple >= WIDTH*HEIGHT*1.5; end
+	def stopped?; since_last_apple >= WIDTH*HEIGHT*2; end
 	
 	def ai_ranking; ai.ranking; end
 end

 class AI
-	attr_reader :weights
+	NETWORK_LAYOUT = [6, 4, 3]
+	attr_reader :weights, :id
 	attr_accessor :ranking, :rounds, :count_dead, :count_stopped, :sum_length
 	
-	def initialize(w=nil)
+	def initialize(w=nil, debug=false)
+		@debug = debug
 		reset()
 		@rounds = 1
+		@id = rand(0xFFFFFF)
 		if w==nil
-			@weights = Array.new(18) { rand() * 2.0 - 1.0 }
+			@weights = Array.new(network_size()) { rand() * 2.0 - 1.0 }
+			puts "Initialized with random values: #{@weights}" if @debug
 		else
-			@weights = w
+			if w[0].is_a? Integer
+				@weights = w.map{|s| s.to_s(16).rjust(8, "0").split("").each_slice(2).to_a.map(&:join).map{|s| s.to_i(16).chr}.join.unpack("g")}.flatten
+			else
+				@weights = w
+			end
+			puts "Initialized with given values: #{@weights}" if @debug
 		end
+		
+	end
+	
+	def network_size
+		s = 0
+		(0...(NETWORK_LAYOUT.count-1)).each do |i|
+			s += NETWORK_LAYOUT[i] * NETWORK_LAYOUT[i+1]
+		end
+		return s
 	end
 	
 	def reset
@@ -191,43 +214,59 @@ class AI
 	
 	def decide(left_free, straight_free, right_free, apple_left, apple_straight, apple_right)
 		inputs = [left_free, straight_free, right_free, apple_left, apple_straight, apple_right]
-		#pp inputs
-		outputs = [0, 0, 0]
-		(0...18).each do |x|
-			o = x/6
-			i = x%6
-			outputs[o] += inputs[i] * @weights[x]
+		puts "Inputs: #{inputs}" if @debug
+		outputs = nil
+		x = 0
+		(1...(NETWORK_LAYOUT.count)).each do |i|
+			c_in = NETWORK_LAYOUT[i-1]
+			c_out = NETWORK_LAYOUT[i]
+			outputs = Array.new(c_out){0.0}
+			(0...c_out).each do |o|
+				(0...c_in).each do |i|
+					outputs[o] += inputs[i] * @weights[x]
+					x+=1
+				end
+			end
+			inputs = outputs
 		end
+
 		max = 0
 		take = 0
-		(0...3).each do |x|
+		(0...(NETWORK_LAYOUT.last)).each do |x|
 			if outputs[x]>max
 				max = outputs[x]
 				take = x
 			end
 		end
+		puts "Decision: #{take-1}" if @debug
 		return take-1
 	end
 	
 	def evolve
 		w = @weights.dup
-		action = rand(4)
+		action = rand(5)
 		if action==0 #swap
-			i1 = rand(18)
-			i2 = rand(18)
+			i1 = rand(network_size())
+			i2 = rand(network_size())
 			temp = w[i1]
 			w[i1] = w[i2]
 			w[i2] = temp
 		elsif action==1 #change single value
-			i = rand(18)
+			i = rand(network_size())
 			w[i] = rand() * 2 - 1.0
 		elsif action==2 #invert single value
-			i = rand(18)
+			i = rand(network_size())
 			w[i] *= -1.0
+		elsif action==3
+			(0...network_size()).each do |i|
+				w[i] = rand() * 2 - 1.0 if rand(5)==0
+			end
 		else #change multiple values
-			(0...18).each do |i|
+			(0...network_size()).each do |i|
 				if (rand(5)==0)
-					w[i] = rand() * 2 - 1
+					w[i] += rand() / 5.0 - 0.1
+					w[i] = 1.0 if w[i]>1.0
+					w[i] = -1.0 if w[i]<-1.0
 				end
 			end
 		end
@@ -238,7 +277,7 @@ class AI
 	def merge(ai)
 		w = @weights.dup
 		w2 = ai.weights.dup
-		(0...18).each do |i|
+		(0...network_size()).each do |i|
 			if rand(2)==0
 				w[i] = w2[i]
 			end
@@ -249,19 +288,33 @@ class AI
 	def average(ai)
 		w = @weights.dup
 		w2 = ai.weights
-		(0...18).each do |i|
+		(0...network_size()).each do |i|
 			w[i] = (w[i] + w2[i]) / 2.0
 		end
 		return AI.new(w)
 	end
 	
 	def dump
-		puts "Data:"
-		puts "float _weights[18] = {#{@weights.join(", ")}};"
+		puts "const uint32_t _weights[#{network_size()}] = {#{@weights.map{|x| "0x" + [x].pack('g').split("").map(&:ord).map{|i| i.to_s(16).rjust(2, '0')}.join}.join(", ")}};"
 		#puts "Simplified: #{simplified}"
 	end
 end

+## Simulate
+=begin
+ai = AI.new([0xbd547c6d, 0xbedc84a5, 0x3e750239, 0x3ec5ae8a, 0xbcc9a683, 0x3f18715a, 0x3e947ed4, 0xbe4b8bf2, 0xbf2ee4ec, 0xbf3f0a75,
+0x3f5392dc, 0xbf06687b, 0xbedca2f2, 0xbcde3698, 0x3edd6a8a, 0xbd7284ca, 0x3ea7bac9, 0xbe5323c1, 0x3eccf87d,
+0xbf2d4796, 0xbf62b6e8, 0xbf71daf6, 0xbeff40aa, 0xbf207014, 0x3e26c03c, 0xbf497837, 0xbee4d175, 0x3ec601de, 0x3e4e0695, 0x3eef2619,
+0xbe849370, 0xbf18fb2b, 0x3f128e17, 0xbf3dcd78, 0x3f517299, 0x3eef3270], true)
+g = Game.new(ai, true)
+g.apple = [3, 3]
+10.times do
+	g.loop
+end
+exit
+=end
+
+
 graph = File.open(File.dirname(__FILE__) + "/data_set.dat", "w")
 graph.puts("# Round -  Points -  Length -  Stopped -  Dead")

@@ -272,6 +325,7 @@ games = []
 	ais[x] = AI.new#(SEEDS.sample)
 end

+best_old_game = nil
 best_old_ai = nil
 begin
 loop do
@@ -297,8 +351,14 @@ loop do
 	
 	games_sorted = games.sort_by(&:ai_ranking).reverse.take(5)
 	g = games_sorted[0]
+	
+	if (round-1)%50==0
+		puts "----------------------------------------------------"
+		puts "Round |  Points | Length | Stopped | Dead | ID      "
+		puts "----------------------------------------------------"
+	end

-	puts "Round %5d: %7.1f points, length %3.0f, %3.0f%% stopped, %3.0f%% dead - {%s}" % [round, g.ai_ranking / GAMES_PER_ROUND, g.ai.sum_length.to_f / GAMES_PER_ROUND, g.ai.count_stopped.to_f / GAMES_PER_ROUND * 100, g.ai.count_dead.to_f / GAMES_PER_ROUND * 100, g.ai.weights.map{|v| v.truncate(1).to_s.rjust(4)}.join(", ")]
+	puts "%5d | %7.1f | %6.0f | %6.0f%% | %3.0f%% | 0x%06x" % [round, g.ai_ranking / GAMES_PER_ROUND, g.ai.sum_length.to_f / GAMES_PER_ROUND, g.ai.count_stopped.to_f / GAMES_PER_ROUND * 100, g.ai.count_dead.to_f / GAMES_PER_ROUND * 100, g.ai.id]
 	graph.puts("%d %f %f %f %f" % [round, g.ai_ranking / GAMES_PER_ROUND, g.ai.sum_length.to_f / GAMES_PER_ROUND, g.ai.count_stopped.to_f / GAMES_PER_ROUND * 100, g.ai.count_dead.to_f / GAMES_PER_ROUND * 100])
 	graph.flush
 	
@@ -306,7 +366,8 @@ loop do
 		g.ai.dump
 	end
 	
-	best_old_ai = g.ai
+	best_old_game = g
+	best_old_ai = g.ai.dup
 	
 	ais = []
 	games_sorted.each do |g|
@@ -331,6 +392,9 @@ loop do
 	round+=1
 end
 rescue SystemExit, Interrupt
+	puts
+	puts
+	puts "//   Round %d, %5.1f points, length %3d, %2.0f%% stopped, %2.0f%% died" % [round-1, best_old_game.ai_ranking / GAMES_PER_ROUND, best_old_ai.sum_length.to_f / GAMES_PER_ROUND, best_old_ai.count_stopped.to_f / GAMES_PER_ROUND * 100, best_old_ai.count_dead.to_f / GAMES_PER_ROUND * 100]
 	best_old_ai.dump
 	graph.close
 end