Snake effect now uses a neural net with a hidden layer; a bug in _is_free() that led to lots of snakes committing suicide was fixed; neural net weights are now given as the binary representation of a float to prevent rounding errors.

2019-10-18 06:40:09 +02:00
parent 306f72d838
commit 54925dfc0e
4 changed files with 178 additions and 62 deletions
--- a/include/effect_snake.h
+++ b/include/effect_snake.h
@@ -8,6 +8,8 @@
 #define SNAKE_DIR_SOUTH 2
 #define SNAKE_DIR_WEST  3

+#define SNAKE_DEBUG false
+
 class SnakeEffect : public Effect {
 private:
 	Coords _pos;
@@ -18,10 +20,22 @@ private:
 	uint8_t _length;
 	unsigned long _last_apple_at;
 	unsigned long _last_move_at;
-	// 204.6 points, length  35,  58% stopped,  42% dead
-	// float _weights[18] = {0.8613356309729567, 0.1010670216231977, -0.03801953620401166, 0.7556556498067926, -0.9925124063206012, 0.6375227768823608, 0.7216340201735381, 0.5557451907997892, 0.03496949604344035, 0.7238725631217913, 0.9070987343528141, 0.6518810721526125, -0.17322587217593544, -0.7726195238221361, 0.8044226332955624, 0.8434782354002677, 0.8508760698750302, 0.47735675603010397};
-	// Round  2077:   208.6 points, length  36,  50% stopped,  50% dead
-	float _weights[18] = {-0.1648448727142625, -0.7505284618312464, 0.0037384390323656203, -0.6678743938665241, 0.917231716139375, 0.05960885292612439, -0.7585782758281971, 0.8275111343144115, 0.7821852602229209, 0.29970244548911523, 0.9737979047604144, -0.2384723067003974, -0.7854491847031548, 0.44652781127984964, 0.9127919336231882, 0.3309096816699824, -0.9071832356948208, -0.23802066581485848};
+	uint16_t _round;
+	
+	// Neural net config
+	// These are actually float values. To prevent rounding errors, they are provided
+	// as the raw binary data of the IEEE 754 floating-point numbers.
+	// In _decide() there is code that converts each of them back to a float via memcpy().
+	//   Round 340, 223.4 points, length 39, 36% stopped, 64% died
+	// const uint32_t _weights[36] = {0xbd8e626e, 0xbee2cd2c, 0x3e4d5cab, 0x3eceb8c3, 0xbed0a514, 0x3ec62438, 0x3e947ed4, 0xbe4b8bf2, 0xbf301113, 0xbf3f0a75, 0x3f1868f7, 0xbf0253ca, 0xbedca2f2, 0xbd547c6d, 0x3edd6a8a, 0xbd4b97b6, 0x3f64ec26, 0xbe5323c1, 0x3eccf87d, 0xbf2d4796, 0xbf62b6e8, 0xbf71daf6, 0xbf03f08e, 0xbf222609, 0x3e26c03c, 0xbf497837, 0xbee4d175, 0x3ec601de, 0x3e4e0695, 0x3eef2619, 0xbe849370, 0xbf18fb2b, 0x3f25bbd1, 0xbf3dcd78, 0x3f37a58d, 0x3ef4a25b};
+	//   Round 630, 221.0 points, length 38, 36% stopped, 64% died
+	const uint32_t _weights[36] = {0xbd25943f, 0xbf279d81, 0x3e25d128, 0x3ec62438, 0x3f0e719c, 0x3eefbea9, 0x3e947ed4, 0xbe5323c1, 0xbf2d4796, 0xbf3f0a75, 0x3f0e45d9, 0xbf0253ca, 0xbedca2f2, 0xbd79073c, 0x3ede80ec, 0xbd4b97b6, 0x3f69a6be, 0xbe4b8bf2, 0x3eccf87d, 0xbf301113, 0xbf62b6e8, 0xbf71daf6, 0xbf204130, 0xbf222609, 0x3e26c03c, 0xbf497837, 0xbee4d175, 0x3ec601de, 0x3e4954eb, 0x3eef2619, 0xbe849370, 0xbf18fb2b, 0x3f25bbd1, 0xbf3b4e44, 0x3f484d59, 0x3edd6a8a};
+	  
+	const uint8_t _net_layout[3] = {6, 4, 3};
+	const uint8_t _net_layers = 3;
+	const uint8_t _net_total_size = 36;
+	
+	
 	uint16_t _xy2i(uint8_t x, uint8_t y);
 	uint16_t _xy2i(Coords c);
 	Coords _i2xy(uint16_t i);
--- a/src/effect_snake.cpp
+++ b/src/effect_snake.cpp
@@ -10,6 +10,7 @@ SnakeEffect::SnakeEffect() {

 void SnakeEffect::_init() {
 	_dying = 0;
+	_round = 0;
 	_last_apple_at = millis();
 	_last_move_at = millis();
 	_dir = SNAKE_DIR_NORTH;
@@ -17,6 +18,9 @@ void SnakeEffect::_init() {
 	_pos = {(uint8_t)(window->width/2), (uint8_t)(window->height/2)};
 	for (int i=0; i<_pixels; i++) _map[i]=0;
 	_map[_xy2i(_pos)]=1;
+	_map[_xy2i(_pos)+window->width*1]=2;
+	_map[_xy2i(_pos)+window->width*2]=3;
+	_map[_xy2i(_pos)+window->width*3]=4;
 	_place_apple();
 }

@@ -26,6 +30,10 @@ SnakeEffect::~SnakeEffect() {
 }

 void SnakeEffect::_place_apple() {
+	if (SNAKE_DEBUG) {
+		_apple = {3, 3};
+		return;
+	}
 	if (_length < _pixels) {
 		uint8_t start = random8(_pixels);
 		for (int i=0; i<_pixels; i++) {
@@ -45,27 +53,50 @@ void SnakeEffect::_decide() {
 	uint8_t a_s = _to_apple(_dir);
 	uint8_t a_r = _to_apple(_dir + 1);
 	
-	uint8_t inputs[6] = {f_l, f_s, f_r, a_l, a_s, a_r};
-	
-	float outputs[3] = {0.0, 0.0, 0.0};
-	
-	for (int i=0; i<18; i++) {
-		uint8_t out = i/6;
-		uint8_t in  = i%6;
-		outputs[out] += _weights[i] * inputs[in];
+	float* inputs = new float[6];
+	inputs[0] = f_l;
+	inputs[1] = f_s;
+	inputs[2] = f_r;
+	inputs[3] = a_l;
+	inputs[4] = a_s;
+	inputs[5] = a_r;
+	if (SNAKE_DEBUG) LOGln("SnakeEffect * Position: %d, %d - Inputs: %3.1f %3.1f %3.1f %3.1f %3.1f %3.1f", _pos.x, _pos.y, inputs[0], inputs[1], inputs[2], inputs[3], inputs[4], inputs[5]);
+	float* outputs = NULL;
+	uint8_t i=0;
+	for (uint8_t layer=1; layer<_net_layers; layer++) {
+		outputs = new float[_net_layout[layer]];
+		for (uint8_t j=0; j<_net_layout[layer]; j++) {
+			outputs[j] = 0.0;
+		}
+		for (uint8_t idx_out=0; idx_out<_net_layout[layer]; idx_out++) {
+			for (uint8_t idx_in=0; idx_in<_net_layout[layer-1]; idx_in++) {
+				float weight;
+				memcpy(&weight, &(_weights[i]), sizeof(weight));
+				outputs[idx_out] += weight * inputs[idx_in];
+				//outputs[idx_out] += (*(float*)&(_weights[i])) * inputs[idx_in];
+				i++;
+			}
+		}
+		delete inputs;
+		inputs = outputs;
 	}
 	
 	int8_t decision = 0;
-	if (outputs[0]>=outputs[1] && outputs[0]>=outputs[2]) {
-		decision = -1;
-	} else if (outputs[1]>=outputs[2]) {
-		decision = 0;
-	} else {
-		decision = 1;
+	float last;
+	for (uint8_t i=0; i<_net_layout[_net_layers - 1]; i++) {
+		if (i==0 || outputs[i]>last) {
+			last = outputs[i];
+			decision = i;
+		}
 	}
+	decision = decision - 1;
+	delete outputs;
+	
+	if (SNAKE_DEBUG) LOGln("SnakeEffect * Decision: %d", decision);

 	_dir += decision;
 	if (_dir < 0) _dir += 4;
+	if (_dir > 3) _dir -= 4;
 }

 /**
@@ -94,8 +125,7 @@ int8_t SnakeEffect::_manual_decision() {
 }*/

 bool SnakeEffect::_is_free(uint8_t dir) {
-	Coords np = _new_pos(dir);
-	return np.x>=0 && np.x<window->width && np.y>=0 && np.y<window->height && _map[_xy2i(np)]==0;
+	return _free_spaces(dir)!=0;
 }

 uint8_t SnakeEffect::_free_spaces(uint8_t dir) {
@@ -109,15 +139,15 @@ uint8_t SnakeEffect::_free_spaces(uint8_t dir) {
 		case SNAKE_DIR_WEST:  x=-1; break;
 	}
 	Coords p(_pos);
-	uint8_t i;
-	for(i=0; i<window->width || i<window->height; i++) {
+	uint8_t i=0;
+	while (true) {
 		p.x += x;
 		p.y += y;
 		if (p.x<0 || p.x>=window->width || p.y<0 || p.y>=window->height || _map[_xy2i(p)]!=0) {
-			break;
+			return i;
 		}
+		i++;
 	}
-	return i;
 }

 uint8_t SnakeEffect::_to_apple(uint8_t dir) {
@@ -159,10 +189,7 @@ Coords SnakeEffect::_i2xy(uint16_t i) {
 }

 void SnakeEffect::_move() {
-	if (_dying==0 && !_is_free(_dir)) {
-		_dying = 150;
-		return;
-	}
+	
 	
 	if (_dying > 0) {
 		_dying--;
@@ -175,11 +202,21 @@ void SnakeEffect::_move() {
 	}
 	
 	unsigned long now = millis();
-	if (_last_move_at < now && now - _last_move_at < 100) {
+	if (_last_move_at < now && now - _last_move_at < 0) {
 		return;
 	}
+	_round++;
 	_last_move_at = now;
+	_decide();
+	
+	if (_dying==0 && !_is_free(_dir)) {
+		_dying = 150;
+		return;
+	}
+	
 	_pos = _new_pos(_dir);
+	if (SNAKE_DEBUG) LOGln("SnakeEffect * new_pos: %d, %d", _pos.x, _pos.y);
+	if (SNAKE_DEBUG) LOGln("SnakeEffect * apple:   %d, %d", _apple.x, _apple.y);
 	if (_pos.x==_apple.x && _pos.y==_apple.y) {
 		_last_apple_at = millis();
 		_length++;
@@ -215,12 +252,9 @@ void SnakeEffect::loop(uint16_t ms) {
 	//CRGB color(CHSV(hue, 200, 255));
 	//window->setPixel(this->coords.x, this->coords.y, &color);
 	//hue++;
-	if (millis() < _last_apple_at || millis() - _last_apple_at > 30000) {
+	if (_dying==0 && (millis() < _last_apple_at || millis() - _last_apple_at > 30000)) {
 		_dying = 150;
 	}
-	if (_dying==0) {
-		_decide();
-	}
 	_move();
 	_draw();
 }
--- a/src/tools/snakenet/plot
+++ b/src/tools/snakenet/plot
@@ -1,2 +1,6 @@
 #!/usr/bin/gnuplot -c
-set term dumb 79 49; plot 'data_set.dat' notitle
+set term dumb 79 49
+plot 'data_set.dat' using 1:2 title 'Points', \
+# 'data_set.dat' using 1:3 title 'Length' axes x1y2, \
+# 'data_set.dat' using 1:4 title 'Stopped' axes x1y2, \
+# 'data_set.dat' using 1:5 title 'Dead' axes x1y2
--- a/src/tools/snakenet/snakenet.rb
+++ b/src/tools/snakenet/snakenet.rb
@@ -14,8 +14,10 @@ class Game
 	POINTS_MOVING_FAR = -1.5
 	
 	attr_reader :points, :dead, :ai, :length
+	attr_accessor :apple
 	
-	def initialize(a)
+	def initialize(a, debug=false)
+		@debug = debug
 		@ai = a
 		@data = [0]*(WIDTH*HEIGHT)
 		@dir = 0
@@ -29,6 +31,7 @@ class Game
 		@dead = false
 		@round = 0
 		@last_apple_at = 0
+		@count_left = @count_right = 0
 		place_apple()
 	end
 	
@@ -63,6 +66,8 @@ class Game
 		return if @dead
 		decision = @ai.decide(free?(@dir-1), free?(@dir), free?(@dir+1), apple?(@dir-1), apple?(@dir), apple?(@dir+1))
 		#puts "Decision: #{decision}"
+		@count_left += 1 if decision==-1
+		@count_right += 1 if decision==1
 		@dir = (@dir + decision) % 4
 		if (free?(@dir)==0)
 			#puts "Dead."
@@ -73,11 +78,11 @@ class Game
 		move
 	end
 	
-	def ranking; @length*10 - (@dead ? 200 : 0) - (since_last_apple >= 160 ? 100 : 0); end
+	def ranking; @length*10 - (@dead ? 200 : 0) - (stopped? ? 100 : 0) - (@count_right - @count_left).abs * 0.05; end
 	
 	def move
 		newpos = calc_new_pos(@pos, @dir)
-		#puts "Newpos: #{newpos}"
+		puts "Newpos: #{newpos}" if @debug
 		if newpos==@apple
 			@length+=1
 			@points += POINTS_APPLE
@@ -156,23 +161,41 @@ class Game
 		@dead = true
 	end
 	
-	def stopped?; since_last_apple >= WIDTH*HEIGHT*1.5; end
+	def stopped?; since_last_apple >= WIDTH*HEIGHT*2; end
 	
 	def ai_ranking; ai.ranking; end
 end

 class AI
-	attr_reader :weights
+	NETWORK_LAYOUT = [6, 4, 3]
+	attr_reader :weights, :id
 	attr_accessor :ranking, :rounds, :count_dead, :count_stopped, :sum_length
 	
-	def initialize(w=nil)
+	def initialize(w=nil, debug=false)
+		@debug = debug
 		reset()
 		@rounds = 1
+		@id = rand(0xFFFFFF)
 		if w==nil
-			@weights = Array.new(18) { rand() * 2.0 - 1.0 }
+			@weights = Array.new(network_size()) { rand() * 2.0 - 1.0 }
+			puts "Initialized with random values: #{@weights}" if @debug
 		else
-			@weights = w
+			if w[0].is_a? Integer
+				@weights = w.map{|s| s.to_s(16).rjust(8, "0").split("").each_slice(2).to_a.map(&:join).map{|s| s.to_i(16).chr}.join.unpack("g")}.flatten
+			else
+				@weights = w
+			end
+			puts "Initialized with given values: #{@weights}" if @debug
 		end
+		
+	end
+	
+	def network_size
+		s = 0
+		(0...(NETWORK_LAYOUT.count-1)).each do |i|
+			s += NETWORK_LAYOUT[i] * NETWORK_LAYOUT[i+1]
+		end
+		return s
 	end
 	
 	def reset
@@ -191,43 +214,59 @@ class AI
 	
 	def decide(left_free, straight_free, right_free, apple_left, apple_straight, apple_right)
 		inputs = [left_free, straight_free, right_free, apple_left, apple_straight, apple_right]
-		#pp inputs
-		outputs = [0, 0, 0]
-		(0...18).each do |x|
-			o = x/6
-			i = x%6
-			outputs[o] += inputs[i] * @weights[x]
+		puts "Inputs: #{inputs}" if @debug
+		outputs = nil
+		x = 0
+		(1...(NETWORK_LAYOUT.count)).each do |i|
+			c_in = NETWORK_LAYOUT[i-1]
+			c_out = NETWORK_LAYOUT[i]
+			outputs = Array.new(c_out){0.0}
+			(0...c_out).each do |o|
+				(0...c_in).each do |i|
+					outputs[o] += inputs[i] * @weights[x]
+					x+=1
+				end
+			end
+			inputs = outputs
 		end
+
 		max = 0
 		take = 0
-		(0...3).each do |x|
+		(0...(NETWORK_LAYOUT.last)).each do |x|
 			if outputs[x]>max
 				max = outputs[x]
 				take = x
 			end
 		end
+		puts "Decision: #{take-1}" if @debug
 		return take-1
 	end
 	
 	def evolve
 		w = @weights.dup
-		action = rand(4)
+		action = rand(5)
 		if action==0 #swap
-			i1 = rand(18)
-			i2 = rand(18)
+			i1 = rand(network_size())
+			i2 = rand(network_size())
 			temp = w[i1]
 			w[i1] = w[i2]
 			w[i2] = temp
 		elsif action==1 #change single value
-			i = rand(18)
+			i = rand(network_size())
 			w[i] = rand() * 2 - 1.0
 		elsif action==2 #invert single value
-			i = rand(18)
+			i = rand(network_size())
 			w[i] *= -1.0
+		elsif action==3
+			(0...network_size()).each do |i|
+				w[i] = rand() * 2 - 1.0 if rand(5)==0
+			end
 		else #change multiple values
-			(0...18).each do |i|
+			(0...network_size()).each do |i|
 				if (rand(5)==0)
-					w[i] = rand() * 2 - 1
+					w[i] += rand() / 5.0 - 0.1
+					w[i] = 1.0 if w[i]>1.0
+					w[i] = -1.0 if w[i]<-1.0
 				end
 			end
 		end
@@ -238,7 +277,7 @@ class AI
 	def merge(ai)
 		w = @weights.dup
 		w2 = ai.weights.dup
-		(0...18).each do |i|
+		(0...network_size()).each do |i|
 			if rand(2)==0
 				w[i] = w2[i]
 			end
@@ -249,19 +288,33 @@ class AI
 	def average(ai)
 		w = @weights.dup
 		w2 = ai.weights
-		(0...18).each do |i|
+		(0...network_size()).each do |i|
 			w[i] = (w[i] + w2[i]) / 2.0
 		end
 		return AI.new(w)
 	end
 	
 	def dump
-		puts "Data:"
-		puts "float _weights[18] = {#{@weights.join(", ")}};"
+		puts "const uint32_t _weights[#{network_size()}] = {#{@weights.map{|x| "0x" + [x].pack('g').split("").map(&:ord).map{|i| i.to_s(16).rjust(2, '0')}.join}.join(", ")}};"
 		#puts "Simplified: #{simplified}"
 	end
 end

+## Simulate
+=begin
+ai = AI.new([0xbd547c6d, 0xbedc84a5, 0x3e750239, 0x3ec5ae8a, 0xbcc9a683, 0x3f18715a, 0x3e947ed4, 0xbe4b8bf2, 0xbf2ee4ec, 0xbf3f0a75,
+0x3f5392dc, 0xbf06687b, 0xbedca2f2, 0xbcde3698, 0x3edd6a8a, 0xbd7284ca, 0x3ea7bac9, 0xbe5323c1, 0x3eccf87d,
+0xbf2d4796, 0xbf62b6e8, 0xbf71daf6, 0xbeff40aa, 0xbf207014, 0x3e26c03c, 0xbf497837, 0xbee4d175, 0x3ec601de, 0x3e4e0695, 0x3eef2619,
+0xbe849370, 0xbf18fb2b, 0x3f128e17, 0xbf3dcd78, 0x3f517299, 0x3eef3270], true)
+g = Game.new(ai, true)
+g.apple = [3, 3]
+10.times do
+	g.loop
+end
+exit
+=end
+
+
 graph = File.open(File.dirname(__FILE__) + "/data_set.dat", "w")
 graph.puts("# Round -  Points -  Length -  Stopped -  Dead")

@@ -272,6 +325,7 @@ games = []
 	ais[x] = AI.new#(SEEDS.sample)
 end

+best_old_game = nil
 best_old_ai = nil
 begin
 loop do
@@ -298,7 +352,13 @@ loop do
 	games_sorted = games.sort_by(&:ai_ranking).reverse.take(5)
 	g = games_sorted[0]
 	
-	puts "Round %5d: %7.1f points, length %3.0f, %3.0f%% stopped, %3.0f%% dead - {%s}" % [round, g.ai_ranking / GAMES_PER_ROUND, g.ai.sum_length.to_f / GAMES_PER_ROUND, g.ai.count_stopped.to_f / GAMES_PER_ROUND * 100, g.ai.count_dead.to_f / GAMES_PER_ROUND * 100, g.ai.weights.map{|v| v.truncate(1).to_s.rjust(4)}.join(", ")]
+	if (round-1)%50==0
+		puts "----------------------------------------------------"
+		puts "Round |  Points | Length | Stopped | Dead | ID      "
+		puts "----------------------------------------------------"
+	end
+
+	puts "%5d | %7.1f | %6.0f | %6.0f%% | %3.0f%% | 0x%06x" % [round, g.ai_ranking / GAMES_PER_ROUND, g.ai.sum_length.to_f / GAMES_PER_ROUND, g.ai.count_stopped.to_f / GAMES_PER_ROUND * 100, g.ai.count_dead.to_f / GAMES_PER_ROUND * 100, g.ai.id]
 	graph.puts("%d %f %f %f %f" % [round, g.ai_ranking / GAMES_PER_ROUND, g.ai.sum_length.to_f / GAMES_PER_ROUND, g.ai.count_stopped.to_f / GAMES_PER_ROUND * 100, g.ai.count_dead.to_f / GAMES_PER_ROUND * 100])
 	graph.flush
 	
@@ -306,7 +366,8 @@ loop do
 		g.ai.dump
 	end
 	
-	best_old_ai = g.ai
+	best_old_game = g
+	best_old_ai = g.ai.dup
 	
 	ais = []
 	games_sorted.each do |g|
@@ -331,6 +392,9 @@ loop do
 	round+=1
 end
 rescue SystemExit, Interrupt
+	puts
+	puts
+	puts "//   Round %d, %5.1f points, length %3d, %2.0f%% stopped, %2.0f%% died" % [round-1, best_old_game.ai_ranking / GAMES_PER_ROUND, best_old_ai.sum_length.to_f / GAMES_PER_ROUND, best_old_ai.count_stopped.to_f / GAMES_PER_ROUND * 100, best_old_ai.count_dead.to_f / GAMES_PER_ROUND * 100]
 	best_old_ai.dump
 	graph.close
 end