/*----------------------------------------------------------------------- * AdalineNeuralTLC.java * * Written by Steven Wyckoff, 2006 * * This TLC was writen as part of a project for CS152 Neural Networks and * Soft Computing class at Harvey Mudd College under professor Keller, * fall, 2006. The intention was to implement a neural network to * approximate the gain function in a Q-learning algorithm. Overall I * think it works quite well, the major limitation being the reward * and inputs given. * * Be aware that the XMLSerializable implementation doesn't actually * do anything so if you're going around saving things you'll have to * fix it. * * see www.cs.hmc.edu/~swyckoff/neural/neural.html for more information. */ /*----------------------------------------------------------------------- * Copyright (C) 2001 Green Light District Team, Utrecht University * * This program (Green Light District) is free software. * You may redistribute it and/or modify it under the terms * of the GNU General Public License as published by * the Free Software Foundation (version 2 or later). * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * See the GNU General Public License for more details. * See the documentation of Green Light District for further information. *------------------------------------------------------------------------*/ package gld.algo.tlc; import gld.*; import gld.sim.*; import gld.algo.tlc.*; import gld.infra.*; import gld.utils.*; import gld.xml.*; import java.io.IOException; import java.util.Random; import java.util.*; import java.awt.Point; class Adaline { public float[] m_weights; public int m_num_inputs; public Adaline(int num_inputs, Random rand) { m_num_inputs = num_inputs; //num_inputs+1 because we need to have a bias m_weights = new float[m_num_inputs+1]; for(int i = 0; i < m_num_inputs+1; i++) { m_weights[i] = rand.nextFloat(); } } public Adaline(float[] weights) { m_weights = weights; m_num_inputs = m_weights.length-1; } public float[] GetWeights() { return m_weights; } public void Train(float[] input, float desired, float learning_rate) { float raw = Evaluate(input); float error = desired - raw; float learning_factor = error * learning_rate; AdjustWeights(learning_factor, input); } public float Evaluate(float[] inputs) { float accum = 0.0f; for(int i = 0; i < m_num_inputs; i++) { accum += inputs[i] * m_weights[i]; } //add in a bias accum += m_weights[m_num_inputs]; return accum; } protected void AdjustWeights(float lrate, float[] errors) { for(int x = 0; x < m_num_inputs; x++) { m_weights[x] += lrate * errors[x]; } //remember, bias m_weights[m_num_inputs] += lrate; } } class NeuralNode { protected final static int green_index=0, red_index=1; Adaline[][] m_lane_q_functions; Node m_thisNode; Sign[] m_incomingSigns; int m_node_id; int m_num_lanes; int m_num_inputs; float m_gamma; float m_learning_rate; float m_reward_share; float[] m_reward_this_time; float[] m_gains; float[] m_previous_gains; float[] m_current_state; float[] m_previous_state; public NeuralNode(Infrastructure infra, int id, int num_inputs, float gamma, float learning, float reward_share, Random rand) { m_node_id = id; m_gamma = gamma; m_learning_rate = learning; m_reward_share = reward_share; Node[] nodes = infra.getAllNodes(); m_thisNode = nodes[m_node_id]; if (m_thisNode.getType() == Node.JUNCTION) { m_incomingSigns = ((Junction)m_thisNode).getSigns(); } else { m_incomingSigns = new Sign[0]; } m_num_lanes = m_incomingSigns.length; m_gains = new float[m_num_lanes]; m_previous_gains = new float[m_num_lanes]; m_lane_q_functions = new Adaline[m_num_lanes][]; m_reward_this_time = new float[m_num_lanes]; for(int i = 0; i < m_num_lanes; i++) { m_gains[i] = 0.0f; m_previous_gains[i] = 0.0f; m_lane_q_functions[i] = new Adaline[2]; m_lane_q_functions[i][0] = new Adaline(num_inputs,rand); m_lane_q_functions[i][1] = new Adaline(num_inputs,rand); m_reward_this_time[i] = 0.0f; } m_previous_state = null; m_current_state = null; } public void GrantReward(float reward, int tlId) { //give the lane the full reward //and the rest a discounted one for(int i = 0; i < m_num_lanes; i++) { if(m_incomingSigns[i].getId() == tlId) { m_reward_this_time[i] += reward; } else { m_reward_this_time[i] += reward * m_reward_share; } } } //CalcGains will store the old state, update the gains and then train the network public void CalcGains(float[] newState) { m_previous_state = m_current_state; m_current_state = newState; for(int lane = 0; lane < m_num_lanes; lane++) { m_previous_gains[lane] = m_gains[lane]; float red = m_lane_q_functions[lane][red_index].Evaluate(m_current_state); float green = m_lane_q_functions[lane][green_index].Evaluate(m_current_state); float dif = (green - red); m_gains[lane] = dif; } //this train must happen after calculating the new gains so that the reward //matches up properly with the stored previous state Train(); } //need to take into accout whether we were green or red. It might be just another part of the state/input data //sign.MayDrive() is true if we were green, red otherwise. We'll need to extract that info when we update state public void Train() { if(m_previous_state == null) { return; } for(int i = 0; i < m_num_lanes; i++) { float nextq = m_gains[i]; float reward_for_action = m_reward_this_time[i]; float updated_q_value = reward_for_action + (m_gamma * nextq); boolean lane_was_green = m_incomingSigns[i].mayDrive(); //System.out.println("lane "+i+" maydrive " + lane_was_green+" reward "+m_reward_this_time[i]+" next q "+nextq+" updated_q_value "+updated_q_value); if(lane_was_green) { m_lane_q_functions[i][green_index].Train(m_previous_state, updated_q_value, m_learning_rate); } else { m_lane_q_functions[i][red_index].Train(m_previous_state, updated_q_value, m_learning_rate); } //clean up our stuff m_reward_this_time[i] = 0.0f; } } public float GetQ(int laneNumber) { return m_gains[laneNumber]; } static float Max(float[] vals) { float max = Float.MIN_VALUE; float maxIndex = -1; for(int i = 0; i < vals.length; i++) { if(vals[i] >= max) { max = vals[i]; maxIndex = i; } } return max; } } public class AdalineNeuralTLC extends TLController implements XMLSerializable,TwoStageLoader//,InstantiationAssistant { protected final static String shortXMLName="tlc-adalineneural"; // TLC vars //protected Infrastructure infrastructure; protected TrafficLight[][] tls; protected Node[] allnodes; protected int num_nodes; protected int m_num_iters; protected static float[] RANDOM_CHANCE = {0.2f,0.03f};//A random gain setting is chosen instead of the on the TLC dictates with this chance protected static int RANDOM_THRESH = 500;//point at which it transitions to the next random chance protected static float PASS_REWARD = 100.0f; protected static float MOVE_REWARD = 5.0f; protected static float NOMOVE_REWARD = 0.0f; protected static float LEARNING_RATE = 0.01f; protected static float GAMMA_FACTOR = 0.5f; protected static float REWARD_SHARE = 0.1f; protected Random random; protected Infrastructure m_infrastructure; protected NeuralNode[] m_node_q_functions; protected int m_num_inputs; /** * Creates a new AdalineNeural Algorithm. * This TLC-algorithm uses adalines to approximate the Q function for each state/action pair * * @param i The infrastructure this algorithm will have to operate on */ public AdalineNeuralTLC(Infrastructure i) { super(i); //assistant=this; random=new Random(); m_num_iters = 0; } /** * Changes the Infrastructure this algorithm is working on * * @param i The new infrastructure for which the algorithm has to be set up */ public void setInfrastructure(Infrastructure infra) { random = new Random(); m_num_iters = 0; super.setInfrastructure(infra); m_infrastructure = infra; try{ Node[] nodes = m_infrastructure.getAllNodes(); num_nodes = nodes.length; //the number of inputs gets set here m_num_inputs = 3 * m_infrastructure.getAllInboundLanes().size(); m_node_q_functions = new NeuralNode[num_nodes]; for(int nodeId = 0; nodeId < num_nodes; nodeId++) { int num_decisions = tld[nodeId].length; //System.out.println("node: "+nodeId+" decisions: "+num_decisions+" num_inputs: "+m_num_inputs); m_node_q_functions[nodeId] = new NeuralNode(m_infrastructure, nodeId, m_num_inputs, GAMMA_FACTOR, LEARNING_RATE, REWARD_SHARE, random); } } catch(Exception e) { System.out.println("Rats, problem with creating the AdalineNeuralTLC"); } } /** * Calculates how every traffic light should be switched * @return Returns a double array of TLDecision's. These are tuples of a TrafficLight and the gain-value of when it's set to green. * @see gld.algo.tlc.TLDecision */ public TLDecision[][] decideTLs() { float[] input_set = new float[m_num_inputs]; int input_index = 0; //Roaduser ru; //ListIterator queue; int num_lanes; Drivelane d; for (int i=0; i < num_nodes; i++) { num_lanes = tld[i].length; for(int j=0; j < num_lanes; j++) { d = tld[i][j].getTL().getLane(); int num_waiting = d.getNumRoadusersWaiting(); int length = d.getLength(); input_set[input_index] = (float)num_waiting / (float)length; input_index++; input_set[input_index] = (float)num_waiting; input_index++; if(d.isFull()) input_set[input_index] = 0.0f; else input_set[input_index] = 1.0f; input_index++; /* * This doesn't do any good because there is no connection to any reward right now queue = d.getQueue().listIterator(); float ti = 0.0f; // For each waiting Roaduser for(int k = 0; k < num_waiting; k++) { ru = (Roaduser) queue.next(); int timehere = ru.getDrivelaneStartTime() - m_infrastructure.getCurCycle(); ti += timehere; } if(num_waiting > 0) { ti /= num_waiting; } input_set[input_index] = ti; input_index++; **/ } } //calculate the TL's' from the current state for each node using our network for(int i = 0; i < num_nodes; i++) { m_node_q_functions[i].CalcGains(input_set); } //Determine wheter it should be random or not m_num_iters++; float current_random_chance = 0.0f; if(m_num_iters > RANDOM_THRESH) { current_random_chance = RANDOM_CHANCE[1]; } else { current_random_chance = RANDOM_CHANCE[0]; } boolean randomrun = false; if (random.nextFloat() < current_random_chance) { randomrun = true; } //now lets fill in our TL table for the outside world for(int i = 0; i < num_nodes; i++) { int num_tl = tld[i].length; //this inner loop is to calculate the gain for each lane being green for(int j = 0; j < num_tl; j++) { float q = m_node_q_functions[i].GetQ(j); if(trackNode!=-1) { if(i==trackNode) { Drivelane currentlane = tld[i][j].getTL().getLane(); boolean[] targets = currentlane.getTargets(); System.out.println("node: "+i+" light: "+j+" gain: "+q+" "+targets[0]+" "+targets[1]+" "+targets[2]+" "+currentlane.getNumRoadusersWaiting()); } } // If this is a random run, set all gains randomly if(randomrun) { q = random.nextFloat(); } tld[i][j].setGain(q); } } return tld; } /** * Provides the TLC-algorithm with information about a roaduser that has had it's go in the moveAllRoaduser-cycle. * From this information it can be distilled whether a roaduser has moved or had to wait. * * This information will be used to figure out if there was a reward for the previous action. * * @param _ru * @param _prevlane * @param _prevsign * @param _prevpos * @param _dlanenow * @param _signnow * @param _posnow * @param _possiblelanes * @param _ranges */ public void updateRoaduserMove(Roaduser _ru, Drivelane _prevlane, Sign _prevsign, int _prevpos, Drivelane _dlanenow, Sign _signnow, int _posnow, PosMov[] posMovs, Drivelane _desired) { // Roaduser has just left the building! if(_dlanenow == null || _signnow == null) return; if(_prevsign.getType()==Sign.TRAFFICLIGHT && (_signnow.getType()==Sign.TRAFFICLIGHT || _signnow.getType()==Sign.NO_SIGN)) { int tlId = _prevsign.getId(); int nodeId = _prevsign.getNode().getId(); float reward = 0.0f; if(_prevsign == _signnow && _prevpos == _posnow) { //no move, no rerward reward = NOMOVE_REWARD; } else if(_prevsign != _signnow) { //pass a sign, very good reward = PASS_REWARD; } else { //not as good to move but not pass a sign reward = MOVE_REWARD; } m_node_q_functions[nodeId].GrantReward(reward,tlId); } } // XMLSerializable implementation // NOTICE: THIS IS NOT IMPLEMENTED IN ANY REAL FORM public void load (XMLElement myElement,XMLLoader loader) throws XMLTreeException,IOException,XMLInvalidInputException { //NUM_STEPS=myElement.getAttribute("num-steps-s").getIntValue(); //CROSSOVER_CHANCE=myElement.getAttribute("cross-prob").getFloatValue(); //MUTATION_CHANCE=myElement.getAttribute("mut-prob").getFloatValue(); //NUMBER_INDIVIDUALS=myElement.getAttribute("num-ind").getIntValue(); //NUMBER_GENERATIONS=myElement.getAttribute("num-gen").getIntValue(); num_nodes=myElement.getAttribute("num-nodes").getIntValue(); //num_step=myElement.getAttribute("num-steps-o").getIntValue(); //num_wait=myElement.getAttribute("num-wait").getIntValue(); //num_move=myElement.getAttribute("num-move").getIntValue(); //ind=(GenNeuralIndividual[])XMLArray.loadArray(this,loader,assistant); //pop=new BpropNeuralPopulation(); //loader.load(this,pop); } public XMLElement saveSelf () throws XMLCannotSaveException { XMLElement result=super.saveSelf(); result.setName(shortXMLName); //result.addAttribute(new XMLAttribute("num-steps-s",NUM_STEPS)); //result.addAttribute(new XMLAttribute("cross-pob",CROSSOVER_CHANCE)); //result.addAttribute(new XMLAttribute("mut-prob",MUTATION_CHANCE)); //result.addAttribute(new XMLAttribute("num-ind",NUMBER_INDIVIDUALS)); //result.addAttribute(new XMLAttribute("num-gen",NUMBER_GENERATIONS)); result.addAttribute(new XMLAttribute("num-nodes",num_nodes)); //result.addAttribute(new XMLAttribute("num-steps-o",num_step)); //result.addAttribute(new XMLAttribute("num-wait",num_wait)); //result.addAttribute(new XMLAttribute("num-move",num_move)); return result; } public void saveChilds (XMLSaver saver) throws XMLTreeException,IOException,XMLCannotSaveException { //XMLArray.saveArray(ind,this,saver,"members"); //saver.saveObject(pop); } public String getXMLName () { return "model."+shortXMLName; } // TwoStageLoader implementation public void loadSecondStage (Dictionary dictionaries) throws XMLInvalidInputException,XMLTreeException { super.loadSecondStage(dictionaries); //pop.loadSecondStage(dictionaries); //XMLUtils.loadSecondStage(new ArrayEnumeration(ind),dictionaries); } // Settings dialog public void showSettings(Controller c) { String[] descs = {"Threshold", "Reward for a car passing an intersection", "Reward for a car moving on a road", "Shared reward", "Gamma factor", "Learning Rate", "Random Chance 1", "Random Chance 2"}; int[] ints = {RANDOM_THRESH};//, RANDOM_THRESH[1], RANDOM_THRESH[2]}; float[] floats = {PASS_REWARD, MOVE_REWARD, REWARD_SHARE, GAMMA_FACTOR, LEARNING_RATE, RANDOM_CHANCE[0], RANDOM_CHANCE[1]}; TLCSettings settings = new TLCSettings(descs, ints, floats); settings = doSettingsDialog(c, settings); RANDOM_THRESH = settings.ints[0]; PASS_REWARD = settings.floats[0]; MOVE_REWARD = settings.floats[1]; REWARD_SHARE = settings.floats[2]; GAMMA_FACTOR = settings.floats[3]; LEARNING_RATE = settings.floats[4]; RANDOM_CHANCE[0] = settings.floats[5]; RANDOM_CHANCE[1] = settings.floats[6]; setInfrastructure(m_infrastructure); } }