/* * File: FeedForwardNetwork.java * Author: Justin Basilico * Course: PO CS 152: Neural Networks * Assignment: Final Project * Updated: 2001.12.18 * Created: 2001.12.03 * * Description: * This file contains the FeedForwardNetwork abstract class, which is a * NeuralNetwork that implements the common functionality among fully- * connected feed-forward networks. * * Copyright: Justin Basilico (2001). */ import java.util.Random; /** * FeedForwardNetwork abstract class * * This abstract class implements the common functionality among feed-forward * types of NeuralNetworks. In particular, the feed-forward network is assumed * to have layers of units is also assumed be fully connected between layers. * It also has a bias unit on each level. For a feed-forward network, the * values of a NetworkInput are encoded using a UnitEncoder into the input * layer and then the activation is spread forward through the network in some * manner to the output layer, with no recurrent connections. It is up to * sub-classes of the network to deal with exactly what the activation for * each level is, because there are many different types of architectures that * can be used on top of this one. In addition to the units and the weights, * the class also keeps track of the size of everything and it keeps an array * of units that are the target output that the network is trying to achieve * on a given input. This is done so that the actual input can be compared to * the target output and the error between them can be calculated. Another * UnitEncoder is used for taking a NetworkInput and encoding it into the * target output layer. * * @author Justin Basilico * @version 2001.12.18 * @see NeuralNetwork * @see UnitEncoder * @see NetworkInput */ public abstract class FeedForwardNetwork extends NeuralNetwork { /** numLayers * This is the total integer number of layers in the network, including * input, hidden, and output layers. It must always be at least 2. */ protected int numLayers = 0; // Total number of layers. /** numHiddenLayers * This is the integer number of hidden layers in the network. */ protected int numHiddenLayers = 0;// Number of hidden layers. /** numInputs * This is the integer number of inputs on the input layer (not including * the bias unit). It is specified by the dimensionality of the input * UnitEncoder. */ protected int numInputs = 0; // Number of input units. /** numOutputs * This is the integer number of outputs on the output layer and the size * of the target output (not including the bias unit). It is specified by * the dimensionality of the output UnitEncoder. */ protected int numOutputs = 0; // Number of output units. /** layerSizes * This array of integers represent the size of each layer in the * network, not including the bias units, which every layer has one of. */ protected int[] layerSizes = null; // Sizes of the layers. /** units * This array of arrays of doubles represents all of the units in the * network and contains their current activation values. Each layer has * the 0th element as the bias unit, which should always have an * activation of 1.0. The first array represent the input units and the * last on represents the output units. */ protected double[][] units = null; // Unit activations. /** weights * The array of matrices that represent the weight values between the * layers in the network. They are indexed by the to layer, then the to * unit, then the from unit. */ protected double[][][] weights = null; // Weight matrices. /** target * The array of doubles that represent the unit activations that the * output layer is trying to achieve on its current input. */ protected double[] target = null; // Target output. /** inputEncoder * The UnitEncoder that encodes NetworkInputs into the input layer of the * network. */ protected UnitEncoder inputEncoder = null; /** outputEncoder * The UnitEncoder that encodes NetworkInputs into the target output for * the network. */ protected UnitEncoder outputEncoder = null; /** randomGenerator * Random object for doing things with the FeedForwardNetwork that require * randomness. */ protected Random randomGenerator = null; /** * FeedForwardNetwork 2-layer constructor * * This constructor takes the input and output UnitEncoders along with * a Random object and creates a 2-layer FeedForwardNetwork that use the * given encoders. The size of the input and output units are determined * by the dimensionality of the UnitEncoders. This network has no hidden * units. * * @param inputEncoder The UnitEncoder used for encoding NetworkInputs * into the input layer of the network. * @param outputEncoder The UnitEncoder used for encoding NetworkInputs * into the target output for the network. * @param randomGenerator The Random object used for random number * generation in the network. */ public FeedForwardNetwork( UnitEncoder inputEncoder, // Encoder for input units. UnitEncoder outputEncoder, // Encoder for target output. Random randomGenerator) // Random number generator. { // Call the other constructor with no hidden units. this(inputEncoder, outputEncoder, null, randomGenerator); } /** * FeedForwardNetwork basic constructor * * This constructor takes an input and output UnitEncoder, an array of * integers that specify the sizes of the hidden layers (null for no * hidden layers), and the Random object that can be used by the network * for random number generation. It initializes the network to have the * given number of layers (at least 2, one for input and one for output) * and uses the dimensionality of the input encoder for the size of the * input layer, the dimensionality of the output encoder for the size of * the output layer, and the array of hidden layer sizes for the size of * each hidden layer. The network is initialized to have bias units on * each layer with an activation of 1.0 and all other units have an * activation of 0.0. In addition, all weights in the network are set to * 0.0. * * @param inputEncoder The UnitEncoder used for encoding NetworkInputs * into the input layer of the network. * @param outputEncoder The UnitEncoder used for encoding NetworkInputs * into the target output for the network. * @param hiddenLayserSizes The array of integers that specifies the * number of hidden layers by its length and the number of units * on each hidden layer by its values. If it is of length 0 or is * null, no hidden layers are created. * @param randomGenerator The Random object used for random number * generation in the network. */ public FeedForwardNetwork( UnitEncoder inputEncoder, // Encoder for input units. UnitEncoder outputEncoder, // Encoder for target output. int[] hiddenLayerSizes, // Sizes of hidden layers. Random randomGenerator) // Random number generator. { // Initialize the super-class. super(); // Determine the number of hidden layers. if ( hiddenLayerSizes == null ) { // No hidden layers. A total of 2 layers. this.numLayers = 2; this.numHiddenLayers = 0; } else { // We have hidden layers, as specified by the length of the given // array. this.numLayers = 2 + hiddenLayerSizes.length; this.numHiddenLayers = hiddenLayerSizes.length; } // The number of inputs and outputs is specified by the dimensionality // of the input and output encoders. this.numInputs = inputEncoder.getDimensionality(); this.numOutputs = outputEncoder.getDimensionality(); // Initialize the array of layer sizes. this.layerSizes = new int[this.numLayers]; // Put into the array of layer sizes the size of the input and output // layers. this.layerSizes[0] = this.numInputs; this.layerSizes[numLayers - 1] = this.numOutputs; if ( hiddenLayerSizes != null ) // Copy over all of the sizes of the hidden layers into the array // of layer sizes. for (int i = 0; i < hiddenLayerSizes.length; i++) // Copy the size of the ith hidden layer, which is the size of // the i+1 layer. this.layerSizes[i + 1] = hiddenLayerSizes[i]; // else - No hidden layer sizes to copy. // Create the array of units in the network. this.units = new double[this.numLayers][]; for (int i = 0; i < this.numLayers; i++) { // Create the proper number of units for this layer, including the // bias unit. units[i] = new double[layerSizes[i] + 1]; // Make all the unit activation values 0.0. Utilities.zeroArray(units[i]); // Set the bias unit to 1.0. units[i][0] = 1.0; } // Create the weights arrays. this.weights = new double[numLayers - 1][][]; for (int i = 0; i < weights.length; i++) { // Create the weight matrix for this layer. weights[i] = new double[layerSizes[i + 1] + 1][layerSizes[i] + 1]; // Set all of the weights to 0.0 initially. Utilities.zeroMatrix(weights[i]); } // Create the target output array, including the bias unit. this.target = new double[numOutputs + 1]; // Make the target output have all 0.0 to start. Utilities.zeroArray(target); // Set the value of the bias unit in the target output. target[0] = 1.0; // Use the input encoder and set the offset to 1 to deal with the bias // node. this.inputEncoder = inputEncoder; this.inputEncoder.setOffset(1); // Use the output encoder and set the offset to 1 to deal with the // bias node. this.outputEncoder = outputEncoder; this.outputEncoder.setOffset(1); // Use the given random number generator. this.randomGenerator = randomGenerator; } /** * FeedForwardNetwork default constructor * * Disable creation of empty FeedForwardNetworks. */ private FeedForwardNetwork() { } /** * feedForward * * This method uses the current values on the input layer to feed those * values through the network to the output layer. It must be implemented * by any sub-classes of FeedForwardNetwork. */ public abstract void feedForward(); /** * testOnInput * * This method takes a NetworkInput and loads it into both the input and * target output layers of the FeedForwardNetwork by using the input and * output UnitEncoders on the input. It then calls feedForward() to feed * the input activation through the network to get the output value for * that input. It returns the mean squared error for the network on that * input by calling calculateError(). * * @param input The NetworkInput to test the FeedForwardNetwork on. * @return The double mean squared error that the network has for the * given input. * @see FeedForwardNetwork.feedForward() * @see FeedForwardNetwork.loadInput(NetworkInput) * @see FeedForwardNetwork.loadTarget(NetworkInput) */ public double testOnInput( NetworkInput input) // NetworkInput to test. { // Load the input into the input layer. loadInput(input); // Load the target output into the target. loadTarget(input); // Feed the activation through the network. feedForward(); // Return the mean squared error. return calculateError(); } /** * loadInput * * This method takes a NetworkInput and uses the input UnitEncoder for * this network to encode the NetworkInput into the input layer of the * network. * * @param input The NetworkInput to load into the input layer of the * FeedForwardNetwork using the input UnitEncoder for the network. * @see FeedForwardNetwork.loadTarget(NetworkInput) */ public void loadInput( NetworkInput input) // NetworkInput to load. { // Encode the input into the input layer using the input encoder. inputEncoder.encode(input, units[0]); } /** * loadTarget * * This method takes a NetworkInput and uses the output UnitEncoder for * this network to encode the NetworkInput into the target output layer of * the network. * * @param input The NetworkInput to load into the target output layer of * the FeedForwardNetwork using the output UnitEncoder for the * network. * @see FeedForwardNetwork.loadInput(NetworkInput) */ public void loadTarget( NetworkInput input) // NetworkInput to load. { // Encode the input into the target output layer using the output // encoder. outputEncoder.encode(input, target); } /** * calculateError * * This method returns the mean squared error of the FeedForwardNetwork * by comparing the current activation values of the output units to the * target output units, which are the values that the output units should * have. * * @return The current mean squared error of the output in terms of the * target output, as a double. */ public double calculateError() { // Get the output units. double[] outputUnits = units[numLayers - 1]; // Calculate the sum of the error. double errorSum = 0.0; for (int i = 1; i < outputUnits.length; i++) { // The error is the target value minus the actual value. double o = outputUnits[i]; double t = target[i]; double error = t - o; // Add the error squared to the sum. errorSum += error * error; } // Return the mean squared error. return errorSum / (double) numOutputs; } /** * isCorrect * * This method takes a double threshold and returns true if the network's * output units are all within that threshold of the target output units. * Otherwise, it returns false. Thus, this tests the correctness of the * network within the given threshold. * * @param threshold The double threshold value for correctness. * @return True if all of the output values are within the threshold of * the target outputs. False otherwise. */ public boolean isCorrect( double threshold) // Correctness threshold. { // Get the output units. double[] outputUnits = units[numLayers - 1]; // Go through each output unit and see if it is within the threshold. for (int i = 1; i < outputUnits.length; i++) { // Check to see if the ith output unit is within the threshold // compared to the target output unit. if ( Math.abs(target[i] - outputUnits[i]) > threshold ) // Not within the threshold, return false. return false; // else - It is within the threshold. } // All of them are within the threshold. return true; } /** * getInput * * This method takes the integer index of a input unit and returns the * value of that unit. * * @param unitNum The integer index of the input unit to get the value * of. * @return The value of the given input unit. * @throws NullPointerException If unitNum is less than 0 or is greater * than the number of input units. */ public double getInput( int unitNum) // Input unit number to get. { return units[0][unitNum]; } /** * getOutput * * This method takes the integer index of a output unit and returns the * value of that unit. * * @param unitNum The integer index of the output unit to get the value * of. * @return The value of the given output unit. * @throws NullPointerException If unitNum is less than 0 or is greater * than the number of output units. */ public double getOutput( int unitNum) // Output unit number to get. { return units[numLayers - 1][unitNum]; } /** * getOutputs * * This method takes an array of doubles, which is assumed to be of the * size of the output units (including the bias node). It copies the * values of all the output units into the given array. * * @param array The array of doubles to copy the value of the output * units into. */ public void getOutputs( double[] array) // Array for the values. { // This is the output unit array. double[] outputUnits = units[numLayers - 1]; // Copy over the output units. for (int i = 0; i < array.length && i < outputUnits.length; i++) // Copy over the ith value. array[i] = outputUnits[i]; } /** * getTarget * * This method takes the integer index of a target output unit and returns * the value of that unit. * * @param unitNum The integer index of the target output unit to get the * value of. * @return The value of the given target output unit. * @throws NullPointerException If unitNum is less than 0 or is greater * than the number of output/target units. */ public double getTarget( int unitNum) // Target unit number to get. { return target[unitNum]; } /** * getTarget (array) * * This method takes an array of doubles, which is assumed to be of the * size of the output/target units (including the bias node). It copies * the values of all the target outputs into the given array. * * @param array The array of doubles to copy the value of the target * output units into. */ public void getTarget( double[] array) // Array for the values. { // Copy over each value. for (int i = 0; i < array.length && i < target.length; i++) // Copy the ith value. array[i] = target[i]; } /** * getNumLayers * * This method returns the total number of layers in the network, which * includes the input, output, and all hidden layers. * * Note: There is one more layer than the number of weight matrices. * * @return The integer number of layers in the network. It will always be * at least 2. */ public int getNumLayers() { return numLayers; } /** * getLayerSize * * This method takes the integer zero-based index number of a layer in the * network and returns the number of units on that layer. The input layer * is with index 0 and the output layer is with index getNumLayers() - 1. * * Note: The array of units on this layer will be one larger than the * size returned because every layer has a bias unit that is always 1.0. * * @param layerNum The zero-based integer index of the layer to get. * @return The integer size of the layer with the given index. * @throws NullPointerException If the given index is greater than or * equal to the number of layers. * @see FeedForwardNetwork.getNumLayers() */ public int getLayerSize( int layerNum) // Layer to get the size of. { return layerSizes[layerNum]; } /** * getInputSize * * This method returns the integer number of input units in this network. * * Note: The array of input units stored in the FeedForwardNetwork * actually has one more unit than the returned value because it has an * bias unit on the layer that is always 1.0. * * @return The integer number of input units in the network. */ public int getInputSize() { return numInputs; } /** * getOutputSize * * This method returns the integer number of output units in this network. * * Note: The array of output units stored in the FeedForwardNetwork * actually has one more unit than the returned value because it has an * extra bias unit. * * @return The integer number of output units in the network. */ public int getOutputSize() { return numOutputs; } /** * zeroWeights * * This method sets all of the weights in the FeedForwardNetwork to 0.0. */ public void zeroWeights() { for (int i = 0; i < weights.length; i++) // Zero the matrix of weights for each level. Utilities.zeroMatrix(weights[i]); } /** * getWeight * * This method takes the index of a weight in the network specified by the * integer layer number that the weight is going to, the integer index of * the unit on that layer the weight is going to and the integer index of * the unit on the previous layer that the weight is coming from. It * returns the value of that weight. It is assumed that on all layers that * the 0th unit is the bias unit. * * @param layerNum The integer index of the layer the weight goes to. It * must be at least 1 since the input layer has no incoming * weights. * @param toUnit The integer index of the unit on the given layer that * the weight goes to. * @param fromUnit The integer index of the unit on the previous layer * the weight comes from. * @return The double value of the specified weight. * @throws NullPointerException If any of the given indices are out of * bounds. */ public double getWeight( int layerNum, // In layer number int toUnit, // In unit number. int fromUnit) // Out unit number. { // Return the weight value. return weights[layerNum - 1][toUnit][fromUnit]; } /** * setWeight * * This method takes the index of a weight in the network specified by the * integer layer number that the weight is going to, the integer index of * the unit on that layer the weight is going to, the integer index of * the unit on the previous layer that the weight is coming from, and the * new double value for that weight. It sets the weight specified by * the given indices to that value. It is assumed that on all layers that * the 0th unit is the bias unit. * * @param layerNum The integer index of the layer the weight goes to. It * must be at least 1 since the input layer has no incoming * weights. * @param toUnit The integer index of the unit on the given layer that * the weight goes to. * @param fromUnit The integer index of the unit on the previous layer * the weight comes from. * @param newWeight The double that is the new value for the weight. * @throws NullPointerException If any of the given indices are out of * bounds. */ public void setWeight( int layerNum, // In layer number int toUnit, // In unit number. int fromUnit, // Out unit number. double newWeight) // New weight value. { // Set the weight to have the new value. weights[layerNum - 1][toUnit][fromUnit] = newWeight; } /** * print * * This method prints out all of the information about this * FeedForwardNetwork to System.out. */ public void print() { System.out.println("Network:"); System.out.println(" Layers: " + numLayers); System.out.println(" Hidden layers: " + numHiddenLayers); System.out.println(" Inputs: " + numInputs); System.out.println(" Outputs: " + numOutputs); System.out.print (" Layer sizes: "); Utilities.printArray(layerSizes); System.out.println(); System.out.println(" Units: "); for (int i = 0; i < units.length; i++) { System.out.print(" "); Utilities.printArray(units[i]); System.out.println(); } System.out.println(" Weights: "); for (int i = 0; i < weights.length; i++) { System.out.println(" Layer: " + i); for (int j = 0; j < weights[i].length; j++) { System.out.print(" "); Utilities.printArray(weights[i][j]); System.out.println(); } System.out.println(); } System.out.print (" Target: "); Utilities.printArray(target); System.out.println(); } }