/* * File: SigmaPiNetwork.java * Author: Justin Basilico * Course: PO CS 152: Neural Networks * Assignment: Final Project * Updated: 2001.12.18 * Created: 2001.12.03 * * Description: * This file contains the SigmaPiNetwork class, which implements a * FeedForwardNetwork that is a sigma-pi network. * * Copyright: Justin Basilico (2001). */ import java.util.Random; /** * SigmaPiNetwork class * * This class implements a FeedForwardNetwork that is a sigma-pi network. A * sigma-pi network is defined such that layers of pi (product) or sigma (sum) * units. Each unit has a defined ActivationFunction, which all default to * be sigmoid activation functions (SigmoidFunction.INSTANCE). The pi units * get as their input to the activation functions the product of all of the * units on the previous layers times the weight for that unit. The only * exception to this is if the weight is 0.0, in which case it is assumed that * there is no connection between those two units, so it is not taken into * account in the product. For the sigma units, the input to the activation * function is the sum of the activations of the previous units times the * weights to those units. This is how the activation is fed forward through * the network. * * @author Justin Basilico * @version 2001.12.18 * @see ActivationFunciton * @see SigmoidFunction * @see LinearFunction. */ public class SigmaPiNetwork extends FeedForwardNetwork { /** functions * The array of arrays of ActivationFunctions, one for each unit in the * network, that gives the activation value of the unit from its given * input. */ protected ActivationFunction[][] functions = null; /** * SigmaPiNetwork 2-layer constructor * * This constructor takes the input and output UnitEncoders along with * a Random object and creates a 2-layer FeedForwardNetwork that use the * given encoders. The size of the input and output units are determined * by the dimensionality of the UnitEncoders. This network has no hidden * units. It initializes the network to have all SigmoidFunctions for the * activation functions in the network. * * @param inputEncoder The UnitEncoder used for encoding NetworkInputs * into the input layer of the network. * @param outputEncoder The UnitEncoder used for encoding NetworkInputs * into the target output for the network. * @param randomGenerator The Random object used for random number * generation in the network. */ public SigmaPiNetwork( UnitEncoder inputEncoder, // Encoder for input units. UnitEncoder outputEncoder, // Encoder for target output. Random randomGenerator) // Random number generator. { // Initialize the super class. super(inputEncoder, outputEncoder, randomGenerator); // Start out with the activation functions all SigmoidFunctions. this.functions = new ActivationFunction[numLayers - 1][]; for (int i = 0; i < functions.length; i++) { functions[i] = new ActivationFunction[layerSizes[i + 1] + 1]; for (int j = 0; j < functions[i].length; j++) functions[i][j] = SigmoidFunction.INSTANCE; } } /** * SigmaPiNetwork basic constructor * * This constructor takes an input and output UnitEncoder, an array of * integers that specify the sizes of the hidden layers (null for no * hidden layers), and the Random object that can be used by the network * for random number generation. It initializes the network to have the * given number of layers (at least 2, one for input and one for output) * and uses the dimensionality of the input encoder for the size of the * input layer, the dimensionality of the output encoder for the size of * the output layer, and the array of hidden layer sizes for the size of * each hidden layer. The network is initialized to have bias units on * each layer with an activation of 1.0 and all other units have an * activation of 0.0. In addition, all weights in the network are set to * 0.0. It initializes the network to have all SigmoidFunctions for the * activation functions in the network. * * @param inputEncoder The UnitEncoder used for encoding NetworkInputs * into the input layer of the network. * @param outputEncoder The UnitEncoder used for encoding NetworkInputs * into the target output for the network. * @param hiddenLayserSizes The array of integers that specifies the * number of hidden layers by its length and the number of units * on each hidden layer by its values. If it is of length 0 or is * null, no hidden layers are created. * @param randomGenerator The Random object used for random number * generation in the network. */ public SigmaPiNetwork( UnitEncoder inputEncoder, // Encoder for input units. UnitEncoder outputEncoder, // Encoder for target output. int[] hiddenLayerSizes, // Sizes of hidden layers. Random randomGenerator) // Random number generator. { // Initialize the super class. super(inputEncoder, outputEncoder, hiddenLayerSizes, randomGenerator); // Start out with activation functions all SigmoidFunctions. this.functions = new ActivationFunction[numLayers - 1][]; for (int i = 0; i < functions.length; i++) { functions[i] = new ActivationFunction[layerSizes[i + 1] + 1]; for (int j = 0; j < functions[i].length; j++) functions[i][j] = SigmoidFunction.INSTANCE; } } /** * feedForward * * This method uses the current values on the input layer to feed those * values through the network to the output layer. It assumes that every * other layer is a pi layer than a sigma layer and propagates the * activation forward using the product and summation rules for the * values and then using the activation functions for the network. */ public void feedForward() { // Feed the activation through the network. for (int i = 0; i < numLayers - 1; i++) { // Propagate the layer of pi units. piPropagate(units[i], units[i + 1], weights[i], functions[i]); // Go to the next layer. i++; if ( i < numLayers - 1 ) // Propagate the layer of sigma units. sigmaPropagate(units[i], units[i + 1], weights[i], functions[i]); // else - We already did the output units. } } /** * sigmaPropagate * * This method takes the array of units the activation is coming from, * the array of units the activation is going to, the matrix of weights * indexed by the input layer, and the array of activation functions for * the units the values are going to. It propagates the values from the * given from layer to the given to layer by assuming that the to layer * is sigma units. * * @param from The array of double unit activation values of the units to * propagate from. * @param to The array of double unit activation values of the units to * propagate to as a sigma layer. * @param weights The weight matrix of the weight values going into the * layer. * @param functionIn The ActivationFunctions of the units that are being * propagated into. */ private static void sigmaPropagate( double[] from, // From unit values. double[] to, // To unit values. double[][] weightsIn, // Weights values. ActivationFunction[] functionsIn) // Unit activation values. { // Calculate the sum for each unit. double sum = 0.0; for (int i = 1; i < to.length; i++) { // Start out with a sum of 0.0. sum = 0.0; // Calculate the sum of the weights time the input activations. for (int j = 0; j < from.length; j++) // Multiply the weight times the activation and add it to the // current sum. sum += weightsIn[i][j] * from[j]; // Set the value of the ith unit by using the activation function // on the total sum. to[i] = functionsIn[i].getUnitActivation(sum); } } /** * piPropagate * * This method takes the array of units the activation is coming from, * the array of units the activation is going to, the matrix of weights * indexed by the input layer, and the array of activation functions for * the units the values are going to. It propagates the values from the * given from layer to the given to layer by assuming that the to layer * is pi units. * * @param from The array of double unit activation values of the units to * propagate from. * @param to The array of double unit activation values of the units to * propagate to as a pi layer. * @param weights The weight matrix of the weight values going into the * layer. * @param functionIn The ActivationFunctions of the units that are being * propagated into. */ private static void piPropagate( double[] from, // From unit values. double[] to, // To unit values. double[][] weightsIn, // Weights values. ActivationFunction[] functionsIn) // Unit activation values. { // Calculate the product for each unit. double product = 1.0; for (int i = 1; i < to.length; i++) { // Start out with a product of 1.0. product = 1.0; // Calculate the product of the weights time the input // activations. for (int j = 0; j < from.length; j++) if ( weightsIn[i][j] != 0.0 ) // Multiply the weight value times the activation times // the current product. product *= weightsIn[i][j] * from[j]; // else - No connection between these units. // Set the value of the ith unit by using the activation function // on the total product. to[i] = functionsIn[i].getUnitActivation(product); } } /** * setActivationFunctions * * This method takes an array of arrays of new ActivationFunctions to use * for the SigmaPiNetwork. * * @param newFunctions The array of arrays of ActivationFunctions that * will be used for calculating the activation values of the units * in the network. */ public void setActivationFunctions( ActivationFunction[][] newFunctions) // Activation functions. { // Copy over the activation functions. for (int i = 0; i < newFunctions.length && i < functions.length; i++) { for (int j = 0; j < newFunctions[i].length && j < functions[i].length; j++) // Copy over the function for this unit. functions[i][j] = newFunctions[i][j]; } } /** * print * * This method prints out all of the information about this SigmaPiNetwork * to System.out. It just calls the print() method in the super class then * outputs information about the activation functions. * * @see FeedForwardNetwork.print() */ public void print() { super.print(); System.out.println(" Activation functions:"); System.out.print(" 1"); for (int i = 1; i <= numInputs; i++) System.out.print(" I"); System.out.println(); for (int i = 0; i < (numLayers - 1); i++) { System.out.print(" 1"); for (int j = 1; j <= layerSizes[i + 1]; j++) { if ( functions[i][j] instanceof LinearFunction ) System.out.print(" L"); else if ( functions[i][j] instanceof SigmoidFunction ) System.out.print(" S"); else System.out.print(" O"); } System.out.println(); } } }