/*
 * File:        SigmaPiNetwork.java
 * Author:      Justin Basilico
 * Course:      PO CS 152: Neural Networks
 * Assignment:  Final Project
 * Updated:     2001.12.18
 * Created:     2001.12.03
 *
 * Description:
 * This file contains the SigmaPiNetwork class, which implements a
 * FeedForwardNetwork that is a sigma-pi network.
 *
 * Copyright:    Justin Basilico (2001).
 */

import java.util.Random;

/**
 * SigmaPiNetwork class
 *
 * This class implements a FeedForwardNetwork that is a sigma-pi network. A
 * sigma-pi network is made up of alternating layers of pi (product) and
 * sigma (sum) units. Each unit has its own ActivationFunction, which
 * defaults to the sigmoid function (SigmoidFunction.INSTANCE). The pi units
 * get as their input to the activation functions the product of all of the
 * units on the previous layers times the weight for that unit. The only
 * exception to this is if the weight is 0.0, in which case it is assumed that
 * there is no connection between those two units, so it is not taken into
 * account in the product. For the sigma units, the input to the activation
 * function is the sum of the activations of the previous units times the
 * weights to those units. This is how the activation is fed forward through
 * the network.
 *
 * @author  Justin Basilico
 * @version 2001.12.18
 * @see     ActivationFunction
 * @see     SigmoidFunction
 * @see     LinearFunction
 */
public class SigmaPiNetwork
    extends FeedForwardNetwork
{
    /** functions
     * The activation functions of the network. The first index selects the
     * layer being fed into (row 0 belongs to the layer after the input
     * layer); the second index selects the unit on that layer. Slot 0 of
     * each row is never evaluated by this class (propagation and printing
     * both start at index 1); it presumably lines up with the layer's bias
     * unit. */
    protected ActivationFunction[][]
                            functions = null;

    /**
     * SigmaPiNetwork 2-layer constructor
     *
     * This constructor takes the input and output UnitEncoders along with
     * a Random object and hands them to the matching FeedForwardNetwork
     * constructor, which (according to the original author) builds a network
     * with no hidden units whose layer sizes come from the dimensionality of
     * the encoders. Afterwards every unit is given a SigmoidFunction as its
     * activation function.
     *
     * @param  inputEncoder The UnitEncoder used for encoding NetworkInputs
     *         into the input layer of the network.
     * @param  outputEncoder The UnitEncoder used for encoding NetworkInputs
     *         into the target output for the network.
     * @param  randomGenerator The Random object used for random number
     *         generation in the network.
     */
    public SigmaPiNetwork(
        UnitEncoder         inputEncoder,       // Encoder for input units.
        UnitEncoder         outputEncoder,      // Encoder for target output.
        Random              randomGenerator)    // Random number generator.
    {
        // Initialize the super class.
        super(inputEncoder, outputEncoder, randomGenerator);

        // Start out with the activation functions all SigmoidFunctions.
        initDefaultActivationFunctions();
    }

    /**
     * SigmaPiNetwork basic constructor
     *
     * This constructor takes an input and output UnitEncoder, an array of
     * integers that specify the sizes of the hidden layers (null for no
     * hidden layers), and the Random object that can be used by the network
     * for random number generation. All of them are handed to the matching
     * FeedForwardNetwork constructor; see that constructor for how layers,
     * bias units and weights are initialized. Afterwards every unit is given
     * a SigmoidFunction as its activation function.
     *
     * @param  inputEncoder The UnitEncoder used for encoding NetworkInputs
     *         into the input layer of the network.
     * @param  outputEncoder The UnitEncoder used for encoding NetworkInputs
     *         into the target output for the network.
     * @param  hiddenLayerSizes The array of integers that specifies the
     *         number of hidden layers by its length and the number of units
     *         on each hidden layer by its values. If it is of length 0 or is
     *         null, no hidden layers are created.
     * @param  randomGenerator The Random object used for random number
     *         generation in the network.
     */
    public SigmaPiNetwork(
        UnitEncoder         inputEncoder,       // Encoder for input units.
        UnitEncoder         outputEncoder,      // Encoder for target output.
        int[]               hiddenLayerSizes,   // Sizes of hidden layers.
        Random              randomGenerator)    // Random number generator.
    {
        // Initialize the super class.
        super(inputEncoder, outputEncoder, hiddenLayerSizes, randomGenerator);

        // Start out with activation functions all SigmoidFunctions.
        initDefaultActivationFunctions();
    }

    /**
     * initDefaultActivationFunctions
     *
     * This method (re)creates the table of activation functions so that it
     * has one row per non-input layer and, in each row, one slot per unit of
     * that layer plus slot 0, and fills every slot with
     * SigmoidFunction.INSTANCE. It is shared by both constructors and must
     * only be called after the super class has set numLayers and layerSizes.
     */
    private void initDefaultActivationFunctions()
    {
        this.functions = new ActivationFunction[numLayers - 1][];
        for (int i = 0; i < functions.length; i++)
        {
            // Row i describes the units of layer i + 1.
            functions[i] = new ActivationFunction[layerSizes[i + 1] + 1];
            for (int j = 0; j < functions[i].length; j++)
            {
                functions[i][j] = SigmoidFunction.INSTANCE;
            }
        }
    }

    /**
     * feedForward
     *
     * This method uses the current values on the input layer to feed those
     * values through the network to the output layer. The layers fed into
     * alternate between pi and sigma units, starting with a pi layer
     * directly after the input layer: the step from layer 0 to layer 1 uses
     * the product rule, the step from layer 1 to layer 2 uses the summation
     * rule, and so on. Each unit's activation function is then applied to
     * its product or sum.
     */
    public void feedForward()
    {
        // Feed the activation through the network one layer at a time.
        for (int layer = 0; layer < numLayers - 1; layer++)
        {
            if ( layer % 2 == 0 )
            {
                // Even steps feed into a layer of pi units.
                piPropagate(units[layer], units[layer + 1], weights[layer],
                    functions[layer]);
            }
            else
            {
                // Odd steps feed into a layer of sigma units.
                sigmaPropagate(units[layer], units[layer + 1],
                    weights[layer], functions[layer]);
            }
        }
    }

    /**
     * sigmaPropagate
     *
     * This method propagates the activation values of one layer into the
     * next layer, treating the receiving layer as sigma units. For every
     * receiving unit except index 0, the input to its activation function is
     * the sum over all sending units of weight times activation. Index 0 of
     * the receiving layer is left untouched.
     *
     * @param  from The array of double unit activation values of the units to
     *         propagate from.
     * @param  to The array of double unit activation values of the units to
     *         propagate to as a sigma layer. It is updated in place.
     * @param  weightsIn The weight matrix of the weight values going into
     *         the layer, indexed first by the receiving unit and then by the
     *         sending unit.
     * @param  functionsIn The ActivationFunctions of the units that are being
     *         propagated into, indexed by receiving unit.
     */
    private static void sigmaPropagate(
        double[]                from,           // From unit values.
        double[]                to,             // To unit values.
        double[][]              weightsIn,      // Weights values.
        ActivationFunction[]    functionsIn)    // Unit activation functions.
    {
        // Calculate the sum for each unit, skipping index 0.
        for (int i = 1; i < to.length; i++)
        {
            // Start out with a sum of 0.0.
            double sum = 0.0;

            // Add up the weights times the input activations.
            for (int j = 0; j < from.length; j++)
            {
                sum += weightsIn[i][j] * from[j];
            }

            // Set the value of the ith unit by using the activation function
            // on the total sum.
            to[i] = functionsIn[i].getUnitActivation(sum);
        }
    }

    /**
     * piPropagate
     *
     * This method propagates the activation values of one layer into the
     * next layer, treating the receiving layer as pi units. For every
     * receiving unit except index 0, the input to its activation function is
     * the product over all connected sending units of weight times
     * activation. A weight of exactly 0.0 means "no connection" and is left
     * out of the product; a unit with no connections at all therefore gets a
     * product of 1.0. Index 0 of the receiving layer is left untouched.
     *
     * @param  from The array of double unit activation values of the units to
     *         propagate from.
     * @param  to The array of double unit activation values of the units to
     *         propagate to as a pi layer. It is updated in place.
     * @param  weightsIn The weight matrix of the weight values going into
     *         the layer, indexed first by the receiving unit and then by the
     *         sending unit.
     * @param  functionsIn The ActivationFunctions of the units that are being
     *         propagated into, indexed by receiving unit.
     */
    private static void piPropagate(
        double[]                from,           // From unit values.
        double[]                to,             // To unit values.
        double[][]              weightsIn,      // Weights values.
        ActivationFunction[]    functionsIn)    // Unit activation functions.
    {
        // Calculate the product for each unit, skipping index 0.
        for (int i = 1; i < to.length; i++)
        {
            // Start out with a product of 1.0.
            double product = 1.0;

            // Multiply together the weights times the input activations.
            for (int j = 0; j < from.length; j++)
            {
                double weight = weightsIn[i][j];
                if ( weight != 0.0 )
                {
                    product *= weight * from[j];
                }
                // else - No connection between these units.
            }

            // Set the value of the ith unit by using the activation function
            // on the total product.
            to[i] = functionsIn[i].getUnitActivation(product);
        }
    }

    /**
     * setActivationFunctions
     *
     * This method takes an array of arrays of new ActivationFunctions to use
     * for the SigmaPiNetwork. Only the overlap between the given table and
     * the network's own table is copied: extra rows or extra entries in
     * newFunctions are ignored, and rows or entries that newFunctions does
     * not cover keep their current function. The table is validated before
     * anything is copied, so a rejected argument leaves the network
     * unchanged. Individual entries are copied as given, including null.
     *
     * @param  newFunctions The array of arrays of ActivationFunctions that
     *         will be used for calculating the activation values of the units
     *         in the network.
     * @throws NullPointerException If newFunctions is null, or if one of its
     *         rows that overlaps the network's table is null.
     */
    public void setActivationFunctions(
        ActivationFunction[][]  newFunctions)   // Activation functions.
    {
        if ( newFunctions == null )
        {
            throw new NullPointerException("newFunctions must not be null");
        }

        int layerCount = Math.min(newFunctions.length, functions.length);

        // Check every row that will be read before changing anything, so
        // that a bad row cannot leave the table half updated.
        for (int i = 0; i < layerCount; i++)
        {
            if ( newFunctions[i] == null )
            {
                throw new NullPointerException(
                    "newFunctions[" + i + "] must not be null");
            }
        }

        // Copy over the activation functions.
        for (int i = 0; i < layerCount; i++)
        {
            int unitCount =
                Math.min(newFunctions[i].length, functions[i].length);
            System.arraycopy(newFunctions[i], 0, functions[i], 0, unitCount);
        }
    }

    /**
     * print
     *
     * This method prints out all of the information about this SigmaPiNetwork
     * to System.out. It just calls the print() method in the super class then
     * outputs one row per layer describing the activation functions: a
     * leading "1" column, then "I" for each input unit on the first row and,
     * on the following rows, "L" for a LinearFunction, "S" for a
     * SigmoidFunction and "O" for any other function.
     *
     * @see    FeedForwardNetwork#print()
     */
    public void print()
    {
        super.print();

        System.out.println("    Activation functions:");

        // The input layer has no activation functions of its own.
        System.out.print("                   1");
        for (int i = 1; i <= numInputs; i++)
        {
            System.out.print(" I");
        }
        System.out.println();

        // One row for every layer that is fed into.
        for (int i = 0; i < (numLayers - 1); i++)
        {
            System.out.print("                   1");

            for (int j = 1; j <= layerSizes[i + 1]; j++)
            {
                System.out.print(functionCode(functions[i][j]));
            }

            System.out.println();
        }
    }

    /**
     * functionCode
     *
     * This method gives the text printed by print() for one activation
     * function: " L" for a LinearFunction, " S" for a SigmoidFunction, and
     * " O" for anything else. LinearFunction is tested first.
     *
     * @param  function The ActivationFunction to describe.
     * @return The two-character code, including its leading space.
     */
    private static String functionCode(
        ActivationFunction      function)       // Function to describe.
    {
        if ( function instanceof LinearFunction )
        {
            return " L";
        }

        if ( function instanceof SigmoidFunction )
        {
            return " S";
        }

        return " O";
    }
}