// Block.cpp // David R. Morrison #include "block.hpp" // Constructor Block::Block(int s, int input_vector_size) { size = s; inputVectorSize = input_vector_size; cells.resize(size); // Set up weights for the gates, including a bias value inGateWeightArray = getRandomWeights(inputVectorSize + 1); forgetGateWeightArray = getRandomWeights(inputVectorSize + 1); outGateWeightArray = getRandomWeights(inputVectorSize + 1); // Everything is initially zero inputGateValue = 0.0; forgetGateValue = 0.0; outputGateValue = 0.0; outputGateSignal = 0.0; cellSignals.resize(size); // Initialize cells for (int i = 0; i < size; ++i) cells[i] = new Cell(inputVectorSize); } // Delete all the cells Block::~Block() { for (int i = 0; i < size; ++i) delete cells[i]; } // Calculate the values of the gates as a weighted sum of inputs void Block::calcGates(vector inputs) { assert(inputs.size() == inputVectorSize); // Take care of bias first inputGateValue = inGateWeightArray[0]; forgetGateValue = forgetGateWeightArray[0]; outputGateValue = outGateWeightArray[0]; // Then do the rest for (int i = 1; i < inputVectorSize + 1; ++i) { inputGateValue += inGateWeightArray[i] * inputs[i - 1]; forgetGateValue += forgetGateWeightArray[i] * inputs[i - 1]; outputGateValue += outGateWeightArray[i] * inputs[i - 1]; } } // Calculate the output from the block next; it's important to calculate the new // gate values before calculating the output value. vector Block::calcOutput(vector inputs) { vector out(size); for (int i = 0; i < size; ++i) out[i] = cells[i]->evaluateSample(inputs, inputGateValue, forgetGateValue, outputGateValue); return out; } // Figure out error signals according to LSTM paper vector Block::calcSignals(vector outputSignals, vector< vector > outputWeights, int blockNum) { double internalStateSum = 0.0; for (int i = 0; i < size; ++i) { internalStateSum += h(getInternalState(i)) * calcWeightedSignalSum(outputSignals, outputWeights[i + blockNum]); cellSignals[i] = f(outputGateValue) * hPrime(getInternalState(i)) * calcWeightedSignalSum(outputSignals, outputWeights[i + blockNum]); cells[i]->calcDerivs(inputGateValue, forgetGateValue); } outputGateSignal = fPrime(outputGateValue) * internalStateSum; return cellSignals; } // Update all the weights -- need to update weights going to all the gates, as well as // all the cells. void Block::updateWeights(double eta, vector layerInput) { // Update bias weights first outGateWeightArray[0] += eta * outputGateSignal; double inDerivSum = 0.0; double forgetDerivSum = 0.0; for (int j = 0; j < size; ++j) { forgetDerivSum += cellSignals[j] * cells[j]->getForgetGateDeriv(0); inDerivSum += cellSignals[j] * cells[j]->getInGateDeriv(0); } forgetGateWeightArray[0] += eta * inDerivSum; inGateWeightArray[0] += eta * inDerivSum; // Now do all the others for (int i = 1; i < inputVectorSize + 1; ++i) { outGateWeightArray[i] += eta * outputGateSignal * layerInput[i - 1]; inDerivSum = 0.0; forgetDerivSum = 0.0; for (int j = 0; j < size; ++j) { inDerivSum += cellSignals[j] * cells[j]->getInGateDeriv(i); forgetDerivSum += cellSignals[j] * cells[j]->getForgetGateDeriv(i); // Bias again cells[0]->changeWeight(0, eta * cellSignals[j] * cells[j]->getCellDeriv(0)); cells[j]->changeWeight(i, eta * cellSignals[j] * cells[j]->getCellDeriv(i)); } inGateWeightArray[i] += eta * inDerivSum; forgetGateWeightArray[i] += eta * forgetDerivSum; } } void Block::clear() { inputGateValue = 0.0; forgetGateValue = 0.0; outputGateValue = 0.0; for (int i = 0; i < cells.size(); ++i) cells[i]->clear(); } // Getter functions double Block::getInternalState(int i) { return cells[i]->getCECValue(); } double Block::getInputGateValue() { return inputGateValue; } double Block::getForgetGateValue() { return forgetGateValue; } double Block::getOutputGateValue() { return outputGateValue; } double Block::getWeight(unsigned int cellNum, unsigned int weightNum) { assert(cellNum < size); return cells[cellNum]->getWeight(weightNum); } // Calculate a weighted sum of error signals. double Block::calcWeightedSignalSum(vector signals, vector weights) { double weightedSignalSum = 0.0; assert(signals.size() == weights.size()); for (int i = 0; i < signals.size(); ++i) weightedSignalSum += weights[i] * signals[i]; return weightedSignalSum; } ostream& Block::print(ostream& out) { out.precision(4); out << "Input Gate Weight Array: [ "; for (int i = 0; i < inGateWeightArray.size(); ++i) out << inGateWeightArray[i] << " "; out << "]" << endl; out << "Forget Gate Weight Array: [ "; for (int i = 0; i < forgetGateWeightArray.size(); ++i) out << forgetGateWeightArray[i] << " "; out << "]" << endl; out << "Output Gate Weight Array: [ "; for (int i = 0; i < inGateWeightArray.size(); ++i) out << outGateWeightArray[i] << " "; out << "]" << endl; for (int i = 0; i < cells.size(); ++i) out << "Cell " << i << ": " << *cells[i]; return out; } ostream& operator<<(ostream& out, Block& b) { return b.print(out); }