diff --git a/tmva/tmva/CMakeLists.txt b/tmva/tmva/CMakeLists.txt index ab0812cfefd1d..2f501846feb06 100644 --- a/tmva/tmva/CMakeLists.txt +++ b/tmva/tmva/CMakeLists.txt @@ -16,7 +16,7 @@ set(headers1 Configurable.h Factory.h MethodBase.h MethodCompositeBase.h MethodKNN.h MethodCFMlpANN.h MethodCFMlpANN_Utils.h MethodLikelihood.h MethodHMatrix.h MethodPDERS.h MethodBDT.h MethodDT.h MethodSVM.h MethodBayesClassifier.h MethodFDA.h MethodMLP.h MethodBoost.h - MethodPDEFoam.h MethodLD.h MethodCategory.h MethodDNN.h MethodDL.h + MethodPDEFoam.h MethodLD.h MethodCategory.h MethodDNN.h MethodDL.h MethodGAN.h MethodCrossValidation.h) set(headers2 TSpline2.h TSpline1.h PDF.h BinaryTree.h BinarySearchTreeNode.h BinarySearchTree.h Timer.h RootFinder.h CrossEntropy.h DecisionTree.h DecisionTreeNode.h MisClassificationError.h diff --git a/tmva/tmva/inc/LinkDef1.h b/tmva/tmva/inc/LinkDef1.h index 47ea5c0aae88e..f87e4416f6f4e 100644 --- a/tmva/tmva/inc/LinkDef1.h +++ b/tmva/tmva/inc/LinkDef1.h @@ -67,5 +67,6 @@ #pragma link C++ class TMVA::MethodDNN+; #pragma link C++ class TMVA::MethodCrossValidation+; #pragma link C++ class TMVA::MethodDL+; +#pragma link C++ class TMVA::MethodGAN+; #endif diff --git a/tmva/tmva/inc/TMVA/DNN/CNN/ConvLayer.h b/tmva/tmva/inc/TMVA/DNN/CNN/ConvLayer.h index 73b6eda106636..1698a2a1d0243 100644 --- a/tmva/tmva/inc/TMVA/DNN/CNN/ConvLayer.h +++ b/tmva/tmva/inc/TMVA/DNN/CNN/ConvLayer.h @@ -65,7 +65,7 @@ class TConvLayer : public VGeneralLayer { std::vector fForwardIndices; ///< Vector of indices used for a fast Im2Col in forward pass std::vector fBackwardIndices; ///< Vector of indices used for a fast Im2Col in backward pass - + EActivationFunction fF; ///< Activation function of the layer. ERegularization fReg; ///< The regularization method. 
@@ -152,7 +152,7 @@ TConvLayer::TConvLayer(size_t batchSize, size_t inputDepth, size size_t filterWidth, size_t strideRows, size_t strideCols, size_t paddingHeight, size_t paddingWidth, Scalar_t dropoutProbability, EActivationFunction f, ERegularization reg, Scalar_t weightDecay) - : VGeneralLayer(batchSize, inputDepth, inputHeight, inputWidth, depth, height, width, 1, + : VGeneralLayer(batchSize, inputDepth, inputHeight, inputWidth, depth, height, width, "CONV", 1, weightsNRows, weightsNCols, 1, biasesNRows, biasesNCols, outputNSlices, outputNRows, outputNCols, init), fFilterDepth(filterDepth), fFilterHeight(filterHeight), fFilterWidth(filterWidth), fStrideRows(strideRows), @@ -220,43 +220,43 @@ auto TConvLayer::Forward(std::vector &input, bool appl fForwardIndices.resize(this->GetNLocalViews() * this->GetNLocalViewPixels() ); - R__ASSERT( input.size() > 0); + R__ASSERT( input.size() > 0); Architecture_t::Im2colIndices(fForwardIndices, input[0], this->GetNLocalViews(), this->GetInputHeight(), this->GetInputWidth(), this->GetFilterHeight(), this->GetFilterWidth(), this->GetStrideRows(), this->GetStrideCols(), this->GetPaddingHeight(), this->GetPaddingWidth()); - - + + Architecture_t::ConvLayerForward(this->GetOutput(), this->GetDerivatives(), input, this->GetWeightsAt(0), this->GetBiasesAt(0), fF, fForwardIndices, this->GetNLocalViews(), this->GetNLocalViewPixels(), - this->GetDropoutProbability(), applyDropout ); + this->GetDropoutProbability(), applyDropout ); -#if 0 +#if 0 // in printciple I could make the indices data member of the class Matrix_t inputTr(this->GetNLocalViews(), this->GetNLocalViewPixels()); //Matrix_t inputTr2(this->GetNLocalViews(), this->GetNLocalViewPixels()); std::vector vIndices(inputTr.GetNrows() * inputTr.GetNcols() ); - R__ASSERT( input.size() > 0); + R__ASSERT( input.size() > 0); Architecture_t::Im2colIndices(vIndices, input[0], this->GetNLocalViews(), this->GetInputHeight(), this->GetInputWidth(), this->GetFilterHeight(), 
this->GetFilterWidth(), this->GetStrideRows(), this->GetStrideCols(), this->GetPaddingHeight(), this->GetPaddingWidth()); - // batch size loop + // batch size loop for (size_t i = 0; i < this->GetBatchSize(); i++) { if (applyDropout && (this->GetDropoutProbability() != 1.0)) { Architecture_t::Dropout(input[i], this->GetDropoutProbability()); } - inputTr.Zero(); - //inputTr2.Zero(); + inputTr.Zero(); + //inputTr2.Zero(); // Architecture_t::Im2col(inputTr2, input[i], this->GetInputHeight(), this->GetInputWidth(), this->GetFilterHeight(), // this->GetFilterWidth(), this->GetStrideRows(), this->GetStrideCols(), // this->GetPaddingHeight(), this->GetPaddingWidth()); Architecture_t::Im2colFast(inputTr, input[i], vIndices); - // bool diff = false; - // for (int j = 0; j < inputTr.GetNrows(); ++j) { + // bool diff = false; + // for (int j = 0; j < inputTr.GetNrows(); ++j) { // for (int k = 0; k < inputTr.GetNcols(); ++k) { // if ( inputTr2(j,k) != inputTr(j,k) ) { - // diff = true; + // diff = true; // std::cout << "different im2col for " << j << " , " << k << " " << inputTr(j,k) << " shoud be " << inputTr2(j,k) << std::endl; // } // } @@ -268,15 +268,15 @@ auto TConvLayer::Forward(std::vector &input, bool appl // this->GetPaddingHeight(), this->GetPaddingWidth() ); // // PrintMatrix(inputTr); // //PrintMatrix(inputTr2); - // } - // R__ASSERT(!diff); + // } + // R__ASSERT(!diff); Architecture_t::MultiplyTranspose(this->GetOutputAt(i), this->GetWeightsAt(0), inputTr); Architecture_t::AddConvBiases(this->GetOutputAt(i), this->GetBiasesAt(0)); evaluateDerivative(this->GetDerivativesAt(i), fF, this->GetOutputAt(i)); evaluate(this->GetOutputAt(i), fF); } -#endif +#endif } //______________________________________________________________________________ @@ -335,7 +335,7 @@ void TConvLayer::AddWeightsXMLTo(void *parent) gTools().xmlengine().NewAttr(layerxml, 0, "ActivationFunction", TString::Itoa(activationFunction, 10)); - // write weights and bias matrix + // write weights and 
bias matrix this->WriteMatrixToXML(layerxml, "Weights", this -> GetWeightsAt(0)); this->WriteMatrixToXML(layerxml, "Biases", this -> GetBiasesAt(0)); diff --git a/tmva/tmva/inc/TMVA/DNN/CNN/MaxPoolLayer.h b/tmva/tmva/inc/TMVA/DNN/CNN/MaxPoolLayer.h index 98ad5843e6bb1..f9edb3e2b8095 100644 --- a/tmva/tmva/inc/TMVA/DNN/CNN/MaxPoolLayer.h +++ b/tmva/tmva/inc/TMVA/DNN/CNN/MaxPoolLayer.h @@ -130,7 +130,7 @@ TMaxPoolLayer::TMaxPoolLayer(size_t batchSize, size_t inputDepth size_t height, size_t width, size_t outputNSlices, size_t outputNRows, size_t outputNCols, size_t frameHeight, size_t frameWidth, size_t strideRows, size_t strideCols, Scalar_t dropoutProbability) - : VGeneralLayer(batchSize, inputDepth, inputHeight, inputWidth, inputDepth, height, width, 0, 0, 0, + : VGeneralLayer(batchSize, inputDepth, inputHeight, inputWidth, inputDepth, height, width, "MAXPOOL", 0, 0, 0, 0, 0, 0, outputNSlices, outputNRows, outputNCols, EInitialization::kZero), indexMatrix(), fFrameHeight(frameHeight), fFrameWidth(frameWidth), fStrideRows(strideRows), fStrideCols(strideCols), fNLocalViewPixels(inputDepth * frameHeight * frameWidth), fNLocalViews(height * width), @@ -236,7 +236,7 @@ void TMaxPoolLayer::AddWeightsXMLTo(void *parent) template void TMaxPoolLayer::ReadWeightsFromXML(void * /*parent */) { - // all info is read before - nothing to do + // all info is read before - nothing to do } } // namespace CNN diff --git a/tmva/tmva/inc/TMVA/DNN/DeepNet.h b/tmva/tmva/inc/TMVA/DNN/DeepNet.h index 386e5b71cc5a5..ceea7a5207bde 100644 --- a/tmva/tmva/inc/TMVA/DNN/DeepNet.h +++ b/tmva/tmva/inc/TMVA/DNN/DeepNet.h @@ -380,8 +380,8 @@ auto TDeepNet::calculateDimension(int imgDim, int fltDi { Scalar_t dimension = ((imgDim - fltDim + 2 * padding) / stride) + 1; if (!isInteger(dimension) || dimension <= 0) { - this->Print(); - int iLayer = fLayers.size(); + this->Print(); + int iLayer = fLayers.size(); Fatal("calculateDimension","Not compatible hyper parameters for layer %d - (imageDim, 
filterDim, padding, stride) %d , %d , %d , %d", iLayer, imgDim, fltDim, padding, stride); // std::cout << " calculateDimension - Not compatible hyper parameters (imgDim, fltDim, padding, stride)" @@ -702,7 +702,7 @@ TReshapeLayer *TDeepNet::AddReshapeLaye outputNCols = inputNCols; depth = 1; height = 1; - width = outputNCols; + width = outputNCols; } else { outputNSlices = this->GetBatchSize(); outputNRows = depth; @@ -921,7 +921,7 @@ auto TDeepNet::Backward(std::vector &input, c } // need to have a dummy tensor (size=0) to pass for activation gradient backward which - // are not computed for the first layer + // are not computed for the first layer std::vector dummy; fLayers[0]->Backward(dummy, input, inp1, inp2); } diff --git a/tmva/tmva/inc/TMVA/DNN/DenseLayer.h b/tmva/tmva/inc/TMVA/DNN/DenseLayer.h index 3e33f11668351..2dc99426d760b 100644 --- a/tmva/tmva/inc/TMVA/DNN/DenseLayer.h +++ b/tmva/tmva/inc/TMVA/DNN/DenseLayer.h @@ -126,7 +126,7 @@ template TDenseLayer::TDenseLayer(size_t batchSize, size_t inputWidth, size_t width, EInitialization init, Scalar_t dropoutProbability, EActivationFunction f, ERegularization reg, Scalar_t weightDecay) - : VGeneralLayer(batchSize, 1, 1, inputWidth, 1, 1, width, 1, width, inputWidth, 1, width, 1, 1, + : VGeneralLayer(batchSize, 1, 1, inputWidth, 1, 1, width, "DENSE", 1, width, inputWidth, 1, width, 1, 1, batchSize, width, init), fDerivatives(), fDropoutProbability(dropoutProbability), fF(f), fReg(reg), fWeightDecay(weightDecay) { @@ -199,7 +199,7 @@ template void TDenseLayer::Print() const { std::cout << " DENSE Layer: \t "; - std::cout << " ( Input = " << this->GetWeightsAt(0).GetNcols(); // input size + std::cout << " ( Input = " << this->GetWeightsAt(0).GetNcols(); // input size std::cout << " , Width = " << this->GetWeightsAt(0).GetNrows() << " ) "; // layer width if (this->GetOutput().size() > 0) { std::cout << "\tOutput = ( " << this->GetOutput().size() << " , " << this->GetOutput()[0].GetNrows() << " , " << 
this->GetOutput()[0].GetNcols() << " ) "; @@ -223,7 +223,7 @@ void TDenseLayer::AddWeightsXMLTo(void *parent) int activationFunction = static_cast(this -> GetActivationFunction()); gTools().xmlengine().NewAttr(layerxml, 0, "ActivationFunction", TString::Itoa(activationFunction, 10)); - // write weights and bias matrix + // write weights and bias matrix this->WriteMatrixToXML(layerxml, "Weights", this -> GetWeightsAt(0)); this->WriteMatrixToXML(layerxml, "Biases", this -> GetBiasesAt(0)); } @@ -235,7 +235,7 @@ void TDenseLayer::ReadWeightsFromXML(void *parent) // Read layer weights and biases from XML this->ReadMatrixXML(parent,"Weights", this -> GetWeightsAt(0)); this->ReadMatrixXML(parent,"Biases", this -> GetBiasesAt(0)); - + } } // namespace DNN diff --git a/tmva/tmva/inc/TMVA/DNN/GeneralLayer.h b/tmva/tmva/inc/TMVA/DNN/GeneralLayer.h index acc1f99cf8e20..2c9c00a6cd5ef 100644 --- a/tmva/tmva/inc/TMVA/DNN/GeneralLayer.h +++ b/tmva/tmva/inc/TMVA/DNN/GeneralLayer.h @@ -58,6 +58,7 @@ class VGeneralLayer { size_t fWidth; ///< The width of this layer. bool fIsTraining; ///< Flag indicatig the mode + TString fLayerType; ///< Type of layer std::vector fWeights; ///< The weights associated to the layer. std::vector fBiases; ///< The biases associated to the layer. @@ -73,13 +74,13 @@ class VGeneralLayer { public: /*! Constructor */ VGeneralLayer(size_t BatchSize, size_t InputDepth, size_t InputHeight, size_t InputWidth, size_t Depth, - size_t Height, size_t Width, size_t WeightsNSlices, size_t WeightsNRows, size_t WeightsNCols, + size_t Height, size_t Width, TString layerType, size_t WeightsNSlices, size_t WeightsNRows, size_t WeightsNCols, size_t BiasesNSlices, size_t BiasesNRows, size_t BiasesNCols, size_t OutputNSlices, size_t OutputNRows, size_t OutputNCols, EInitialization Init); /*! 
General Constructor with different weights dimension */ VGeneralLayer(size_t BatchSize, size_t InputDepth, size_t InputHeight, size_t InputWidth, size_t Depth, - size_t Height, size_t Width, size_t WeightsNSlices, std::vector WeightsNRows, + size_t Height, size_t Width, TString layerType, size_t WeightsNSlices, std::vector WeightsNRows, std::vector WeightsNCols, size_t BiasesNSlices, std::vector BiasesNRows, std::vector BiasesNCols, size_t OutputNSlices, size_t OutputNRows, size_t OutputNCols, EInitialization Init); @@ -144,6 +145,7 @@ class VGeneralLayer { size_t GetDepth() const { return fDepth; } size_t GetHeight() const { return fHeight; } size_t GetWidth() const { return fWidth; } + TString GetLayerType() const { return fLayerType; } bool IsTraining() const { return fIsTraining; } const std::vector &GetWeights() const { return fWeights; } @@ -192,14 +194,15 @@ class VGeneralLayer { void SetDepth(size_t depth) { fDepth = depth; } void SetHeight(size_t height) { fHeight = height; } void SetWidth(size_t width) { fWidth = width; } + void SetLayerType(TString layerType) { fLayerType = layerType; } void SetIsTraining(bool isTraining) { fIsTraining = isTraining; } /// helper functions for XML - void WriteTensorToXML( void * node, const char * name, const std::vector & tensor); + void WriteTensorToXML( void * node, const char * name, const std::vector & tensor); void WriteMatrixToXML( void * node, const char * name, const Matrix_t & matrix); void ReadMatrixXML( void * node, const char * name, Matrix_t & matrix); - + }; // @@ -208,13 +211,13 @@ class VGeneralLayer { //_________________________________________________________________________________________________ template VGeneralLayer::VGeneralLayer(size_t batchSize, size_t inputDepth, size_t inputHeight, size_t inputWidth, - size_t depth, size_t height, size_t width, size_t weightsNSlices, - size_t weightsNRows, size_t weightsNCols, size_t biasesNSlices, - size_t biasesNRows, size_t biasesNCols, size_t 
outputNSlices, + size_t depth, size_t height, size_t width, TString layerType, + size_t weightsNSlices, size_t weightsNRows, size_t weightsNCols, + size_t biasesNSlices, size_t biasesNRows, size_t biasesNCols, size_t outputNSlices, size_t outputNRows, size_t outputNCols, EInitialization init) : fBatchSize(batchSize), fInputDepth(inputDepth), fInputHeight(inputHeight), fInputWidth(inputWidth), fDepth(depth), - fHeight(height), fWidth(width), fIsTraining(true), fWeights(), fBiases(), fWeightGradients(), fBiasGradients(), - fOutput(), fActivationGradients(), fInit(init) + fHeight(height), fWidth(width), fLayerType(layerType), fIsTraining(true), fWeights(), fBiases(), fWeightGradients(), + fBiasGradients(), fOutput(), fActivationGradients(), fInit(init) { for (size_t i = 0; i < weightsNSlices; i++) { @@ -236,13 +239,13 @@ VGeneralLayer::VGeneralLayer(size_t batchSize, size_t inputDepth //_________________________________________________________________________________________________ template VGeneralLayer::VGeneralLayer(size_t batchSize, size_t inputDepth, size_t inputHeight, size_t inputWidth, - size_t depth, size_t height, size_t width, size_t weightsNSlices, - std::vector weightsNRows, std::vector weightsNCols, + size_t depth, size_t height, size_t width, TString layerType, + size_t weightsNSlices, std::vector weightsNRows, std::vector weightsNCols, size_t biasesNSlices, std::vector biasesNRows, std::vector biasesNCols, size_t outputNSlices, size_t outputNRows, size_t outputNCols, EInitialization init) : fBatchSize(batchSize), fInputDepth(inputDepth), fInputHeight(inputHeight), fInputWidth(inputWidth), fDepth(depth), - fHeight(height), fWidth(width), fIsTraining(true), fWeights(), fBiases(), fWeightGradients(), fBiasGradients(), + fHeight(height), fWidth(width), fLayerType(layerType), fIsTraining(true), fWeights(), fBiases(), fWeightGradients(), fBiasGradients(), fOutput(), fActivationGradients(), fInit(init) { @@ -267,8 +270,8 @@ template 
VGeneralLayer::VGeneralLayer(VGeneralLayer *layer) : fBatchSize(layer->GetBatchSize()), fInputDepth(layer->GetInputDepth()), fInputHeight(layer->GetInputHeight()), fInputWidth(layer->GetInputWidth()), fDepth(layer->GetDepth()), fHeight(layer->GetHeight()), - fWidth(layer->GetWidth()), fIsTraining(layer->IsTraining()), fWeights(), fBiases(), fWeightGradients(), - fBiasGradients(), fOutput(), fActivationGradients(), fInit(layer->GetInitialization()) + fWidth(layer->GetWidth()), fLayerType(layer->GetLayerType()), fIsTraining(layer->IsTraining()), fWeights(), + fBiases(), fWeightGradients(),fBiasGradients(), fOutput(), fActivationGradients(), fInit(layer->GetInitialization()) { size_t weightsNSlices = (layer->GetWeights()).size(); size_t weightsNRows = 0; @@ -316,8 +319,8 @@ template VGeneralLayer::VGeneralLayer(const VGeneralLayer &layer) : fBatchSize(layer.fBatchSize), fInputDepth(layer.fInputDepth), fInputHeight(layer.fInputHeight), fInputWidth(layer.fInputWidth), fDepth(layer.fDepth), fHeight(layer.fHeight), fWidth(layer.fWidth), - fIsTraining(layer.fIsTraining), fWeights(), fBiases(), fWeightGradients(), fBiasGradients(), fOutput(), - fActivationGradients(), fInit(layer.fInit) + fLayerType(layer.fLayerType), fIsTraining(layer.fIsTraining), fWeights(), fBiases(), + fWeightGradients(), fBiasGradients(), fOutput(),fActivationGradients(), fInit(layer.fInit) { size_t weightsNSlices = layer.fWeights.size(); size_t weightsNRows = 0; @@ -454,9 +457,9 @@ auto VGeneralLayer::CopyBiases(const std::vector &othe template auto VGeneralLayer::WriteTensorToXML(void * node, const char * name, const std::vector & tensor) -> void { - auto xmlengine = gTools().xmlengine(); + auto xmlengine = gTools().xmlengine(); void* matnode = xmlengine.NewChild(node, 0, name); - if (tensor.size() == 0) return; + if (tensor.size() == 0) return; xmlengine.NewAttr(matnode,0,"Depth", gTools().StringFromInt(tensor.size()) ); // assume same number of rows and columns for every matrix in std::vector 
xmlengine.NewAttr(matnode,0,"Rows", gTools().StringFromInt(tensor[0].GetNrows()) ); @@ -467,7 +470,7 @@ auto VGeneralLayer::WriteTensorToXML(void * node, const char * n for (Int_t row = 0; row < mat.GetNrows(); row++) { for (Int_t col = 0; col < mat.GetNcols(); col++) { TString tmp = TString::Format( "%5.15e ", (mat)(row,col) ); - s << tmp.Data(); + s << tmp.Data(); } } } @@ -478,7 +481,7 @@ auto VGeneralLayer::WriteTensorToXML(void * node, const char * n template auto VGeneralLayer::WriteMatrixToXML(void * node, const char * name, const Matrix_t & matrix) -> void { - auto xmlengine = gTools().xmlengine(); + auto xmlengine = gTools().xmlengine(); void* matnode = xmlengine.NewChild(node, 0, name); xmlengine.NewAttr(matnode,0,"Rows", gTools().StringFromInt(matrix.GetNrows()) ); @@ -506,8 +509,8 @@ auto VGeneralLayer::ReadMatrixXML(void * node, const char * name gTools().ReadAttr(matrixXML, "Rows", rows); gTools().ReadAttr(matrixXML, "Columns", cols); - R__ASSERT((size_t) matrix.GetNrows() == rows); - R__ASSERT((size_t) matrix.GetNcols() == cols); + R__ASSERT((size_t) matrix.GetNrows() == rows); + R__ASSERT((size_t) matrix.GetNcols() == cols); const char * matrixString = gTools().xmlengine().GetNodeContent(matrixXML); std::stringstream matrixStringStream(matrixString); diff --git a/tmva/tmva/inc/TMVA/DNN/RNN/RNNLayer.h b/tmva/tmva/inc/TMVA/DNN/RNN/RNNLayer.h index b31c7759fb07c..4991cb8d17ef3 100644 --- a/tmva/tmva/inc/TMVA/DNN/RNN/RNNLayer.h +++ b/tmva/tmva/inc/TMVA/DNN/RNN/RNNLayer.h @@ -129,7 +129,7 @@ template /*! Read the information and the weights about the layer from XML node. */ virtual void ReadWeightsFromXML(void *parent); - + /** Getters */ size_t GetTimeSteps() const { return fTimeSteps; } @@ -166,7 +166,7 @@ TBasicRNNLayer::TBasicRNNLayer(size_t batchSize, size_t stateSiz bool rememberState, DNN::EActivationFunction f, bool /*training*/, DNN::EInitialization fA) // TODO inputDepth and outputDepth changed to batchSize?? 
- : VGeneralLayer(batchSize, 1, timeSteps, inputSize, 1, timeSteps, stateSize, 2, + : VGeneralLayer(batchSize, 1, timeSteps, inputSize, 1, timeSteps, stateSize, "RNN", 2, {stateSize, stateSize}, {inputSize, stateSize}, 1, {stateSize}, {1}, batchSize, timeSteps, stateSize, fA), fTimeSteps(timeSteps), @@ -256,12 +256,12 @@ auto inline TBasicRNNLayer::Forward(Tensor_t &input, bool /*isTr // H : state size // T : time size // B : batch size - + Tensor_t arrInput; for (size_t t = 0; t < fTimeSteps; ++t) arrInput.emplace_back(this->GetBatchSize(), this->GetInputWidth()); // T x B x D Architecture_t::Rearrange(arrInput, input); Tensor_t arrOutput; - for (size_t t = 0; t < fTimeSteps;++t) arrOutput.emplace_back(this->GetBatchSize(), fStateSize); // T x B x H + for (size_t t = 0; t < fTimeSteps;++t) arrOutput.emplace_back(this->GetBatchSize(), fStateSize); // T x B x H if (!this->fRememberState) InitState(DNN::EInitialization::kZero); for (size_t t = 0; t < fTimeSteps; ++t) { @@ -332,7 +332,7 @@ auto inline TBasicRNNLayer::Backward(Tensor_t &gradients_backwar // reinitialize weights and biases gradients to 0 fWeightInputGradients.Zero(); fWeightStateGradients.Zero(); - fBiasGradients.Zero(); + fBiasGradients.Zero(); for (size_t t = fTimeSteps; t > 0; t--) { //const Matrix_t & currStateActivations = arr_output[t - 1]; diff --git a/tmva/tmva/inc/TMVA/DNN/ReshapeLayer.h b/tmva/tmva/inc/TMVA/DNN/ReshapeLayer.h index 409ba49dc77a4..521ed97cb4002 100644 --- a/tmva/tmva/inc/TMVA/DNN/ReshapeLayer.h +++ b/tmva/tmva/inc/TMVA/DNN/ReshapeLayer.h @@ -93,8 +93,8 @@ template TReshapeLayer::TReshapeLayer(size_t batchSize, size_t inputDepth, size_t inputHeight, size_t inputWidth, size_t depth, size_t height, size_t width, size_t outputNSlices, size_t outputNRows, size_t outputNCols, bool flattening) - : VGeneralLayer(batchSize, inputDepth, inputHeight, inputWidth, depth, height, width, 0, 0, 0, 0, 0, - 0, outputNSlices, outputNRows, outputNCols, EInitialization::kZero), + : 
VGeneralLayer(batchSize, inputDepth, inputHeight, inputWidth, depth, height, width, "RESHAPE", 0, 0, + 0, 0, 0, 0, outputNSlices, outputNRows, outputNCols, EInitialization::kZero), fFlattening(flattening) { if (this->GetInputDepth() * this->GetInputHeight() * this->GetInputWidth() != @@ -152,7 +152,7 @@ auto TReshapeLayer::Backward(std::vector &gradients_ba /*inp2*/) -> void { // in case of first layer size is zero - do nothing - if (gradients_backward.size() == 0) return; + if (gradients_backward.size() == 0) return; if (fFlattening) { size_t size = gradients_backward.size(); size_t nRows = gradients_backward[0].GetNrows(); diff --git a/tmva/tmva/inc/TMVA/DNN/TensorDataLoader.h b/tmva/tmva/inc/TMVA/DNN/TensorDataLoader.h index b0134c7e8ec73..178b3e63f8c9e 100644 --- a/tmva/tmva/inc/TMVA/DNN/TensorDataLoader.h +++ b/tmva/tmva/inc/TMVA/DNN/TensorDataLoader.h @@ -32,7 +32,7 @@ #include namespace TMVA { - class DataSetInfo; + class DataSetInfo; namespace DNN { // @@ -57,6 +57,8 @@ template class TTensorBatch { public: using Matrix_t = typename Architecture_t::Matrix_t; + using TensorInput = + std::tuple &, const Matrix_t &, const Matrix_t &>; private: std::vector fInputTensor; ///< The input tensor batch, one matrix one input. @@ -132,6 +134,8 @@ class TTensorDataLoader { using HostBuffer_t = typename Architecture_t::HostBuffer_t; using DeviceBuffer_t = typename Architecture_t::DeviceBuffer_t; using Matrix_t = typename Architecture_t::Matrix_t; + using TensorInput = + std::tuple &, const Matrix_t &, const Matrix_t &>; using BatchIterator_t = TTensorBatchIterator; const Data_t &fData; ///< The data that should be loaded in the batches. @@ -247,7 +251,7 @@ TTensorBatch TTensorDataLoader::GetTenso // here sample index has batch size as offset , while in // copy tensor input has batch depth. 
// We support then now two cases: batchdepth = 1 batchHeight = batch size - // or batch depth = batch size + // or batch depth = batch size size_t sampleIndex = fBatchIndex * fBatchSize; IndexIterator_t sampleIndexIterator = fSampleIndices.begin() + sampleIndex; diff --git a/tmva/tmva/inc/TMVA/MethodDL.h b/tmva/tmva/inc/TMVA/MethodDL.h index c5e24bf586df5..1d5bead08b8d8 100644 --- a/tmva/tmva/inc/TMVA/MethodDL.h +++ b/tmva/tmva/inc/TMVA/MethodDL.h @@ -74,6 +74,9 @@ struct TTrainingSettings { class MethodDL : public MethodBase { +public: + friend class MethodGAN; + private: // Key-Value vector type, contining the values for the training options using KeyValueVector_t = std::vector>; @@ -101,32 +104,32 @@ class MethodDL : public MethodBase { * a reference in the function. */ template void CreateDeepNet(DNN::TDeepNet &deepNet, - std::vector> &nets); + std::vector> &nets, std::unique_ptr &modelNet); template - void ParseDenseLayer(DNN::TDeepNet &deepNet, - std::vector> &nets, TString layerString, TString delim); + static void ParseDenseLayer(size_t inputSize, DNN::TDeepNet &deepNet, + std::vector> &nets, TString layerString, TString delim, std::unique_ptr &modelNet); template - void ParseConvLayer(DNN::TDeepNet &deepNet, - std::vector> &nets, TString layerString, TString delim); + static void ParseConvLayer(DNN::TDeepNet &deepNet, + std::vector> &nets, TString layerString, TString delim, std::unique_ptr &modelNet); template - void ParseMaxPoolLayer(DNN::TDeepNet &deepNet, + static void ParseMaxPoolLayer(DNN::TDeepNet &deepNet, std::vector> &nets, TString layerString, - TString delim); + TString delim, std::unique_ptr &modelNet); template - void ParseReshapeLayer(DNN::TDeepNet &deepNet, + static void ParseReshapeLayer(DNN::TDeepNet &deepNet, std::vector> &nets, TString layerString, - TString delim); + TString delim, std::unique_ptr &modelNet); template - void ParseRnnLayer(DNN::TDeepNet &deepNet, - std::vector> &nets, TString layerString, TString delim); + static void 
ParseRnnLayer(DNN::TDeepNet &deepNet, + std::vector> &nets, TString layerString, TString delim, std::unique_ptr &modelNet); template - void ParseLstmLayer(DNN::TDeepNet &deepNet, + static void ParseLstmLayer(DNN::TDeepNet &deepNet, std::vector> &nets, TString layerString, TString delim); size_t fInputDepth; ///< The depth of the input. diff --git a/tmva/tmva/inc/TMVA/MethodGAN.h b/tmva/tmva/inc/TMVA/MethodGAN.h new file mode 100644 index 0000000000000..4929e1395ae94 --- /dev/null +++ b/tmva/tmva/inc/TMVA/MethodGAN.h @@ -0,0 +1,294 @@ +// @(#)root/tmva/tmva/dnn:$Id$ +// Author: Anushree Rankawat + +/********************************************************************************** + * Project: TMVA - a Root-integrated toolkit for multivariate data analysis * + * Package: TMVA * + * Class : MethodGAN * + * Web : http://tmva.sourceforge.net * + * * + * Description: * + * Generative Adversarial Networks * + * * + * Authors (alphabetical): * + * Anushree Rankawat * + * * + * Copyright (c) 2005-2015: * + * CERN, Switzerland * + * U. of Victoria, Canada * + * MPI-K Heidelberg, Germany * + * U. 
of Bonn, Germany * + * * + * Redistribution and use in source and binary forms, with or without * + * modification, are permitted according to the terms listed in LICENSE * + * (http://tmva.sourceforge.net/LICENSE) * + **********************************************************************************/ + +#ifndef ROOT_TMVA_MethodGAN +#define ROOT_TMVA_MethodGAN + +////////////////////////////////////////////////////////////////////////// +// // +// MethodGAN // +// // +// Method class for all Generative Adversarial Networks // +// // +////////////////////////////////////////////////////////////////////////// + +#include "TString.h" + +#include "TMVA/MethodDL.h" +#include "TMVA/Types.h" +#include "TMVA/DNN/TensorDataLoader.h" + +#include "TMVA/DNN/Architectures/Cpu.h" + +#ifdef R__HAS_TMVACPU +#include "TMVA/DNN/Architectures/Cpu.h" +#endif + +#ifdef R__HAS_TMVACUDA +#include "TMVA/DNN/Architectures/Cuda.h" +#endif + +#ifdef R__HAS_TMVACPU + using ArchitectureImpl_t = TMVA::DNN::TCpu; +#else + using ArchitectureImpl_t = TMVA::DNN::TReference; +#endif +using DeepNetImpl_t = TMVA::DNN::TDeepNet; + +#include "TMVA/DNN/Architectures/Reference.h" +#include "TMVA/DNN/Functions.h" +#include "TMVA/DNN/DeepNet.h" + +#include + +using namespace TMVA; +using namespace TMVA::DNN::CNN; +using namespace TMVA::DNN; + +using Architecture_t = TCpu; +using Scalar_t = Architecture_t::Scalar_t; +using DeepNet_t = TMVA::DNN::TDeepNet; +using TensorDataLoader_t = TTensorDataLoader; + +using TMVA::DNN::EActivationFunction; +using TMVA::DNN::ELossFunction; +using TMVA::DNN::EInitialization; +using TMVA::DNN::EOutputFunction; + +namespace TMVA { + +/*! 
All of the options that can be specified in the training string */ +struct GANTTrainingSettings { + size_t maxEpochs; + size_t generatorBatchSize; + size_t generatorTestInterval; + size_t generatorConvergenceSteps; + DNN::ERegularization generatorRegularization; + Double_t generatorLearningRate; + Double_t generatorMomentum; + Double_t generatorWeightDecay; + std::vector generatorDropoutProbabilities; + bool generatorMultithreading; + + size_t discriminatorBatchSize; + size_t discriminatorTestInterval; + size_t discriminatorConvergenceSteps; + DNN::ERegularization discriminatorRegularization; + Double_t discriminatorLearningRate; + Double_t discriminatorMomentum; + Double_t discriminatorWeightDecay; + std::vector discriminatorDropoutProbabilities; + bool discriminatorMultithreading; + +}; + +class MethodGAN : public MethodBase { + +private: + // Key-Value vector type, containing the values for the training options + using KeyValueVector_t = std::vector>; + std::unique_ptr generatorFNet, discriminatorFNet, combinedFNet; + using Matrix_t = typename ArchitectureImpl_t::Matrix_t; + + /*! The option handling methods */ + void DeclareOptions(); + void ProcessOptions(); + + void Init(); + + // Function to parse the layout of the input + void ParseNetworkLayout(); + void ParseInputLayout(); + void ParseBatchLayout(); + + /*! After calling the ProcessOptions(), all of the options are parsed, + * so using the parsed options, and given the architecture and the + * type of the layers, we build the Deep Network passed as + * a reference in the function. */ + template + void CreateDeepNet(DNN::TDeepNet &deepNet, + std::vector> &nets, std::unique_ptr &modelNet, TString layoutString); + + size_t fGeneratorInputDepth; ///< The depth of the input of the generator. + size_t fGeneratorInputHeight; ///< The height of the input of the generator. + size_t fGeneratorInputWidth; ///< The width of the input of the generator. 
+ + size_t fGeneratorBatchDepth; ///< The depth of the batch used to train the deep net for generator. + size_t fGeneratorBatchHeight; ///< The height of the batch used to train the deep net for generator. + size_t fGeneratorBatchWidth; ///< The width of the batch used to train the deep net for generator. + + size_t fDiscriminatorInputDepth; ///< The depth of the input of the discriminator. + size_t fDiscriminatorInputHeight; ///< The height of the input of the discriminator. + size_t fDiscriminatorInputWidth; ///< The width of the input of the discriminator. + + size_t fDiscriminatorBatchDepth; ///< The depth of the batch used to train the deep net for discriminator. + size_t fDiscriminatorBatchHeight; ///< The height of the batch used to train the deep net for discriminator. + size_t fDiscriminatorBatchWidth; ///< The width of the batch used to train the deep net for discriminator. + + + DNN::EInitialization fWeightInitialization; ///< The initialization method + DNN::EOutputFunction fOutputFunction; ///< The output function for making the predictions + DNN::ELossFunction fLossFunction; ///< The loss function + + TString fInputLayoutString; ///< The string defining the layout of the input + TString fBatchLayoutString; ///< The string defining the layout of the batch + TString fLayoutString; ///< The string defining the layout of the deep net + TString fErrorStrategy; ///< The string defining the error strategy for training + TString fTrainingStrategyString; ///< The string defining the training strategy + TString fWeightInitializationString; ///< The string defining the weight initialization method + TString fArchitectureString; ///< The string defining the architecture: CPU or GPU + TString fGeneratorNetworkLayoutString; ///< The string defining the network layout for generator + TString fDiscriminatorNetworkLayoutString; ///< The string defining the network layout for discriminator + bool fResume; + + KeyValueVector_t fSettings; ///< Map for the training
strategy + std::vector fTrainingSettings; ///< The vector defining each training strategy + + ClassDef(MethodGAN, 0); + +protected: + // provide a help message + void GetHelpMessage() const; + +public: + /*! Constructor */ + MethodGAN(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption); + + /*! Constructor */ + MethodGAN(DataSetInfo &theData, const TString &theWeightFile); + + /*! Virtual Destructor */ + virtual ~MethodGAN(); + + /*! Function for parsing the training settings, provided as a string + * in a key-value form. */ + KeyValueVector_t ParseKeyValueString(TString parseString, TString blockDelim, TString tokenDelim); + + /*! Check the type of analysis the deep learning network can do */ + Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets); + + /*! Methods for training the deep learning network */ + void Train(); + + Double_t GetMvaValue(Double_t *err = 0, Double_t *errUpper = 0); + Double_t GetMvaValueGAN(std::unique_ptr & modelNet, Double_t *err = 0, Double_t *errUpper = 0); + void CreateNoisyMatrices(std::vector> &inputTensor, TMatrixT &outputMatrix, TMatrixT &weights, DeepNet_t &DeepNet, size_t nSamples, size_t classLabel); + Double_t ComputeLoss(TTensorDataLoader &generalDataloader, DeepNet_t &DeepNet); + Double_t ComputeLoss(TTensorDataLoader &generalDataloader, DeepNet_t &DeepNet); + void CreateDiscriminatorFakeData(std::vector> &predTensor, TMatrixT &outputMatrix, TMatrixT &weights, TTensorDataLoader &trainingData, DeepNet_t &genDeepNet, DeepNet_t &disDeepNet, EOutputFunction outputFunction, size_t nSamples, size_t classLabel, size_t epochs); + void CombineGAN(DeepNet_t &combinedDeepNet, DeepNet_t &generatorNet, DeepNet_t &discriminatorNet, std::unique_ptr & combinedNet); + void SetDiscriminatorLayerTraining(DeepNet_t &discrimatorNet); + + /*! 
Methods for writing and reading weights */ + using MethodBase::ReadWeightsFromStream; + void AddWeightsXMLTo(void *parent) const; + void AddWeightsXMLToGenerator(void *parent) const; + void AddWeightsXMLToDiscriminator(void *parent) const; + void ReadWeightsFromXML(void *wghtnode); + void ReadWeightsFromXMLGenerator(void *rootXML); + void ReadWeightsFromXMLDiscriminator(void *rootXML); + void ReadWeightsFromStream(std::istream &); + + /* Create ranking */ + const Ranking *CreateRanking(); + + /* Getters */ + size_t GetGeneratorInputDepth() const { return fGeneratorInputDepth; } + size_t GetGeneratorInputHeight() const { return fGeneratorInputHeight; } + size_t GetGeneratorInputWidth() const { return fGeneratorInputWidth; } + + size_t GetGeneratorBatchDepth() const { return fGeneratorBatchDepth; } + size_t GetGeneratorBatchHeight() const { return fGeneratorBatchHeight; } + size_t GetGeneratorBatchWidth() const { return fGeneratorBatchWidth; } + + size_t GetDiscriminatorInputDepth() const { return fDiscriminatorInputDepth; } + size_t GetDiscriminatorInputHeight() const { return fDiscriminatorInputHeight; } + size_t GetDiscriminatorInputWidth() const { return fDiscriminatorInputWidth; } + + size_t GetDiscriminatorBatchDepth() const { return fDiscriminatorBatchDepth; } + size_t GetDiscriminatorBatchHeight() const { return fDiscriminatorBatchHeight; } + size_t GetDiscriminatorBatchWidth() const { return fDiscriminatorBatchWidth; } + + DNN::EInitialization GetWeightInitialization() const { return fWeightInitialization; } + DNN::EOutputFunction GetOutputFunction() const { return fOutputFunction; } + DNN::ELossFunction GetLossFunction() const { return fLossFunction; } + + TString GetInputLayoutString() const { return fInputLayoutString; } + TString GetBatchLayoutString() const { return fBatchLayoutString; } + TString GetLayoutString() const { return fLayoutString; } + TString GetErrorStrategyString() const { return fErrorStrategy; } + TString GetTrainingStrategyString() 
const { return fTrainingStrategyString; } + TString GetWeightInitializationString() const { return fWeightInitializationString; } + TString GetArchitectureString() const { return fArchitectureString; } + + TString GetGeneratorNetworkLayoutString() const {return fGeneratorNetworkLayoutString; } + TString GetDiscriminatorNetworkLayoutString() const {return fDiscriminatorNetworkLayoutString; } + + const std::vector &GetTrainingSettings() const { return fTrainingSettings; } + std::vector &GetTrainingSettings() { return fTrainingSettings; } + const KeyValueVector_t &GetKeyValueSettings() const { return fSettings; } + KeyValueVector_t &GetKeyValueSettings() { return fSettings; } + + /** Setters */ + void SetGeneratorInputDepth(size_t inputDepth) { fGeneratorInputDepth = inputDepth; } + void SetGeneratorInputHeight(size_t inputHeight) { fGeneratorInputHeight = inputHeight; } + void SetGeneratorInputWidth(size_t inputWidth) { fGeneratorInputWidth = inputWidth; } + + void SetGeneratorBatchDepth(size_t batchDepth) { fGeneratorBatchDepth = batchDepth; } + void SetGeneratorBatchHeight(size_t batchHeight) { fGeneratorBatchHeight = batchHeight; } + void SetGeneratorBatchWidth(size_t batchWidth) { fGeneratorBatchWidth = batchWidth; } + + void SetDiscriminatorInputDepth(size_t inputDepth) { fDiscriminatorInputDepth = inputDepth; } + void SetDiscriminatorInputHeight(size_t inputHeight) { fDiscriminatorInputHeight = inputHeight; } + void SetDiscriminatorInputWidth(size_t inputWidth) { fDiscriminatorInputWidth = inputWidth; } + + void SetDiscriminatorBatchDepth(size_t batchDepth) { fDiscriminatorBatchDepth = batchDepth; } + void SetDiscriminatorBatchHeight(size_t batchHeight) { fDiscriminatorBatchHeight = batchHeight; } + void SetDiscriminatorBatchWidth(size_t batchWidth) { fDiscriminatorBatchWidth = batchWidth; } + + void SetWeightInitialization(DNN::EInitialization weightInitialization) + { + fWeightInitialization = weightInitialization; + } + void 
SetOutputFunction(DNN::EOutputFunction outputFunction) { fOutputFunction = outputFunction; } + void SetErrorStrategyString(TString errorStrategy) { fErrorStrategy = errorStrategy; } + void SetTrainingStrategyString(TString trainingStrategyString) { fTrainingStrategyString = trainingStrategyString; } + void SetWeightInitializationString(TString weightInitializationString) + { + fWeightInitializationString = weightInitializationString; + } + void SetArchitectureString(TString architectureString) { fArchitectureString = architectureString; } + void SetLayoutString(TString layoutString) { fLayoutString = layoutString; } + + void SetGeneratorNetworkLayout(TString networkLayoutString) { fGeneratorNetworkLayoutString = networkLayoutString; } + + void SetDiscriminatorNetworkLayout(TString networkLayoutString) { fDiscriminatorNetworkLayoutString = networkLayoutString; } + +}; + +} // namespace TMVA + +#endif diff --git a/tmva/tmva/inc/TMVA/Types.h b/tmva/tmva/inc/TMVA/Types.h index 1b3fe6b0fb817..5215f8e9b0ac2 100644 --- a/tmva/tmva/inc/TMVA/Types.h +++ b/tmva/tmva/inc/TMVA/Types.h @@ -1,4 +1,4 @@ -// @(#)root/tmva $Id$ +// @(#)root/tmva $Id$ // Author: Andreas Hoecker, Joerg Stelzer, Helge Voss /********************************************************************************** @@ -17,9 +17,9 @@ * Helge Voss - MPI-K Heidelberg, Germany * * * * Copyright (c) 2005: * - * CERN, Switzerland * - * U. of Victoria, Canada * - * MPI-K Heidelberg, Germany * + * CERN, Switzerland * + * U. 
of Victoria, Canada * + * MPI-K Heidelberg, Germany * * * * Redistribution and use in source and binary forms, with or without * * modification, are permitted according to the terms listed in LICENSE * @@ -69,7 +69,7 @@ namespace TMVA { //Variable Importance type enum VIType {kShort=0,kAll=1,kRandom=2}; - + class Types { public: @@ -98,6 +98,7 @@ namespace TMVA { kPlugins , kCategory , kDNN , + kGAN , kDL , kPyRandomForest , kPyAdaBoost , @@ -143,9 +144,9 @@ namespace TMVA { enum ETreeType { kTraining = 0, kTesting, - kMaxTreeType, // also used as temporary storage for trees not yet assigned for testing;training... + kMaxTreeType, // also used as temporary storage for trees not yet assigned for testing;training... kValidation, // these are placeholders... currently not used, but could be moved "forward" if - kTrainingOriginal // ever needed + kTrainingOriginal // ever needed }; enum EBoostStage { diff --git a/tmva/tmva/src/DNN/Architectures/Reference/TensorDataLoader.cxx b/tmva/tmva/src/DNN/Architectures/Reference/TensorDataLoader.cxx index fdfcad943d3a4..bb2ab73258348 100644 --- a/tmva/tmva/src/DNN/Architectures/Reference/TensorDataLoader.cxx +++ b/tmva/tmva/src/DNN/Architectures/Reference/TensorDataLoader.cxx @@ -266,7 +266,7 @@ void TTensorDataLoader>::CopyTensorOutput(TMat const DataSetInfo &info = std::get<1>(fData); Int_t n = matrix.GetNcols(); - for (size_t i = 0; i < fBatchSize; i++) { + for (size_t i = 0; i < fBatchSize; i++) { size_t sampleIndex = *sampleIterator++; Event *event = std::get<0>(fData)[sampleIndex]; diff --git a/tmva/tmva/src/MethodBase.cxx b/tmva/tmva/src/MethodBase.cxx index a57c79d3d195c..b139b28f0f6fc 100644 --- a/tmva/tmva/src/MethodBase.cxx +++ b/tmva/tmva/src/MethodBase.cxx @@ -764,8 +764,8 @@ void TMVA::MethodBase::AddRegressionOutput(Types::ETreeType type) regRes->Resize( nEvents ); // Drawing the progress bar every event was causing a huge slowdown in the evaluation time - // So we set some parameters to draw the progress bar a 
total of totalProgressDraws, i.e. only draw every 1 in 100 - + // So we set some parameters to draw the progress bar a total of totalProgressDraws, i.e. only draw every 1 in 100 + Int_t totalProgressDraws = 100; // total number of times to update the progress bar Int_t drawProgressEvery = 1; // draw every nth event such that we have a total of totalProgressDraws if(nEvents >= totalProgressDraws) drawProgressEvery = nEvents/totalProgressDraws; @@ -1570,7 +1570,7 @@ void TMVA::MethodBase::ReadStateFromXML( void* methodNode ) fMVAPdfB->ReadXML(pdfnode); } } - else if (nodeName=="Weights") { + else if (nodeName.SubString("Weights")=="Weights") { ReadWeightsFromXML(ch); } else { @@ -1994,7 +1994,7 @@ TDirectory* TMVA::MethodBase::BaseDir() const sdir = methodDir->mkdir(defaultDir); sdir->cd(); // write weight file name into target file - if (fModelPersistence) { + if (fModelPersistence) { TObjString wfilePath( gSystem->WorkingDirectory() ); TObjString wfileName( GetWeightFileName() ); wfilePath.Write( "TrainingPath" ); diff --git a/tmva/tmva/src/MethodDL.cxx b/tmva/tmva/src/MethodDL.cxx index 66f1e95a5613b..a265772c6a0ec 100644 --- a/tmva/tmva/src/MethodDL.cxx +++ b/tmva/tmva/src/MethodDL.cxx @@ -439,7 +439,7 @@ void MethodDL::ParseBatchLayout() /// Create a deep net based on the layout string template void MethodDL::CreateDeepNet(DNN::TDeepNet &deepNet, - std::vector> &nets) + std::vector> &nets, std::unique_ptr &modelNet) { // Layer specification, layer details const TString layerDelimiter(","); @@ -462,17 +462,18 @@ void MethodDL::CreateDeepNet(DNN::TDeepNet &deepNet, // Determine the type of the layer TString strLayerType = token->GetString(); + const size_t inputSize = GetEvent()->GetNVariables(); if (strLayerType == "DENSE") { - ParseDenseLayer(deepNet, nets, layerString->GetString(), subDelimiter); + ParseDenseLayer(inputSize, deepNet, nets, layerString->GetString(), subDelimiter, modelNet); } else if (strLayerType == "CONV") { - ParseConvLayer(deepNet, nets, 
layerString->GetString(), subDelimiter); + ParseConvLayer(deepNet, nets, layerString->GetString(), subDelimiter, modelNet); } else if (strLayerType == "MAXPOOL") { - ParseMaxPoolLayer(deepNet, nets, layerString->GetString(), subDelimiter); + ParseMaxPoolLayer(deepNet, nets, layerString->GetString(), subDelimiter, modelNet); } else if (strLayerType == "RESHAPE") { - ParseReshapeLayer(deepNet, nets, layerString->GetString(), subDelimiter); + ParseReshapeLayer(deepNet, nets, layerString->GetString(), subDelimiter, modelNet); } else if (strLayerType == "RNN") { - ParseRnnLayer(deepNet, nets, layerString->GetString(), subDelimiter); + ParseRnnLayer(deepNet, nets, layerString->GetString(), subDelimiter, modelNet); } else if (strLayerType == "LSTM") { Log() << kFATAL << "LSTM Layer is not yet fully implemented" << Endl; //ParseLstmLayer(deepNet, nets, layerString->GetString(), subDelimiter); @@ -483,16 +484,13 @@ void MethodDL::CreateDeepNet(DNN::TDeepNet &deepNet, //////////////////////////////////////////////////////////////////////////////// /// Pases the layer string and creates the appropriate dense layer template -void MethodDL::ParseDenseLayer(DNN::TDeepNet &deepNet, - std::vector> & /*nets*/, TString layerString, - TString delim) +void MethodDL::ParseDenseLayer(size_t inputSize, DNN::TDeepNet &deepNet, + std::vector> & nets, TString layerString, + TString delim, std::unique_ptr &modelNet) { int width = 0; EActivationFunction activationFunction = EActivationFunction::kTanh; - // not sure about this - const size_t inputSize = GetNvar(); - // Split layer details TObjArray *subStrings = layerString.Tokenize(delim); TIter nextToken(subStrings); @@ -539,15 +537,15 @@ void MethodDL::ParseDenseLayer(DNN::TDeepNet &deepNet, TDenseLayer *denseLayer = deepNet.AddDenseLayer(width, activationFunction); denseLayer->Initialize(); - // add same layer to fNet - fNet->AddDenseLayer(width, activationFunction); + // add same layer to modelNet + modelNet->AddDenseLayer(width, 
activationFunction); - //TDenseLayer *copyDenseLayer = new TDenseLayer(*denseLayer); + TDenseLayer *copyDenseLayer = new TDenseLayer(*denseLayer); // add the copy to all slave nets - //for (size_t i = 0; i < nets.size(); i++) { - // nets[i].AddDenseLayer(copyDenseLayer); - //} + for (size_t i = 0; i < nets.size(); i++) { + nets[i].AddDenseLayer(copyDenseLayer); + } // check compatibility of added layer // for a dense layer input should be 1 x 1 x DxHxW @@ -557,8 +555,8 @@ void MethodDL::ParseDenseLayer(DNN::TDeepNet &deepNet, /// Pases the layer string and creates the appropriate convolutional layer template void MethodDL::ParseConvLayer(DNN::TDeepNet &deepNet, - std::vector> & /*nets*/, TString layerString, - TString delim) + std::vector> & nets, TString layerString, + TString delim, std::unique_ptr &modelNet) { int depth = 0; int fltHeight = 0; @@ -640,24 +638,24 @@ void MethodDL::ParseConvLayer(DNN::TDeepNet &deepNet, zeroPadHeight, zeroPadWidth, activationFunction); convLayer->Initialize(); - // Add same layer to fNet - fNet->AddConvLayer(depth, fltHeight, fltWidth, strideRows, strideCols, + // Add same layer to modelNet + modelNet->AddConvLayer(depth, fltHeight, fltWidth, strideRows, strideCols, zeroPadHeight, zeroPadWidth, activationFunction); - //TConvLayer *copyConvLayer = new TConvLayer(*convLayer); + TConvLayer *copyConvLayer = new TConvLayer(*convLayer); //// add the copy to all slave nets - //for (size_t i = 0; i < nets.size(); i++) { - // nets[i].AddConvLayer(copyConvLayer); - //} + for (size_t i = 0; i < nets.size(); i++) { + nets[i].AddConvLayer(copyConvLayer); + } } //////////////////////////////////////////////////////////////////////////////// /// Pases the layer string and creates the appropriate max pool layer template void MethodDL::ParseMaxPoolLayer(DNN::TDeepNet &deepNet, - std::vector> & /*nets*/, TString layerString, - TString delim) + std::vector> & nets, TString layerString, + TString delim, std::unique_ptr &modelNet) { int frameHeight = 
0; @@ -701,8 +699,8 @@ void MethodDL::ParseMaxPoolLayer(DNN::TDeepNet &deepNet // TMaxPoolLayer *maxPoolLayer = deepNet.AddMaxPoolLayer(frameHeight, frameWidth, strideRows, strideCols); - // Add the same layer to fNet - fNet->AddMaxPoolLayer(frameHeight, frameWidth, strideRows, strideCols); + // Add the same layer to modelNet + modelNet->AddMaxPoolLayer(frameHeight, frameWidth, strideRows, strideCols); //TMaxPoolLayer *copyMaxPoolLayer = new TMaxPoolLayer(*maxPoolLayer); @@ -716,8 +714,8 @@ void MethodDL::ParseMaxPoolLayer(DNN::TDeepNet &deepNet /// Pases the layer string and creates the appropriate reshape layer template void MethodDL::ParseReshapeLayer(DNN::TDeepNet &deepNet, - std::vector> & /*nets*/, TString layerString, - TString delim) + std::vector> & nets, TString layerString, + TString delim, std::unique_ptr &modelNet) { int depth = 0; int height = 0; @@ -762,8 +760,8 @@ void MethodDL::ParseReshapeLayer(DNN::TDeepNet &deepNet // TReshapeLayer *reshapeLayer = deepNet.AddReshapeLayer(depth, height, width, flattening); - // Add the same layer to fNet - fNet->AddReshapeLayer(depth, height, width, flattening); + // Add the same layer to modelNet + modelNet->AddReshapeLayer(depth, height, width, flattening); //TReshapeLayer *copyReshapeLayer = new TReshapeLayer(*reshapeLayer); @@ -777,8 +775,8 @@ void MethodDL::ParseReshapeLayer(DNN::TDeepNet &deepNet /// Pases the layer string and creates the appropriate rnn layer template void MethodDL::ParseRnnLayer(DNN::TDeepNet & deepNet, - std::vector> & /*nets */, TString layerString, - TString delim) + std::vector> & nets, TString layerString, + TString delim, std::unique_ptr &modelNet) { // int depth = 0; int stateSize = 0; @@ -823,22 +821,22 @@ void MethodDL::ParseRnnLayer(DNN::TDeepNet & deepNet, timeSteps, rememberState); basicRNNLayer->Initialize(); - // Add same layer to fNet - fNet->AddBasicRNNLayer(stateSize, inputSize, timeSteps, rememberState); + // Add same layer to modelNet + 
modelNet->AddBasicRNNLayer(stateSize, inputSize, timeSteps, rememberState); - //TBasicRNNLayer *copyRNNLayer = new TBasicRNNLayer(*basicRNNLayer); + TBasicRNNLayer *copyRNNLayer = new TBasicRNNLayer(*basicRNNLayer); //// add the copy to all slave nets - //for (size_t i = 0; i < nets.size(); i++) { - // nets[i].AddBasicRNNLayer(copyRNNLayer); - //} + for (size_t i = 0; i < nets.size(); i++) { + nets[i].AddBasicRNNLayer(copyRNNLayer); + } } //////////////////////////////////////////////////////////////////////////////// /// Pases the layer string and creates the appropriate lstm layer template void MethodDL::ParseLstmLayer(DNN::TDeepNet & /*deepNet*/, - std::vector> & /*nets*/, TString layerString, + std::vector> & nets, TString layerString, TString delim) { // Split layer details @@ -1060,7 +1058,7 @@ void MethodDL::Train() } // Add all appropriate layers to deepNet and copies to fNet - CreateDeepNet(deepNet, nets); + CreateDeepNet(deepNet, nets, fNet); // print the created network std::cout << "***** Deep Learning Network *****\n"; @@ -1357,11 +1355,8 @@ void MethodDL::AddWeightsXMLTo(void * parent) const { fNet->GetLayerAt(i) -> AddWeightsXMLTo(nn); } - - } - -//////////////////////////////////////////////////////////////////////////////// +////////////////////////////////////////////////////////////////////////// void MethodDL::ReadWeightsFromXML(void * rootXML) { std::cout << "READ DL network from XML " << std::endl; diff --git a/tmva/tmva/src/MethodGAN.cxx b/tmva/tmva/src/MethodGAN.cxx new file mode 100644 index 0000000000000..11775d8c2c542 --- /dev/null +++ b/tmva/tmva/src/MethodGAN.cxx @@ -0,0 +1,1928 @@ +// @(#)root/tmva/tmva/cnn:$Id$ +// Author: Anushree Rankawat + +/********************************************************************************** + * Project: TMVA - a Root-integrated toolkit for multivariate data analysis * + * Package: TMVA * + * Class : MethodGAN * + * Web : http://tmva.sourceforge.net * + * * + * Description: * + * Generative 
Adversarial Networks Method * + * * + * Authors (alphabetical): * + * Anushree Rankawat * + * * + * Copyright (c) 2005-2015: * + * CERN, Switzerland * + * U. of Victoria, Canada * + * MPI-K Heidelberg, Germany * + * U. of Bonn, Germany * + * * + * Redistribution and use in source and binary forms, with or without * + * modification, are permitted according to the terms listed in LICENSE * + * (http://tmva.sourceforge.net/LICENSE) * +*////////////////////////////////////////////////////////////////////////////////// + +#include +#include "TFormula.h" +#include "TString.h" +#include "TMath.h" +#include "TRandom.h" + +#include "TMVA/Tools.h" +#include "TMVA/Configurable.h" +#include "TMVA/IMethod.h" +#include "TMVA/ClassifierFactory.h" +#include "TMVA/MethodGAN.h" +#include "TMVA/Types.h" +#include "TMVA/DNN/TensorDataLoader.h" +#include "TMVA/DNN/Architectures/Reference.h" +#include "TMVA/DNN/Functions.h" +#include "TMVA/DNN/DLMinimizers.h" +#include "TStopwatch.h" +#include "TMVA/MethodDL.h" + +#include + +REGISTER_METHOD(GAN) +ClassImp(TMVA::MethodGAN); + +using namespace TMVA; +using namespace TMVA::DNN::CNN; +using namespace TMVA::DNN; + +using Architecture_t = TCpu; +using Scalar_t = Architecture_t::Scalar_t; +using DeepNet_t = TMVA::DNN::TDeepNet; +using TensorDataLoader_t = TTensorDataLoader; + +using TMVA::DNN::EActivationFunction; +using TMVA::DNN::ELossFunction; +using TMVA::DNN::EInitialization; +using TMVA::DNN::EOutputFunction; + +template +void randomMatrix(AMatrix &X) +{ + size_t m, n; + m = X.GetNrows(); + n = X.GetNcols(); + + TRandom rand(clock()); + + Double_t sigma = sqrt(10.0); + + for (size_t i = 0; i < m; i++) { + for (size_t j = 0; j < n; j++) { + X(i, j) = rand.Gaus(0.0, sigma); + } + } +} + +namespace TMVA { + +//////////////////////////////////////////////////////////////////////////////// +TString getValueTmp(const std::map &keyValueMap, TString key) +{ + key.ToUpper(); + std::map::const_iterator it = keyValueMap.find(key); + if (it == 
keyValueMap.end()) { + return TString(""); + } + return it->second; +} + +//////////////////////////////////////////////////////////////////////////////// +template +T getValueTmp(const std::map &keyValueMap, TString key, T defaultValue); + +//////////////////////////////////////////////////////////////////////////////// +template <> +int getValueTmp(const std::map &keyValueMap, TString key, int defaultValue) +{ + TString value(getValueTmp(keyValueMap, key)); + if (value == "") { + return defaultValue; + } + return value.Atoi(); +} + +//////////////////////////////////////////////////////////////////////////////// +template <> +double getValueTmp(const std::map &keyValueMap, TString key, double defaultValue) +{ + TString value(getValueTmp(keyValueMap, key)); + if (value == "") { + return defaultValue; + } + return value.Atof(); +} + +//////////////////////////////////////////////////////////////////////////////// +template <> +TString getValueTmp(const std::map &keyValueMap, TString key, TString defaultValue) +{ + TString value(getValueTmp(keyValueMap, key)); + if (value == "") { + return defaultValue; + } + return value; +} + +//////////////////////////////////////////////////////////////////////////////// +template <> +bool getValueTmp(const std::map &keyValueMap, TString key, bool defaultValue) +{ + TString value(getValueTmp(keyValueMap, key)); + if (value == "") { + return defaultValue; + } + + value.ToUpper(); + if (value == "TRUE" || value == "T" || value == "1") { + return true; + } + + return false; +} + +//////////////////////////////////////////////////////////////////////////////// +template <> +std::vector getValueTmp(const std::map &keyValueMap, TString key, + std::vector defaultValue) +{ + TString parseString(getValueTmp(keyValueMap, key)); + if (parseString == "") { + return defaultValue; + } + + parseString.ToUpper(); + std::vector values; + + const TString tokenDelim("+"); + TObjArray *tokenStrings = parseString.Tokenize(tokenDelim); + TIter 
nextToken(tokenStrings); + TObjString *tokenString = (TObjString *)nextToken(); + for (; tokenString != NULL; tokenString = (TObjString *)nextToken()) { + std::stringstream sstr; + double currentValue; + sstr << tokenString->GetString().Data(); + sstr >> currentValue; + values.push_back(currentValue); + } + return values; +} + +//////////////////////////////////////////////////////////////////////////////// +void MethodGAN::DeclareOptions() +{ + // Set default values for all option strings + DeclareOptionRef(fInputLayoutString = "0|0|0##0|0|0", "InputLayout", "The Layout of the input"); + + DeclareOptionRef(fBatchLayoutString = "0|0|0##0|0|0", "BatchLayout", "The Layout of the batch"); + + DeclareOptionRef(fLayoutString = "DENSE|(N+100)*2|SOFTSIGN,DENSE|0|LINEAR##DENSE|(N+100)*2|SOFTSIGN,DENSE|0|LINEAR", "Layout", "Layout of the network."); + + DeclareOptionRef(fErrorStrategy = "CROSSENTROPY", "ErrorStrategy", "Loss function: Mean squared error (regression)" + " or cross entropy (binary classification)."); + AddPreDefVal(TString("CROSSENTROPY")); + AddPreDefVal(TString("SUMOFSQUARES")); + AddPreDefVal(TString("MUTUALEXCLUSIVE")); + + DeclareOptionRef(fWeightInitializationString = "XAVIER", "WeightInitialization", "Weight initialization strategy"); + AddPreDefVal(TString("XAVIER")); + AddPreDefVal(TString("XAVIERUNIFORM")); + + DeclareOptionRef(fArchitectureString = "CPU", "Architecture", "Which architecture to perform the training on."); + AddPreDefVal(TString("STANDARD")); + AddPreDefVal(TString("CPU")); + AddPreDefVal(TString("GPU")); + AddPreDefVal(TString("OPENCL")); + + DeclareOptionRef(fTrainingStrategyString = "MaxEpochs=2000," + "GeneratorLearningRate=1e-1," + "GeneratorMomentum=0.3," + "GeneratorRepetitions=3," + "GeneratorConvergenceSteps=50," + "GeneratorTestRepetitions=7," + "GeneratorWeightDecay=0.0," + "GeneratorRenormalize=L2," + "GeneratorDropConfig=0.0," + "GeneratorDropRepetitions=5," + "DiscriminatorLearningRate=1e-1," + 
"DiscriminatorMomentum=0.3," + "DiscriminatorRepetitions=3," + "DiscriminatorConvergenceSteps=50," + "DiscriminatorBatchSize=30," + "DiscriminatorTestRepetitions=7," + "DiscriminatorWeightDecay=0.0," + "DiscriminatorRenormalize=L2," + "DiscriminatorDropConfig=0.0," + "DiscriminatorDropRepetitions=5|" + "MaxEpochs=2000," + "GeneratorLearningRate=1e-4," + "GeneratorMomentum=0.3," + "GeneratorRepetitions=3," + "GeneratorConvergenceSteps=50," + "GeneratorBatchSize=20," + "GeneratorTestRepetitions=7," + "GeneratorWeightDecay=0.001," + "GeneratorRenormalize=L2," + "GeneratorDropConfig=0.0+0.5+0.5," + "GeneratorDropRepetitions=5," + "GeneratorMultithreading=True," + "DiscriminatorLearningRate=1e-4," + "DiscriminatorMomentum=0.3," + "DiscriminatorRepetitions=3," + "DiscriminatorConvergenceSteps=50," + "DiscriminatorBatchSize=20," + "DiscriminatorTestRepetitions=7," + "DiscriminatorWeightDecay=0.001," + "DiscriminatorRenormalize=L2," + "DiscriminatorDropConfig=0.0+0.5+0.5," + "DiscriminatorDropRepetitions=5," + "DiscriminatorMultithreading=True", + "TrainingStrategy", "Defines the training strategies."); +} + +//////////////////////////////////////////////////////////////////////////////// +void MethodGAN::ProcessOptions() +{ + + if (IgnoreEventsWithNegWeightsInTraining()) { + Log() << kINFO << "Will ignore negative events in training!" << Endl; + } + + if (fArchitectureString == "STANDARD") { + Log() << kERROR << "The STANDARD architecture has been deprecated. " + "Please use Architecture=CPU or Architecture=CPU." + "See the TMVA Users' Guide for instructions if you " + "encounter problems." + << Endl; + Log() << kFATAL << "The STANDARD architecture has been deprecated. " + "Please use Architecture=CPU or Architecture=CPU." + "See the TMVA Users' Guide for instructions if you " + "encounter problems." + << Endl; + } + + if (fArchitectureString == "OPENCL") { + Log() << kERROR << "The OPENCL architecture has not been implemented yet. 
" + "Please use Architecture=CPU or Architecture=CPU for the " + "time being. See the TMVA Users' Guide for instructions " + "if you encounter problems." + << Endl; + Log() << kFATAL << "The OPENCL architecture has not been implemented yet. " + "Please use Architecture=CPU or Architecture=CPU for the " + "time being. See the TMVA Users' Guide for instructions " + "if you encounter problems." + << Endl; + } + + if (fArchitectureString == "GPU") { +#ifndef R__HAS_TMVACUDA // Included only if DNNCUDA flag is _not_ set. + Log() << kERROR << "CUDA backend not enabled. Please make sure " + "you have CUDA installed and it was successfully " + "detected by CMAKE." + << Endl; + Log() << kFATAL << "CUDA backend not enabled. Please make sure " + "you have CUDA installed and it was successfully " + "detected by CMAKE." + << Endl; +#endif // DNNCUDA + } + + if (fArchitectureString == "CPU") { +#ifndef R__HAS_TMVACPU // Included only if DNNCPU flag is _not_ set. + Log() << kERROR << "Multi-core CPU backend not enabled. Please make sure " + "you have a BLAS implementation and it was successfully " + "detected by CMake as well that the imt CMake flag is set." + << Endl; + Log() << kFATAL << "Multi-core CPU backend not enabled. Please make sure " + "you have a BLAS implementation and it was successfully " + "detected by CMake as well that the imt CMake flag is set." + << Endl; + #endif // DNNCPU + } + + // Input Layout + ParseInputLayout(); + ParseBatchLayout(); + ParseNetworkLayout(); + + // Loss function and output. 
+ fOutputFunction = EOutputFunction::kSigmoid; + if (fAnalysisType == Types::kClassification) { + if (fErrorStrategy == "SUMOFSQUARES") { + std::cout << "Error Strategy String" << fErrorStrategy << std::endl; + fLossFunction = ELossFunction::kMeanSquaredError; + + } + if (fErrorStrategy == "CROSSENTROPY") { + std::cout << "Error Strategy String" << fErrorStrategy << std::endl; + fLossFunction = ELossFunction::kCrossEntropy; + } + fOutputFunction = EOutputFunction::kSigmoid; + } + + else if (fAnalysisType == Types::kRegression) { + if (fErrorStrategy != "SUMOFSQUARES") { + Log() << kWARNING << "For regression only SUMOFSQUARES is a valid " + << " neural net error function. Setting error function to " + << " SUMOFSQUARES now." << Endl; + } + + fLossFunction = ELossFunction::kMeanSquaredError; + fOutputFunction = EOutputFunction::kIdentity; + } + + else if (fAnalysisType == Types::kMulticlass) { + if (fErrorStrategy == "SUMOFSQUARES") { + fLossFunction = ELossFunction::kMeanSquaredError; + } + + if (fErrorStrategy == "CROSSENTROPY") { + fLossFunction = ELossFunction::kCrossEntropy; + } + + if (fErrorStrategy == "MUTUALEXCLUSIVE") { + fLossFunction = ELossFunction::kSoftmaxCrossEntropy; + } + fOutputFunction = EOutputFunction::kSoftmax; + } + + // Initialization + if (fWeightInitializationString == "XAVIER") { + fWeightInitialization = DNN::EInitialization::kGauss; + } else if (fWeightInitializationString == "XAVIERUNIFORM") { + fWeightInitialization = DNN::EInitialization::kUniform; + } else { + fWeightInitialization = DNN::EInitialization::kGauss; + } + + // Training settings. 
+ + KeyValueVector_t strategyKeyValues = ParseKeyValueString(fTrainingStrategyString, TString("|"), TString(",")); + for (auto &block : strategyKeyValues) { + + GANTTrainingSettings settings; + + settings.maxEpochs = getValueTmp(block, "MaxEpochs", 2000); + settings.generatorConvergenceSteps = getValueTmp(block, "GeneratorConvergenceSteps", 100); + settings.generatorBatchSize = getValueTmp(block, "GeneratorBatchSize", 32); + settings.generatorTestInterval = getValueTmp(block, "GeneratorTestRepetitions", 7); + settings.generatorWeightDecay = getValueTmp(block, "GeneratorWeightDecay", 0.0); + settings.generatorLearningRate = getValueTmp(block, "GeneratorLearningRate", 2e-4); + settings.generatorMomentum = getValueTmp(block, "GeneratorMomentum", 0.3); + settings.generatorDropoutProbabilities = getValueTmp(block, "GeneratorDropConfig", std::vector()); + + TString generatorRegularization = getValueTmp(block, "GeneratorRegularization", TString("NONE")); + if (generatorRegularization == "L1") { + settings.generatorRegularization = DNN::ERegularization::kL1; + } else if (generatorRegularization == "L2") { + settings.generatorRegularization = DNN::ERegularization::kL2; + } + + TString generatorStrMultithreading = getValueTmp(block, "GeneratorMultithreading", TString("True")); + + if (generatorStrMultithreading.BeginsWith("T")) { + settings.generatorMultithreading = true; + } else { + settings.generatorMultithreading = false; + } + + settings.discriminatorConvergenceSteps = getValueTmp(block, "DiscriminatorConvergenceSteps", 100); + settings.discriminatorBatchSize = getValueTmp(block, "DiscriminatorBatchSize", 32); + settings.discriminatorTestInterval = getValueTmp(block, "DiscriminatorTestRepetitions", 7); + settings.discriminatorWeightDecay = getValueTmp(block, "DiscriminatorWeightDecay", 0.0); + settings.discriminatorLearningRate = getValueTmp(block, "DiscriminatorLearningRate", 2e-5); + settings.discriminatorMomentum = getValueTmp(block, "DiscriminatorMomentum", 0.3); + 
settings.discriminatorDropoutProbabilities = getValueTmp(block, "DiscriminatorDropConfig", std::vector()); + + TString discriminatorRegularization = getValueTmp(block, "DiscriminatorRegularization", TString("NONE")); + if (discriminatorRegularization == "L1") { + settings.discriminatorRegularization = DNN::ERegularization::kL1; + } else if (discriminatorRegularization == "L2") { + settings.discriminatorRegularization = DNN::ERegularization::kL2; + } + + TString discriminatorStrMultithreading = getValueTmp(block, "DiscriminatorMultithreading", TString("True")); + + if (discriminatorStrMultithreading.BeginsWith("T")) { + settings.discriminatorMultithreading = true; + } else { + settings.discriminatorMultithreading = false; + } + + + fTrainingSettings.push_back(settings); + } +} + +//////////////////////////////////////////////////////////////////////////////// +/// default initializations +void MethodGAN::Init() +{ + // Nothing to do here +} + +//////////////////////////////////////////////////////////////////////////////// +/// Parse the model layout +void MethodGAN::ParseNetworkLayout() +{ + // Define the delimiter for separation of Generator and Discriminator Strings + const TString delim_model("##"); + const TString delim("|"); + + // Get the input layout string + TString networkLayoutString = this->GetLayoutString(); + + //Split string into Generator and Discriminator layout strings + TObjArray *modelStrings = networkLayoutString.Tokenize(delim_model); + TIter nextModelDim(modelStrings); + TObjString *modelDimString = (TObjString *)nextModelDim(); + int idxTokenModel = 0; + + for(; modelDimString != nullptr; modelDimString = (TObjString *)nextModelDim()) + { + TString strNetworkLayout(modelDimString->GetString()); + + if(idxTokenModel == 0) + this->SetGeneratorNetworkLayout(strNetworkLayout); + else if(idxTokenModel == 1) + this->SetDiscriminatorNetworkLayout(strNetworkLayout); + + ++idxTokenModel; + } +} + + 
+//////////////////////////////////////////////////////////////////////////////// +/// Parse the input layout +void MethodGAN::ParseInputLayout() +{ + // Define the delimiter for separation of Generator and Discriminator Strings + const TString delim_model("##"); + const TString delim("|"); + + // Get the input layout string + TString inputLayoutString = this->GetInputLayoutString(); + + size_t depth = 0; + size_t height = 0; + size_t width = 0; + + //Split string into Generator and Discriminator layout strings + TObjArray *modelStrings = inputLayoutString.Tokenize(delim_model); + TIter nextModelDim(modelStrings); + TObjString *modelDimString = (TObjString *)nextModelDim(); + int idxTokenModel = 0; + + for(; modelDimString != nullptr; modelDimString = (TObjString *)nextModelDim()) + { + // Split the input layout string + TObjArray *inputDimStrings = modelDimString->GetString().Tokenize(delim); + TIter nextInputDim(inputDimStrings); + TObjString *inputDimString = (TObjString *)nextInputDim(); + int idxToken = 0; + + for (; inputDimString != nullptr; inputDimString = (TObjString *)nextInputDim()) { + switch (idxToken) { + case 0: // Input Depth + { + TString strDepth(inputDimString->GetString()); + depth = (size_t)strDepth.Atoi(); + + if(idxTokenModel == 0) + this->SetGeneratorInputDepth(depth); + else if(idxTokenModel == 1) + this->SetDiscriminatorInputDepth(depth); + + break; + } + case 1: // Input Height + { + TString strHeight(inputDimString->GetString()); + height = (size_t)strHeight.Atoi(); + + if(idxTokenModel == 0) + this->SetGeneratorInputHeight(height); + else if(idxTokenModel == 1) + this->SetDiscriminatorInputHeight(height); + + break; + } + case 2: // input width + { + TString strWidth(inputDimString->GetString()); + width = (size_t)strWidth.Atoi(); + + if(idxTokenModel == 0) + this->SetGeneratorInputWidth(width); + else if(idxTokenModel == 1) + this->SetDiscriminatorInputWidth(width); + + break; + } + } + ++idxToken; + } + + ++idxTokenModel; + + } +} + 
+////////////////////////////////////////////////////////////////////////////////
+/// Parse the batch layout string.
+///
+/// Expected format: "depth|height|width##depth|height|width", where the part
+/// before "##" describes the generator batch layout and the part after it the
+/// discriminator batch layout. Each parsed dimension is forwarded to the
+/// matching Set{Generator,Discriminator}Batch{Depth,Height,Width} setter.
+void MethodGAN::ParseBatchLayout()
+{
+   // Delimiter separating the generator and discriminator layout strings,
+   // and the delimiter separating the dimensions inside each of them.
+   const TString delim_model("##");
+   const TString delim("|");
+
+   // Get the batch layout string
+   TString batchLayoutString = this->GetBatchLayoutString();
+
+   // Split the string into the per-model layout strings.
+   // NOTE: TString::Tokenize returns a heap-allocated TObjArray owned by the
+   // caller, so every tokenization result below must be deleted (the original
+   // code leaked both arrays).
+   TObjArray *modelDimStrings = batchLayoutString.Tokenize(delim_model);
+   TIter nextModelDim(modelDimStrings);
+   TObjString *modelDimString = (TObjString *)nextModelDim();
+   int idxTokenModel = 0; // 0 -> generator, 1 -> discriminator
+
+   for (; modelDimString != nullptr; modelDimString = (TObjString *)nextModelDim()) {
+      // Split one model's layout into its depth / height / width tokens.
+      TObjArray *batchDimStrings = modelDimString->GetString().Tokenize(delim);
+      TIter nextBatchDim(batchDimStrings);
+      TObjString *batchDimString = (TObjString *)nextBatchDim();
+      int idxToken = 0; // 0 -> depth, 1 -> height, 2 -> width
+
+      for (; batchDimString != nullptr; batchDimString = (TObjString *)nextBatchDim()) {
+         // Convert the current token once instead of per-case.
+         const size_t dim = (size_t)TString(batchDimString->GetString()).Atoi();
+
+         switch (idxToken) {
+         case 0: // batch depth
+            if (idxTokenModel == 0)
+               this->SetGeneratorBatchDepth(dim);
+            else if (idxTokenModel == 1)
+               this->SetDiscriminatorBatchDepth(dim);
+            break;
+         case 1: // batch height
+            if (idxTokenModel == 0)
+               this->SetGeneratorBatchHeight(dim);
+            else if (idxTokenModel == 1)
+               this->SetDiscriminatorBatchHeight(dim);
+            break;
+         case 2: // batch width
+            if (idxTokenModel == 0)
+               this->SetGeneratorBatchWidth(dim);
+            else if (idxTokenModel == 1)
+               this->SetDiscriminatorBatchWidth(dim);
+            break;
+         }
+         ++idxToken;
+      }
+      delete batchDimStrings; // fix: was leaked
+
+      ++idxTokenModel;
+   }
+   delete modelDimStrings; // fix: was leaked
+}
+
+////////////////////////////////////////////////////////////////////////////////
+/// Build the combined generator+discriminator network used when training the
+/// generator: all generator layers are appended first, then the discriminator
+/// layers starting at index 1 (layer 0 is skipped — presumably its input
+/// interface duplicates the generator output; TODO confirm). The copied
+/// discriminator CONV/DENSE/RNN layers are frozen via SetIsTraining(0) so only
+/// the generator learns; MAXPOOL/RESHAPE have no weights and are not toggled.
+/// NOTE(review): template arguments were stripped from this file — the
+/// dynamic_casts were presumably DNN::TConvLayer<Architecture_t>* etc. and the
+/// last parameter std::unique_ptr<DeepNetImpl_t>; restore before compiling.
+void MethodGAN::CombineGAN(DeepNet_t &combinedDeepNet, DeepNet_t &generatorNet, DeepNet_t &discriminatorNet, std::unique_ptr &combinedNet)
+{
+
+   // Copy every generator layer into both the training net and the stored net,
+   // dispatching on the string tag set by each layer's constructor.
+   for(size_t i = 0; i< generatorNet.GetDepth(); i++)
+   {
+      auto layer = generatorNet.GetLayerAt(i);
+      if(layer->GetLayerType() == "CONV") {
+         combinedDeepNet.AddConvLayer(dynamic_cast*>(layer));
+         combinedNet->AddConvLayer(dynamic_cast*>(layer));
+      } else if(layer->GetLayerType() == "MAXPOOL") {
+         combinedDeepNet.AddMaxPoolLayer(dynamic_cast*>(layer));
+         combinedNet->AddMaxPoolLayer(dynamic_cast*>(layer));
+      } else if(layer->GetLayerType() == "DENSE") {
+         combinedDeepNet.AddDenseLayer(dynamic_cast*>(layer));
+         combinedNet->AddDenseLayer(dynamic_cast*>(layer));
+      } else if(layer->GetLayerType() == "RESHAPE") {
+         //layer->SetInputDepth(generatorNet.GetBatchSize());
+         combinedDeepNet.AddReshapeLayer(dynamic_cast*>(layer));
+         combinedNet->AddReshapeLayer(dynamic_cast*>(layer));
+      } else if(layer->GetLayerType() == "RNN") {
+         combinedDeepNet.AddBasicRNNLayer(dynamic_cast*>(layer));
+         combinedNet->AddBasicRNNLayer(dynamic_cast*>(layer));
+      }
+   }
+
+   // Append the discriminator layers (from index 1), frozen for this phase.
+   for(size_t i = 1; i< discriminatorNet.GetDepth(); i++)
+   {
+      auto layer = discriminatorNet.GetLayerAt(i);
+
+      if(layer->GetLayerType() == "CONV") {
+         layer->SetIsTraining(0);
+         combinedDeepNet.AddConvLayer(dynamic_cast*>(layer));
+         combinedNet->AddConvLayer(dynamic_cast*>(layer));
+      } else if(layer->GetLayerType() == "MAXPOOL") {
+         combinedDeepNet.AddMaxPoolLayer(dynamic_cast*>(layer));
+         combinedNet->AddMaxPoolLayer(dynamic_cast*>(layer));
+      } else if(layer->GetLayerType() == "DENSE") {
+         layer->SetIsTraining(0);
+         combinedDeepNet.AddDenseLayer(dynamic_cast*>(layer));
+         combinedNet->AddDenseLayer(dynamic_cast*>(layer));
+      } else if(layer->GetLayerType() == "RESHAPE") {
+         combinedDeepNet.AddReshapeLayer(dynamic_cast*>(layer));
+         combinedNet->AddReshapeLayer(dynamic_cast*>(layer));
+      } else if(layer->GetLayerType() == "RNN") {
+         layer->SetIsTraining(0);
+         combinedDeepNet.AddBasicRNNLayer(dynamic_cast*>(layer));
+         combinedNet->AddBasicRNNLayer(dynamic_cast*>(layer));
+      }
+   }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+/// Re-enable training (SetIsTraining(1)) on the discriminator layers that
+/// CombineGAN froze. Starts at index 1, mirroring CombineGAN; layer 0 is never
+/// toggled — NOTE(review): confirm that is intentional.
+void MethodGAN::SetDiscriminatorLayerTraining(DeepNet_t &discriminatorNet)
+{
+   for(size_t i = 1; i< discriminatorNet.GetDepth(); i++)
+   {
+      auto layer = discriminatorNet.GetLayerAt(i);
+      layer->SetIsTraining(1);
+   }
+}
+////////////////////////////////////////////////////////////////////////////////
+/// Create a deep net based on the layout string.
+/// Splits layoutString on "," into layer specifications, then dispatches each
+/// to the matching MethodDL::Parse*Layer helper, which appends the layer to
+/// deepNet, to every net in nets, and to the stored modelNet.
+/// NOTE(review): the template parameter list was stripped from this file
+/// (presumably <typename Architecture_t, typename Layer_t>, with the stripped
+/// container types below to match) — restore before compiling.
+template 
+void MethodGAN::CreateDeepNet(DNN::TDeepNet &deepNet,
+                              std::vector> &nets, std::unique_ptr &modelNet, TString layoutString)
+{
+
+   // Layer specification, layer details
+   const TString layerDelimiter(",");
+   const TString subDelimiter("|");
+
+   // Split layers
+   TObjArray *layerStrings = layoutString.Tokenize(layerDelimiter);
+   TIter nextLayer(layerStrings);
+   TObjString *layerString = (TObjString *)nextLayer();
+
+   for (; layerString != nullptr; layerString = (TObjString *)nextLayer()) {
+      // Split layer details
+      TObjArray *subStrings = layerString->GetString().Tokenize(subDelimiter);
+      TIter nextToken(subStrings);
+      TObjString *token = (TObjString *)nextToken();
+
+      // Determine the type of the layer from its first "|"-separated token.
+      TString strLayerType = token->GetString();
+
+      // Number of input variables of the event — needed only by dense layers.
+      const size_t inputSize = GetEvent()->GetNVariables();
+
+      if (strLayerType == "DENSE") {
+         MethodDL::ParseDenseLayer(inputSize, deepNet, nets, layerString->GetString(), subDelimiter, modelNet);
+      } else if (strLayerType == "CONV") {
+         MethodDL::ParseConvLayer(deepNet, nets, layerString->GetString(), subDelimiter, modelNet);
+      } else if (strLayerType == "MAXPOOL") {
+         MethodDL::ParseMaxPoolLayer(deepNet, nets, layerString->GetString(), subDelimiter, modelNet);
+      } else if (strLayerType == "RESHAPE") {
+         MethodDL::ParseReshapeLayer(deepNet, nets, layerString->GetString(), subDelimiter, modelNet);
+      } else if (strLayerType == "RNN") {
+         MethodDL::ParseRnnLayer(deepNet, nets, layerString->GetString(), subDelimiter, modelNet);
+      } else if (strLayerType == "LSTM") {
+         // LSTM parsing is not wired up yet; abort rather than mis-build the net.
+         Log() << kFATAL << "LSTM Layer is not yet fully implemented" << Endl;
+         //MethodDL::ParseLstmLayer(deepNet, nets, layerString->GetString(), subDelimiter);
+      }
+   }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+/// Standard constructor.
+MethodGAN::MethodGAN(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption)
+   : MethodBase(jobName, Types::kGAN, methodTitle, theData, theOption)
+{
+   // Nothing to do here
+}
+
+////////////////////////////////////////////////////////////////////////////////
+/// Constructor from a weight file.
+MethodGAN::MethodGAN(DataSetInfo &theData, const TString &theWeightFile)
+   : MethodBase(Types::kGAN, theData, theWeightFile)
+{
+   // Nothing to do here
+}
+
+////////////////////////////////////////////////////////////////////////////////
+/// Destructor.
+MethodGAN::~MethodGAN()
+{
+   // Nothing to do here
+}
+
+////////////////////////////////////////////////////////////////////////////////
+/// Parse key value pairs in blocks -> return vector of blocks with map of key value pairs. 
+/// Splits parseString on blockDelim into blocks, each block on tokenDelim into
+/// "KEY=value" tokens, and returns one upper-cased-key map per block.
+/// NOTE(review): "std::map ¤tBlock" below is mojibake — the original was
+/// "std::map<TString, TString> &currentBlock" (the later currentBlock.insert
+/// confirms the name); template arguments were stripped file-wide.
+/// NOTE(review): strKey.Strip(...) / strValue.Strip(...) discard their
+/// TSubString return value — TString::Strip does not modify in place, so the
+/// intended whitespace trimming never happens.
+auto MethodGAN::ParseKeyValueString(TString parseString, TString blockDelim, TString tokenDelim) -> KeyValueVector_t
+{
+   KeyValueVector_t blockKeyValues;
+   const TString keyValueDelim("=");
+
+   TObjArray *blockStrings = parseString.Tokenize(blockDelim);
+   TIter nextBlock(blockStrings);
+   TObjString *blockString = (TObjString *)nextBlock();
+
+   for (; blockString != nullptr; blockString = (TObjString *)nextBlock()) {
+      // Start a fresh key/value map for this block.
+      blockKeyValues.push_back(std::map());
+      std::map ¤tBlock = blockKeyValues.back();
+
+      TObjArray *subStrings = blockString->GetString().Tokenize(tokenDelim);
+      TIter nextToken(subStrings);
+      TObjString *token = (TObjString *)nextToken();
+
+      for (; token != nullptr; token = (TObjString *)nextToken()) {
+         TString strKeyValue(token->GetString());
+         // Tokens without '=' (or starting with it) are silently skipped.
+         int delimPos = strKeyValue.First(keyValueDelim.Data());
+         if (delimPos <= 0) continue;
+
+         // Keys are normalized to upper case for case-insensitive lookup.
+         TString strKey = TString(strKeyValue(0, delimPos));
+         strKey.ToUpper();
+         TString strValue = TString(strKeyValue(delimPos + 1, strKeyValue.Length()));
+
+         strKey.Strip(TString::kBoth, ' ');
+         strValue.Strip(TString::kBoth, ' ');
+
+         currentBlock.insert(std::make_pair(strKey, strValue));
+      }
+   }
+   return blockKeyValues;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+/// What kind of analysis type the GAN method can handle: two-class
+/// classification, multiclass, and regression.
+Bool_t MethodGAN::HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t /*numberTargets*/)
+{
+   if (type == Types::kClassification && numberClasses == 2) return kTRUE;
+   if (type == Types::kMulticlass) return kTRUE;
+   if (type == Types::kRegression) return kTRUE;
+
+   return kFALSE;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+/// Run the generator over trainingData to produce fake samples for the
+/// discriminator: fills predTensor with generator predictions, sets every
+/// row of outputMatrix to classLabel and every weight to 1. Every 5000th
+/// epoch the generated pixel values are also appended to a CSV file.
+/// NOTE(review): the output path is a hard-coded user directory and starts
+/// with '~', which std::ofstream does NOT expand — the open fails silently
+/// and the subsequent writes are no-ops; this needs a configurable path.
+void MethodGAN::CreateDiscriminatorFakeData(std::vector> &predTensor, TMatrixT &outputMatrix, TMatrixT &weights, TTensorDataLoader &trainingData, DeepNet_t &genDeepNet,
+                                            DeepNet_t &disDeepNet, EOutputFunction outputFunction, size_t nSamples, size_t classLabel, size_t epoch)
+{
+
+   std::ofstream outputFile;
+   // Storing image pixel values after every 5000 epochs
+   if(epoch % 5000 == 0) {
+      // For storing the output file,mention path where you would like to store the output file
+      outputFile.open("~/GSoC/Output_Files/output_mnist_final.csv", std::ios_base::app);
+   }
+
+   //TODO: Remove this once discriminatorOutputMatrix is added
+   // Create the output
+   for (size_t i = 0; i < nSamples; i++) {
+      // Label every generated sample with the caller-supplied class
+      // (callers pass 0 for fake data).
+      outputMatrix(i, 0) = classLabel;
+   }
+
+   // Create the weights
+   for (size_t i = 0; i < nSamples; i++) {
+      weights(i, 0) = 1;
+   }
+
+   // Pre-size one prediction matrix per sample batch.
+   for (size_t i = 0; i < nSamples; i++) {
+      predTensor.emplace_back(disDeepNet.GetBatchSize(), disDeepNet.GetBatchWidth());
+   }
+
+   size_t nVal = genDeepNet.GetBatchSize();
+   size_t nOutput = genDeepNet.GetOutputWidth();
+   size_t count = 0;
+
+   for (auto batch : trainingData) {
+
+      auto inputTensor = batch.GetInput();
+
+      //TODO:Need to overload Prediction function as tensor type needed (after Deconvolution implementation)
+      Matrix_t YHat(nVal, nOutput);
+      genDeepNet.Prediction(YHat, inputTensor, outputFunction);
+
+      size_t rows_A, cols_A;
+      rows_A = predTensor[count].GetNrows();
+      cols_A = predTensor[count].GetNcols();
+
+      size_t rows_B, cols_B;
+      rows_B = YHat.GetNrows();
+      cols_B = YHat.GetNcols();
+
+      // Prediction shape must match the pre-sized destination matrix.
+      R__ASSERT(rows_A==rows_B);
+      R__ASSERT(cols_A==cols_B);
+
+      // Copy the prediction into the output tensor; optionally dump it as CSV.
+      for (size_t i = 0; i < rows_A; i++) {
+         for (size_t j = 0; j < cols_A; j++) {
+
+            predTensor[count](i, j) = YHat(i,j);
+
+            if(epoch % 5000 == 0) {
+               outputFile << YHat(i,j);
+
+               if(j != cols_A-1) {
+                  outputFile << ",";
+               }
+            }
+         }
+         if(epoch % 5000 == 0) {
+            outputFile << "\n";
+         }
+      }
+
+      count++;
+   }
+   if (epoch % 5000 == 0) {
+      outputFile.close();
+   }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+/// Sum deepNet's loss over every batch of the data loader.
+/// NOTE(review): this and the next definition look identical because the
+/// dataloader's template arguments were stripped from this file; they were
+/// presumably two distinct overloads (e.g. TensorDataLoader_t vs
+/// TTensorDataLoader<TensorInput, Architecture_t>) — restore before compiling.
+Double_t MethodGAN::ComputeLoss(TTensorDataLoader &generalDataloader, DeepNet_t &deepNet)
+{
+   Double_t error = 0.0;
+
+   // TODO: Update library to compute loss while taking a step
+   for (auto batch : generalDataloader) {
+      auto inputTensor = batch.GetInput();
+      auto outputMatrix = batch.GetOutput();
+      auto weights = batch.GetWeights();
+
+      error += deepNet.Loss(inputTensor, outputMatrix, weights);
+   }
+
+   return error;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+/// Sum deepNet's loss over every batch of the data loader (second dataloader
+/// type — see the NOTE on the previous overload about stripped templates).
+Double_t MethodGAN::ComputeLoss(TTensorDataLoader &generalDataloader, DeepNet_t &deepNet)
+{
+   Double_t error = 0.0;
+
+   // TODO: Update library to compute loss while taking a step
+   for (auto batch : generalDataloader) {
+      auto inputTensor = batch.GetInput();
+      auto outputMatrix = batch.GetOutput();
+      auto weights = batch.GetWeights();
+
+      error += deepNet.Loss(inputTensor, outputMatrix, weights);
+   }
+
+   return error;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+/// Build the random-noise input for the generator: one batchHeight x
+/// batchWidth matrix per sample filled by randomMatrix (helper defined
+/// elsewhere — presumably uniform/gaussian noise; TODO confirm), plus the
+/// class-label output matrix and unit weights.
+void MethodGAN::CreateNoisyMatrices(std::vector> &inputTensor, TMatrixT &outputMatrix, TMatrixT &weights, DeepNet_t &deepNet, size_t nSamples,
+                                    size_t classLabel)
+{
+
+   for (size_t i = 0; i < nSamples; i++)
+   {
+      inputTensor.emplace_back(deepNet.GetBatchHeight(), deepNet.GetBatchWidth());
+   }
+
+   // Fill each input matrix with noise.
+   for (size_t i = 0; i < nSamples; i++)
+   {
+      randomMatrix(inputTensor[i]);
+   }
+
+   //TODO: Remove this once discriminatorOutputMatrix is added
+   // Create the output
+   for (size_t i = 0; i < nSamples; i++)
+   {
+      // Label every noise sample with the caller-supplied class
+      // (callers pass 0 for fake data).
+      outputMatrix(i, 0) = classLabel;
+   }
+
+   // Create the weights
+   for (size_t i = 0; i < nSamples; i++)
+   {
+      weights(i, 0) = 1;
+   }
+
+}
+
+////////////////////////////////////////////////////////////////////////////////
+/// Train the GAN: selects the architecture backend, builds the discriminator
+/// and generator nets, then alternates discriminator and generator training
+/// per epoch.
+void MethodGAN::Train()
+{
+   if (fInteractive) {
+      Log() << kFATAL << "Not implemented yet" << Endl;
+      return;
+   }
+
+   // Backend selection: fall through to kFATAL when the requested
+   // architecture was not compiled in.
+   if (this->GetArchitectureString() == "GPU") {
+#ifdef R__HAS_TMVACUDA
+      Log() << kINFO << "Start of deep neural network training on GPU." << Endl << Endl;
+#else
+      Log() << kFATAL << "CUDA backend not enabled. 
Please make sure " + "you have CUDA installed and it was successfully " + "detected by CMAKE." + << Endl; + return; +#endif + } else if (this->GetArchitectureString() == "OpenCL") { + Log() << kFATAL << "OpenCL backend not yet supported." << Endl; + return; + } else if (this->GetArchitectureString() == "CPU") { +#ifdef R__HAS_TMVACPU + Log() << kINFO << "Start of deep neural network training on CPU." << Endl << Endl; +#else + Log() << kFATAL << "Multi-core CPU backend not enabled. Please make sure " + "you have a BLAS implementation and it was successfully " + "detected by CMake as well that the imt CMake flag is set." + << Endl; + return; +#endif + } + +/// definitions for CUDA +#ifdef R__HAS_TMVACUDA // Included only if DNNCUDA flag is set. + using Architecture_t = DNN::TCuda; +#else +#ifdef R__HAS_TMVACPU // Included only if DNNCPU flag is set. + using Architecture_t = DNN::TCpu; +#else + using Architecture_t = DNN::TReference; +#endif +#endif + + // Determine the number of training and testing examples + size_t nTrainingSamples = GetEventCollection(Types::kTraining).size(); + size_t nTestingSamples = GetEventCollection(Types::kTesting).size(); + + size_t trainingPhase = 1; + for (GANTTrainingSettings &settings : this->GetTrainingSettings()) { + + size_t nThreads = 1; // FIXME threads are hard coded to 1, no use of slave threads or multi-threading + + Log() << "Training phase " << trainingPhase << " of " << this->GetTrainingSettings().size() << ":" << Endl; + trainingPhase++; + + size_t maxEpochs = settings.maxEpochs; + // After the processing of the options, initialize the master deep net + size_t generatorBatchSize = settings.generatorBatchSize; + size_t discriminatorBatchSize = settings.discriminatorBatchSize; + + // Should be replaced by actual implementation. No support for this now. 
+ size_t generatorInputDepth = this->GetGeneratorInputDepth(); + size_t generatorInputHeight = this->GetGeneratorInputHeight(); + size_t generatorInputWidth = this->GetGeneratorInputWidth(); + size_t generatorBatchDepth = this->GetGeneratorBatchDepth(); + size_t generatorBatchHeight = this->GetGeneratorBatchHeight(); + size_t generatorBatchWidth = this->GetGeneratorBatchWidth(); + + size_t discriminatorInputDepth = this->GetDiscriminatorInputDepth(); + size_t discriminatorInputHeight = this->GetDiscriminatorInputHeight(); + size_t discriminatorInputWidth = this->GetDiscriminatorInputWidth(); + size_t discriminatorBatchDepth = this->GetDiscriminatorBatchDepth(); + size_t discriminatorBatchHeight = this->GetDiscriminatorBatchHeight(); + size_t discriminatorBatchWidth = this->GetDiscriminatorBatchWidth(); + + ELossFunction generatorJ = this->GetLossFunction(); + EInitialization generatorI = this->GetWeightInitialization(); + ERegularization generatorR = settings.generatorRegularization; + Scalar_t generatorWeightDecay = settings.generatorWeightDecay; + + ELossFunction discriminatorJ = this->GetLossFunction(); + EInitialization discriminatorI = this->GetWeightInitialization(); + ERegularization discriminatorR = settings.discriminatorRegularization; + Scalar_t discriminatorWeightDecay = settings.discriminatorWeightDecay; + +///////////////////////////////////////////////////////////////////////////////////////////////////// + //Settings for Discriminator Model + + //Batch size should be included in batch layout as well. There are two possibilities: + // 1. Batch depth = batch size one will input tensors as (batch_size x d1 x d2) + // This is case for example if first layer is a conv layer and d1 = image depth, d2 = image width x image height + // 2. 
Batch depth = 1, batch height = batch size batxch width = dim of input features + // This should be case if first layer is a Dense 1 and input tensor must be ( 1 x batch_size x input_features ) + + if (discriminatorBatchDepth != discriminatorBatchSize && discriminatorBatchDepth > 1) { + Error("TrainCpu","Given batch depth of %zu (specified in BatchLayout) should be equal to given batch size %zu",discriminatorBatchDepth,discriminatorBatchSize); + return; + } + if (discriminatorBatchDepth == 1 && discriminatorBatchSize > 1 && discriminatorBatchSize != discriminatorBatchHeight ) { + Error("TrainCpu","Given batch height of %zu (specified in BatchLayout) should be equal to given batch size %zu",discriminatorBatchHeight,discriminatorBatchSize); + return; + } + + + //check also that input layout compatible with batch layout + bool disBadLayout = false; + // case batch depth == batch size + if (discriminatorBatchDepth == discriminatorBatchSize) + disBadLayout = ( discriminatorInputDepth * discriminatorInputHeight * discriminatorInputWidth != discriminatorBatchHeight * discriminatorBatchWidth ) ; + // case batch Height is batch size + if (discriminatorBatchHeight == discriminatorBatchSize && discriminatorBatchDepth == 1) + disBadLayout |= ( discriminatorInputDepth * discriminatorInputHeight * discriminatorInputWidth != discriminatorBatchWidth); + if (disBadLayout) { + Error("TrainCpu","Given input layout %zu x %zu x %zu is not compatible with batch layout %zu x %zu x %zu ", + discriminatorInputDepth,discriminatorInputHeight,discriminatorInputWidth,discriminatorBatchDepth,discriminatorBatchHeight,discriminatorBatchWidth); + return; + } + + + DeepNet_t discriminatorDeepNet(discriminatorBatchSize, discriminatorInputDepth, discriminatorInputHeight, discriminatorInputWidth, discriminatorBatchDepth, discriminatorBatchHeight, discriminatorBatchWidth, discriminatorJ, discriminatorI, discriminatorR, discriminatorWeightDecay); + + // create a copy of DeepNet for evaluating but with 
batch size = 1 + // fNet is the saved network and will be with CPU or Referrence architecture + discriminatorFNet = std::unique_ptr(new DeepNetImpl_t(1, discriminatorInputDepth, discriminatorInputHeight, discriminatorInputWidth, discriminatorBatchDepth, discriminatorBatchHeight, discriminatorBatchWidth, discriminatorJ, discriminatorI, discriminatorR, discriminatorWeightDecay)); + + // Initialize the vector of slave nets + std::vector discriminatorNets{}; + discriminatorNets.reserve(nThreads); + for (size_t i = 0; i < nThreads; i++) { + // create a copies of the master deep net + discriminatorNets.push_back(discriminatorDeepNet); + } + + // Add all appropriate layers to deepNet and copies to fNet + CreateDeepNet(discriminatorDeepNet, discriminatorNets, discriminatorFNet, this->GetDiscriminatorNetworkLayoutString()); + + // print the created network + std::cout << "***** Discriminator Deep Learning Network *****\n"; + discriminatorDeepNet.Print(); + + // Initialize the minimizer + DNN::TDLGradientDescent discriminatorMinimizer(settings.discriminatorLearningRate, settings.discriminatorConvergenceSteps, + settings.discriminatorTestInterval); + +///////////////////////////////////////////////////////////////////////////////////////////////////// + //Settings for Generator Model + + //Batch size should be included in batch layout as well. There are two possibilities: + // 1. Batch depth = batch size one will input tensors as (batch_size x d1 x d2) + // This is case for example if first layer is a conv layer and d1 = image depth, d2 = image width x image height + // 2. 
Batch depth = 1, batch height = batch size batxch width = dim of input features + // This should be case if first layer is a Dense 1 and input tensor must be ( 1 x batch_size x input_features ) +///////////////////////////////////////////////////////////////////////////////////////////////////// + + if (generatorBatchDepth != generatorBatchSize && generatorBatchDepth > 1) { + Error("TrainCpu","Given batch depth of %zu (specified in BatchLayout) should be equal to given batch size %zu", generatorBatchDepth, generatorBatchSize); + return; + } + if (generatorBatchDepth == 1 && generatorBatchSize > 1 && generatorBatchSize != generatorBatchHeight ) { + Error("TrainCpu","Given batch height of %zu (specified in BatchLayout) should be equal to given batch size %zu", generatorBatchHeight, generatorBatchSize); + return; + } + + //check also that input layout compatible with batch layout + bool genBadLayout = false; + // case batch depth == batch size + if (generatorBatchDepth == generatorBatchSize) + genBadLayout = ( generatorInputDepth * generatorInputHeight * generatorInputWidth != generatorBatchHeight * generatorBatchWidth ) ; + // case batch Height is batch size + if (generatorBatchHeight == generatorBatchSize && generatorBatchDepth == 1) + genBadLayout |= ( generatorInputDepth * generatorInputHeight * generatorInputWidth != generatorBatchWidth); + if (genBadLayout) { + Error("TrainCpu","Given input layout %zu x %zu x %zu is not compatible with batch layout %zu x %zu x %zu ", + generatorInputDepth, generatorInputHeight, generatorInputWidth, generatorBatchDepth, generatorBatchHeight, generatorBatchWidth ); + return; + } + + DeepNet_t generatorDeepNet(generatorBatchSize, generatorInputDepth, generatorInputHeight, generatorInputWidth, generatorBatchDepth, generatorBatchHeight, + generatorBatchWidth, generatorJ, generatorI, generatorR, generatorWeightDecay); + + // create a copy of DeepNet for evaluating but with batch size = 1 + // fNet is the saved network and will be with 
CPU or Referrence architecture + generatorFNet = std::unique_ptr(new DeepNetImpl_t(1, generatorInputDepth, generatorInputHeight, generatorInputWidth, generatorBatchDepth, + generatorBatchHeight, generatorBatchWidth, generatorJ, generatorI, generatorR, generatorWeightDecay)); + + // Initialize the vector of slave nets + std::vector generatorNets{}; + generatorNets.reserve(nThreads); + for (size_t i = 0; i < nThreads; i++) { + // create a copies of the master deep net + generatorNets.push_back(generatorDeepNet); + } + + // Add all appropriate layers to deepNet and copies to fNet + CreateDeepNet(generatorDeepNet, generatorNets, generatorFNet, this->GetGeneratorNetworkLayoutString()); + + // print the created network + std::cout << "***** Generator Deep Learning Network *****\n"; + generatorDeepNet.Print(); + + // Initialize the minimizer + DNN::TDLGradientDescent generatorMinimizer(settings.generatorLearningRate, settings.generatorConvergenceSteps, + settings.generatorTestInterval); + + // Loading the training and testing datasets + TMVAInput_t discriminatorTrainingTuple = std::tie(GetEventCollection(Types::kTraining), DataInfo()); + TensorDataLoader_t discriminatorTrainingData(discriminatorTrainingTuple, nTrainingSamples, discriminatorDeepNet.GetBatchSize(), + discriminatorDeepNet.GetBatchDepth(), discriminatorDeepNet.GetBatchHeight(), discriminatorDeepNet.GetBatchWidth(), + discriminatorDeepNet.GetOutputWidth(), nThreads); + + TMVAInput_t discriminatorTestTuple = std::tie(GetEventCollection(Types::kTesting), DataInfo()); + TensorDataLoader_t discriminatorTestingData(discriminatorTestTuple, nTestingSamples, discriminatorDeepNet.GetBatchSize(), + discriminatorDeepNet.GetBatchDepth(), discriminatorDeepNet.GetBatchHeight(), discriminatorDeepNet.GetBatchWidth(), + discriminatorDeepNet.GetOutputWidth(), nThreads); + + //Creating noise matrices for input to generator + size_t nOutputs = 1; + + //Class Label is 0 for fake data + size_t fakeClassLabel = 0.0; + + TMatrixT 
generatorTrainingOutputMatrix(nTrainingSamples, nOutputs); + TMatrixT generatorTrainingWeights(nTrainingSamples, 1); + + TMatrixT generatorTestingOutputMatrix(nTestingSamples, nOutputs); + TMatrixT generatorTestingWeights(nTestingSamples, 1); + + std::vector> generatorTrainingInputTensor; + generatorTrainingInputTensor.reserve(nTrainingSamples); + + std::vector> generatorTestingInputTensor; + generatorTestingInputTensor.reserve(nTestingSamples); + + CreateNoisyMatrices(generatorTrainingInputTensor, generatorTrainingOutputMatrix, generatorTrainingWeights, generatorDeepNet, nTrainingSamples, fakeClassLabel); + + TensorInput generatorTrainingTuple(generatorTrainingInputTensor, generatorTrainingOutputMatrix, generatorTrainingWeights); + + // Loading the training and testing datasets + TTensorDataLoader generatorTrainingData(generatorTrainingTuple, nTrainingSamples, generatorDeepNet.GetBatchSize(), + generatorDeepNet.GetBatchDepth(), generatorDeepNet.GetBatchHeight(), generatorDeepNet.GetBatchWidth(), + discriminatorDeepNet.GetOutputWidth(), nThreads); + + CreateNoisyMatrices(generatorTestingInputTensor, generatorTestingOutputMatrix, generatorTestingWeights, generatorDeepNet, nTestingSamples, fakeClassLabel); + + TensorInput generatorTestingTuple(generatorTestingInputTensor, generatorTestingOutputMatrix, generatorTestingWeights); + + // Loading the training and testing datasets + TTensorDataLoader generatorTestingData(generatorTestingTuple, nTestingSamples, generatorDeepNet.GetBatchSize(), + generatorDeepNet.GetBatchDepth(), generatorDeepNet.GetBatchHeight(), generatorDeepNet.GetBatchWidth(), + generatorDeepNet.GetOutputWidth(), nThreads); + + + //size_t generatorBatchesInEpoch = nTrainingSamples / generatorDeepNet.GetBatchSize(); + +////////////////////////////////////////////////////////////////////////////////////////////////// +///////Discriminator Training +////////////////////////////////////////////////////////////////////////////////////////////////// + + // 
Initialize the vector of batches, one batch for one slave network + std::vector> discriminatorBatches{}; + + // count the steps until the convergence + size_t discriminatorStepCount = 0; + //size_t discriminatorBatchesInEpoch = nTrainingSamples / discriminatorDeepNet.GetBatchSize(); + + // Initialize the vector of batches, one batch for one slave network + std::vector> generatorBatches{}; + + // count the steps until the convergence + size_t generatorStepCount = 0; + //size_t generatorBatchesInEpoch = nTrainingSamples / generatorDeepNet.GetBatchSize(); + + //Double_t disMinTestError = 0; + // use discriminator with 0 seed to get always different values + RandomGenerator disRng(0); + + //Double_t genMinTestError = 0; + // use generator with 0 seed to get always different values + RandomGenerator genRng(0); + + //execute all epochs + for (size_t epoch = 0; epoch < maxEpochs; ++epoch) { + + discriminatorStepCount++; + discriminatorTrainingData.Shuffle(disRng); + + generatorStepCount++; + generatorTrainingData.Shuffle(genRng); + + SetDiscriminatorLayerTraining(discriminatorDeepNet); + + // execute one epoch on discriminator real data + for (auto discriminatorMy_batch : discriminatorTrainingData) { + + // execute one minimization step + // StepMomentum is currently not written for single thread, TODO write it + if (settings.discriminatorMomentum > 0.0) { + discriminatorMinimizer.Step(discriminatorDeepNet, discriminatorMy_batch.GetInput(), discriminatorMy_batch.GetOutput(), discriminatorMy_batch.GetWeights()); + } + else { + discriminatorMinimizer.Step(discriminatorDeepNet, discriminatorMy_batch.GetInput(), discriminatorMy_batch.GetOutput(), discriminatorMy_batch.GetWeights()); + } + } + + TMatrixT discriminatorTrainingOutputMatrix(nTrainingSamples, nOutputs); + TMatrixT discriminatorTrainingWeights(nTrainingSamples, 1); + + std::vector> discriminatorTrainingPredTensor; + + CreateDiscriminatorFakeData(discriminatorTrainingPredTensor, discriminatorTrainingOutputMatrix, 
discriminatorTrainingWeights, + generatorTrainingData, generatorDeepNet, discriminatorDeepNet, fOutputFunction, nTrainingSamples, fakeClassLabel, epoch); + + TensorInput discriminatorFakeTrainingTuple(discriminatorTrainingPredTensor, discriminatorTrainingOutputMatrix, discriminatorTrainingWeights); + + TTensorDataLoader discriminatorFakeTrainingData(discriminatorFakeTrainingTuple, nTrainingSamples, discriminatorDeepNet.GetBatchSize(), + discriminatorDeepNet.GetBatchDepth(), discriminatorDeepNet.GetBatchHeight(), + discriminatorDeepNet.GetBatchWidth(), discriminatorDeepNet.GetOutputWidth(), nThreads); + + //TODO: Need to add the option to change the discriminatorBatchesInEpoch to be varying for fake data + for (auto discriminatorMy_fakeBatch : discriminatorFakeTrainingData) { + + // execute one minimization step + // StepMomentum is currently not written for single thread, TODO write it + if (settings.discriminatorMomentum > 0.0) { + discriminatorMinimizer.Step(discriminatorDeepNet, discriminatorMy_fakeBatch.GetInput(), discriminatorMy_fakeBatch.GetOutput(), discriminatorMy_fakeBatch.GetWeights()); + } else { + discriminatorMinimizer.Step(discriminatorDeepNet, discriminatorMy_fakeBatch.GetInput(), discriminatorMy_fakeBatch.GetOutput(), discriminatorMy_fakeBatch.GetWeights()); + } + } + + + /// COMPUTE TRAINING ERROR FOR DISCRIMINATOR + Double_t discriminatorTrainingError = 0.0; + discriminatorTrainingError += ComputeLoss(discriminatorTrainingData, discriminatorDeepNet); + discriminatorTrainingError += ComputeLoss(discriminatorFakeTrainingData, discriminatorDeepNet); + //TODO: Change to incorporate varying nTrainingSamples + discriminatorTrainingError /= (Double_t)(2*nTrainingSamples / discriminatorBatchSize); + + if (!fInteractive) { + Log() << std::setw(10) << "Epoch" + << " | " << std::setw(12) << "Discriminator Train Err." 
<< Endl; + std::string separator(62, '-'); + Log() << separator << Endl; + } + + Log() << std::setw(10) << discriminatorStepCount << " | " << std::setw(12) << discriminatorTrainingError + << Endl; + + if ((discriminatorStepCount % discriminatorMinimizer.GetTestInterval()) == 0) { + + /* + /// COMPUTE TESTING ERROR FOR DISCRIMINATOR + Double_t discriminatorTestingError = 0.0; + //TODO: Change to incorporate varying nTrainingSamples + discriminatorTestingError += ComputeLoss(discriminatorTestingData, discriminatorDeepNet); + + TMatrixT discriminatorTestingOutputMatrix(nTestingSamples, nOutputs); + TMatrixT discriminatorTestingWeights(nTestingSamples, 1); + + std::vector> discriminatorTestingPredTensor; + + CreateDiscriminatorFakeData(discriminatorTestingPredTensor, discriminatorTestingOutputMatrix, discriminatorTestingWeights, generatorTestingData, generatorDeepNet, discriminatorDeepNet, nTestingSamples, fakeClassLabel); + + TensorInput discriminatorFakeTestingTuple(discriminatorTestingPredTensor, discriminatorTestingOutputMatrix, discriminatorTestingWeights); + + TTensorDataLoader discriminatorFakeTestingData(discriminatorFakeTestingTuple, nTestingSamples, discriminatorDeepNet.GetBatchSize(), + discriminatorDeepNet.GetBatchDepth(), discriminatorDeepNet.GetBatchHeight(), discriminatorDeepNet.GetBatchWidth(), + discriminatorDeepNet.GetOutputWidth(), nThreads); + + discriminatorTestingError += ComputeLoss(discriminatorFakeTestingData, discriminatorDeepNet); + discriminatorTestingError /= (Double_t)(2*nTestingSamples / discriminatorBatchSize); + + + //discriminatorConverged = discriminatorStepCount >= settings.maxEpochs; + + if (!fInteractive) { + Log() << std::setw(10) << "Epoch" + << " | " << std::setw(12) << "Discriminator Test Err." 
<< Endl; + std::string separator(62, '-'); + Log() << separator << Endl; + } + + Log() << std::setw(10) << discriminatorStepCount << " | " << std::setw(12) << discriminatorTestingError + << Endl;*/ + } + + // TODO: Instead of creating network repeatedly, just update the new weights + + // create a copy of DeepNet for evaluating but with batch size = 1 + // fNet is the saved network and will be with CPU or Referrence architecture + //combinedFNet = std::unique_ptr(new DeepNetImpl_t(1, generatorNet.GetInputDepth(), generatorNet.GetInputHeight(), + // generatorNet.GetInputWidth(), generatorNet.GetBatchDepth(), generatorNet.GetBatchHeight(), generatorNet.GetBatchWidth(), + // loss, initialization, regularization, weightDecay)); + + //Create combined Generator and Discriminator DeepNet_t + DeepNet_t combinedDeepNet(generatorDeepNet.GetBatchSize(), generatorDeepNet.GetInputDepth(), generatorDeepNet.GetInputHeight(), generatorDeepNet.GetInputWidth(), + generatorDeepNet.GetBatchDepth(), generatorDeepNet.GetBatchHeight(), generatorDeepNet.GetBatchWidth(), discriminatorJ, generatorI, generatorR, generatorWeightDecay); + + // create a copy of DeepNet for evaluating but with batch size = 1 + // fNet is the saved network and will be with CPU or Referrence architecture + combinedFNet = std::unique_ptr(new DeepNetImpl_t(1, generatorDeepNet.GetInputDepth(), generatorDeepNet.GetInputHeight(), generatorDeepNet.GetInputWidth(), + generatorDeepNet.GetBatchDepth(), generatorDeepNet.GetBatchHeight(), generatorDeepNet.GetBatchWidth(), discriminatorJ, generatorI, generatorR, generatorWeightDecay)); + + //std::cout << "***** Combined Deep Learning Network *****\n"; + CombineGAN(combinedDeepNet, generatorDeepNet, discriminatorDeepNet, combinedFNet); + + for (auto generatorMy_batch : generatorTrainingData) { + + auto input = generatorMy_batch.GetInput(); + auto output = generatorMy_batch.GetOutput(); + auto weights = generatorMy_batch.GetWeights(); + + // execute one minimization step + // 
StepMomentum is currently not written for single thread, TODO write it + if (settings.generatorMomentum > 0.0) { + //minimizer.StepMomentum(generatorDeepNet, nets, batches, settings.momentum); + generatorMinimizer.Step(combinedDeepNet, input, output, weights); + } else { + //minimizer.Step(generatorDeepNet, nets, batches); + generatorMinimizer.Step(combinedDeepNet, input, output, weights); + } + + } + //} + + /// COMPUTE TRAINING ERROR FOR GENERATOR + Double_t generatorTrainingError = 0.0; + generatorTrainingError = ComputeLoss(generatorTrainingData, combinedDeepNet); + //TODO: Change to incorporate varying nTrainingSamples + generatorTrainingError /= (Double_t)(nTrainingSamples / generatorBatchSize); + + if (!fInteractive) { + Log() << std::setw(10) << "Epoch" + << " | " << std::setw(12) << "Generator Train Err." << Endl; + std::string separator(62, '-'); + Log() << separator << Endl; + } + + Log() << std::setw(10) << generatorStepCount << " | " << std::setw(12) << generatorTrainingError + << Endl; + + if ((generatorStepCount % generatorMinimizer.GetTestInterval()) == 0) { + + /// COMPUTE TESTING ERROR FOR GENERATOR + Double_t generatorTestingError = 0.0; + //TODO: Change to incorporate varying nTrainingSamples + generatorTestingError = ComputeLoss(discriminatorTestingData, combinedDeepNet); + generatorTestingError /= (Double_t)(nTestingSamples / generatorBatchSize); + + if (!fInteractive) { + Log() << std::setw(10) << "Epoch" + << " | " << std::setw(12) << "Generator Test Err." 
<< Endl; + std::string separator(62, '-'); + Log() << separator << Endl; + } + + Log() << std::setw(10) << generatorStepCount << " | " << std::setw(12) << generatorTestingError + << Endl; + + } + } + } + //} +} + +//////////////////////////////////////////////////////////////////////////////// +Double_t MethodGAN::GetMvaValue(Double_t * /*errLower*/, Double_t * /*errUpper*/) +{ + //Double_t generatorMvaValue = this->GetMvaValueGAN(generatorFNet); + Double_t discriminatorMvaValue = this->GetMvaValueGAN(discriminatorFNet); + + return discriminatorMvaValue; +} + +//////////////////////////////////////////////////////////////////////////////// + Double_t MethodGAN::GetMvaValueGAN(std::unique_ptr & modelNet, Double_t * /*errLower*/, Double_t * /*errUpper*/) +{ + using Matrix_t = typename ArchitectureImpl_t::Matrix_t; + + int nVariables = GetEvent()->GetNVariables(); + int batchWidth = modelNet->GetBatchWidth(); + int batchDepth = modelNet->GetBatchDepth(); + int batchHeight = modelNet->GetBatchHeight(); + int nb = modelNet->GetBatchSize(); + int noutput = modelNet->GetOutputWidth(); + // note that batch size whould be equal to 1 + R__ASSERT(nb == 1); + + std::vector X{}; + Matrix_t YHat(nb, noutput); + + // get current event + const std::vector &inputValues = GetEvent()->GetValues(); + + // for (int i = 0; i < batchDepth; ++i) + + // find dimension of matrices + // Tensor outer size must be equal to 1 + // because nb ==1 by definition + int n1 = batchHeight; + int n2 = batchWidth; + // treat case where batchHeight is batchSize in case of first Dense layers + if (batchDepth == 1 && GetGeneratorInputHeight() == 1 && GetGeneratorInputDepth() == 1) n1 = 1; + + X.emplace_back(Matrix_t(n1, n2)); + + if (n1 > 1) { + R__ASSERT( n1*n2 == nVariables); + // for CNN or RNN evaluations + for (int j = 0; j < n1; ++j) { + for (int k = 0; k < n2; k++) { + X[0](j, k) = inputValues[j*n1+k]; + } + } + } + else { + R__ASSERT( n2 == nVariables); + for (int k = 0; k < n2; k++) { + X[0](0, k) 
= inputValues[k]; + } + } + + // perform the prediction + modelNet->Prediction(YHat, X, fOutputFunction); + + double mvaValue = YHat(0, 0); + + // for debugging +// #ifdef DEBUG +// TMatrixF xInput(n1,n2, inputValues.data() ); +// std::cout << "Input data - class " << GetEvent()->GetClass() << std::endl; +// xInput.Print(); +// std::cout << "Output of DeepNet " << mvaValue << std::endl; +// auto & deepnet = *modelNet; +// const auto * rnn = deepnet.GetLayerAt(0); +// const auto & rnn_output = rnn->GetOutput(); +// std::cout << "DNN output " << rnn_output.size() << std::endl; +// for (size_t i = 0; i < rnn_output.size(); ++i) { +// TMatrixD m(rnn_output[i].GetNrows(), rnn_output[i].GetNcols() , rnn_output[i].GetRawDataPointer() ); +// m.Print(); +// //rnn_output[i].Print(); +// } +// #endif +// std::cout << " { " << GetEvent()->GetClass() << " , " << mvaValue << " } "; + + + return (TMath::IsNaN(mvaValue)) ? -999. : mvaValue; + +} +//////////////////////////////////////////////////////////////////////////////// +void MethodGAN::AddWeightsXMLTo(void *parent) const +{ + AddWeightsXMLToGenerator(parent); + AddWeightsXMLToDiscriminator(parent); +} + +//////////////////////////////////////////////////////////////////////////////// +void MethodGAN::AddWeightsXMLToGenerator(void *parent) const +{ + // Create the parrent XML node with name "Weights" + auto & xmlEngine = gTools().xmlengine(); + void* nn = xmlEngine.NewChild(parent, 0, "GeneratorWeights"); + + /*! Get all necessary information, in order to be able to reconstruct the net + * if we read the same XML file. 
*/ + // Deep Net specific info + Int_t depth = generatorFNet->GetDepth(); + + Int_t inputDepth = generatorFNet->GetInputDepth(); + Int_t inputHeight = generatorFNet->GetInputHeight(); + Int_t inputWidth = generatorFNet->GetInputWidth(); + + Int_t batchSize = generatorFNet->GetBatchSize(); + + Int_t batchDepth = generatorFNet->GetBatchDepth(); + Int_t batchHeight = generatorFNet->GetBatchHeight(); + Int_t batchWidth = generatorFNet->GetBatchWidth(); + + char lossFunction = static_cast(generatorFNet->GetLossFunction()); + char initialization = static_cast(generatorFNet->GetInitialization()); + char regularization = static_cast(generatorFNet->GetRegularization()); + + Double_t weightDecay = generatorFNet->GetWeightDecay(); + + // Method specific info (not sure these are needed) + char outputFunction = static_cast(this->GetOutputFunction()); + //char lossFunction = static_cast(this->GetLossFunction()); + + // Add attributes to the parent node + xmlEngine.NewAttr(nn, 0, "NetDepth", gTools().StringFromInt(depth)); + + xmlEngine.NewAttr(nn, 0, "InputDepth", gTools().StringFromInt(inputDepth)); + xmlEngine.NewAttr(nn, 0, "InputHeight", gTools().StringFromInt(inputHeight)); + xmlEngine.NewAttr(nn, 0, "InputWidth", gTools().StringFromInt(inputWidth)); + + xmlEngine.NewAttr(nn, 0, "BatchSize", gTools().StringFromInt(batchSize)); + xmlEngine.NewAttr(nn, 0, "BatchDepth", gTools().StringFromInt(batchDepth)); + xmlEngine.NewAttr(nn, 0, "BatchHeight", gTools().StringFromInt(batchHeight)); + xmlEngine.NewAttr(nn, 0, "BatchWidth", gTools().StringFromInt(batchWidth)); + + xmlEngine.NewAttr(nn, 0, "LossFunction", TString(lossFunction)); + xmlEngine.NewAttr(nn, 0, "Initialization", TString(initialization)); + xmlEngine.NewAttr(nn, 0, "Regularization", TString(regularization)); + xmlEngine.NewAttr(nn, 0, "OutputFunction", TString(outputFunction)); + + gTools().AddAttr(nn, "WeightDecay", weightDecay); + + for (Int_t i = 0; i < depth; i++) + { + generatorFNet->GetLayerAt(i) -> 
AddWeightsXMLTo(nn); + } +} +////////////////////////////////////////////////////////////////////////// +void MethodGAN::AddWeightsXMLToDiscriminator(void *parent) const +{ + // Create the parrent XML node with name "Weights" + auto & xmlEngine = gTools().xmlengine(); + void* nn = xmlEngine.NewChild(parent, 0, "DiscriminatorWeights"); + + /*! Get all necessary information, in order to be able to reconstruct the net + * if we read the same XML file. */ + // Deep Net specific info + Int_t depth = discriminatorFNet->GetDepth(); + + Int_t inputDepth = discriminatorFNet->GetInputDepth(); + Int_t inputHeight = discriminatorFNet->GetInputHeight(); + Int_t inputWidth = discriminatorFNet->GetInputWidth(); + + Int_t batchSize = discriminatorFNet->GetBatchSize(); + + Int_t batchDepth = discriminatorFNet->GetBatchDepth(); + Int_t batchHeight = discriminatorFNet->GetBatchHeight(); + Int_t batchWidth = discriminatorFNet->GetBatchWidth(); + + char lossFunction = static_cast(discriminatorFNet->GetLossFunction()); + char initialization = static_cast(discriminatorFNet->GetInitialization()); + char regularization = static_cast(discriminatorFNet->GetRegularization()); + + Double_t weightDecay = discriminatorFNet->GetWeightDecay(); + + // Method specific info (not sure these are needed) + char outputFunction = static_cast(this->GetOutputFunction()); + //char lossFunction = static_cast(this->GetLossFunction()); + + // Add attributes to the parent node + xmlEngine.NewAttr(nn, 0, "NetDepth", gTools().StringFromInt(depth)); + + xmlEngine.NewAttr(nn, 0, "InputDepth", gTools().StringFromInt(inputDepth)); + xmlEngine.NewAttr(nn, 0, "InputHeight", gTools().StringFromInt(inputHeight)); + xmlEngine.NewAttr(nn, 0, "InputWidth", gTools().StringFromInt(inputWidth)); + + xmlEngine.NewAttr(nn, 0, "BatchSize", gTools().StringFromInt(batchSize)); + xmlEngine.NewAttr(nn, 0, "BatchDepth", gTools().StringFromInt(batchDepth)); + xmlEngine.NewAttr(nn, 0, "BatchHeight", gTools().StringFromInt(batchHeight)); 
+ xmlEngine.NewAttr(nn, 0, "BatchWidth", gTools().StringFromInt(batchWidth)); + + xmlEngine.NewAttr(nn, 0, "LossFunction", TString(lossFunction)); + xmlEngine.NewAttr(nn, 0, "Initialization", TString(initialization)); + xmlEngine.NewAttr(nn, 0, "Regularization", TString(regularization)); + xmlEngine.NewAttr(nn, 0, "OutputFunction", TString(outputFunction)); + + gTools().AddAttr(nn, "WeightDecay", weightDecay); + + for (Int_t i = 0; i < depth; i++) + { + discriminatorFNet->GetLayerAt(i) -> AddWeightsXMLTo(nn); + } +} +////////////////////////////////////////////////////////////////////////// +void MethodGAN::ReadWeightsFromXML(void *wghtnode) +{ + ReadWeightsFromXMLGenerator(wghtnode); + ReadWeightsFromXMLDiscriminator(wghtnode); +} +///////////////////////////////////////////////////////////////////////// +void MethodGAN::ReadWeightsFromXMLGenerator(void *rootXML) +{ + std::cout << "READ DL network from XML " << std::endl; + std::cout << "ReadWeightsfromXML" << std::endl; + + auto netXML = gTools().GetChild(rootXML, "GeneratorWeights"); + if (!netXML){ + netXML = rootXML; + } + + size_t netDepth; + gTools().ReadAttr(netXML, "NetDepth", netDepth); + + size_t inputDepth, inputHeight, inputWidth; + gTools().ReadAttr(netXML, "InputDepth", inputDepth); + gTools().ReadAttr(netXML, "InputHeight", inputHeight); + gTools().ReadAttr(netXML, "InputWidth", inputWidth); + + size_t batchSize, batchDepth, batchHeight, batchWidth; + gTools().ReadAttr(netXML, "BatchSize", batchSize); + // use always batchsize = 1 + //batchSize = 1; + gTools().ReadAttr(netXML, "BatchDepth", batchDepth); + gTools().ReadAttr(netXML, "BatchHeight", batchHeight); + gTools().ReadAttr(netXML, "BatchWidth", batchWidth); + + char lossFunctionChar; + gTools().ReadAttr(netXML, "LossFunction", lossFunctionChar); + char initializationChar; + gTools().ReadAttr(netXML, "Initialization", initializationChar); + char regularizationChar; + gTools().ReadAttr(netXML, "Regularization", regularizationChar); + char 
outputFunctionChar; + gTools().ReadAttr(netXML, "OutputFunction", outputFunctionChar); + double weightDecay; + gTools().ReadAttr(netXML, "WeightDecay", weightDecay); + + std::cout << "lossfunction is " << lossFunctionChar << std::endl; + + // DeepNetCpu_t is defined in MethodDL.h + + generatorFNet = std::unique_ptr(new DeepNetImpl_t(batchSize, inputDepth, inputHeight, inputWidth, batchDepth, + batchHeight, batchWidth, + static_cast(lossFunctionChar), + static_cast(initializationChar), + static_cast(regularizationChar), + weightDecay)); + + fOutputFunction = static_cast(outputFunctionChar); + + //size_t previousWidth = inputWidth; + auto layerXML = gTools().xmlengine().GetChild(netXML); + + // loop on the layer and add them to the network + for (size_t i = 0; i < netDepth; i++) { + + TString layerName = gTools().xmlengine().GetNodeName(layerXML); + + // case of dense layer + if (layerName == "DenseLayer") { + + // read width and activation function and then we can create the layer + size_t width = 0; + gTools().ReadAttr(layerXML, "Width", width); + + // Read activation function. 
+ TString funcString; + gTools().ReadAttr(layerXML, "ActivationFunction", funcString); + EActivationFunction func = static_cast(funcString.Atoi()); + + + generatorFNet->AddDenseLayer(width, func, 0.0); // no need to pass dropout probability + } + + // Convolutional Layer + else if (layerName == "ConvLayer") { + + // read width and activation function and then we can create the layer + size_t depth = 0; + gTools().ReadAttr(layerXML, "Depth", depth); + size_t fltHeight, fltWidth = 0; + size_t strideRows, strideCols = 0; + size_t padHeight, padWidth = 0; + gTools().ReadAttr(layerXML, "FilterHeight", fltHeight); + gTools().ReadAttr(layerXML, "FilterWidth", fltWidth); + gTools().ReadAttr(layerXML, "StrideRows", strideRows); + gTools().ReadAttr(layerXML, "StrideCols", strideCols); + gTools().ReadAttr(layerXML, "PaddingHeight", padHeight); + gTools().ReadAttr(layerXML, "PaddingWidth", padWidth); + + // Read activation function. + TString funcString; + gTools().ReadAttr(layerXML, "ActivationFunction", funcString); + EActivationFunction actFunction = static_cast(funcString.Atoi()); + + + generatorFNet->AddConvLayer(depth, fltHeight, fltWidth, strideRows, strideCols, + padHeight, padWidth, actFunction); + + } + + // MaxPool Layer + else if (layerName == "MaxPoolLayer") { + + // read maxpool layer info + size_t frameHeight, frameWidth = 0; + size_t strideRows, strideCols = 0; + gTools().ReadAttr(layerXML, "FrameHeight", frameHeight); + gTools().ReadAttr(layerXML, "FrameWidth", frameWidth); + gTools().ReadAttr(layerXML, "StrideRows", strideRows); + gTools().ReadAttr(layerXML, "StrideCols", strideCols); + + generatorFNet->AddMaxPoolLayer(frameHeight, frameWidth, strideRows, strideCols); + } + else if (layerName == "ReshapeLayer") { + + // read reshape layer info + size_t depth, height, width = 0; + gTools().ReadAttr(layerXML, "Depth", depth); + gTools().ReadAttr(layerXML, "Height", height); + gTools().ReadAttr(layerXML, "Width", width); + int flattening = 0; + 
gTools().ReadAttr(layerXML, "Flattening",flattening ); + + generatorFNet->AddReshapeLayer(depth, height, width, flattening); + + } + else if (layerName == "RNNLayer") { + + std::cout << "add RNN layer " << std::endl; + + // read reshape layer info + size_t stateSize,inputSize, timeSteps = 0; + int rememberState= 0; + gTools().ReadAttr(layerXML, "StateSize", stateSize); + gTools().ReadAttr(layerXML, "InputSize", inputSize); + gTools().ReadAttr(layerXML, "TimeSteps", timeSteps); + gTools().ReadAttr(layerXML, "RememberState", rememberState ); + + generatorFNet->AddBasicRNNLayer(stateSize, inputSize, timeSteps, rememberState); + + } + + + // read eventually weights and biases + generatorFNet->GetLayers().back()->ReadWeightsFromXML(layerXML); + + // read next layer + layerXML = gTools().GetNextChild(layerXML); + } +} + +//////////////////////////////////////////////////////////////////////////////// +void MethodGAN::ReadWeightsFromXMLDiscriminator(void *rootXML) +{ + std::cout << "READ DL network from XML " << std::endl; + + auto netXML = gTools().GetChild(rootXML, "DiscriminatorWeights"); + if (!netXML){ + netXML = rootXML; + } + + size_t netDepth; + gTools().ReadAttr(netXML, "NetDepth", netDepth); + + size_t inputDepth, inputHeight, inputWidth; + gTools().ReadAttr(netXML, "InputDepth", inputDepth); + gTools().ReadAttr(netXML, "InputHeight", inputHeight); + gTools().ReadAttr(netXML, "InputWidth", inputWidth); + + size_t batchSize, batchDepth, batchHeight, batchWidth; + gTools().ReadAttr(netXML, "BatchSize", batchSize); + // use always batchsize = 1 + //batchSize = 1; + gTools().ReadAttr(netXML, "BatchDepth", batchDepth); + gTools().ReadAttr(netXML, "BatchHeight", batchHeight); + gTools().ReadAttr(netXML, "BatchWidth", batchWidth); + + char lossFunctionChar; + gTools().ReadAttr(netXML, "LossFunction", lossFunctionChar); + char initializationChar; + gTools().ReadAttr(netXML, "Initialization", initializationChar); + char regularizationChar; + gTools().ReadAttr(netXML, 
"Regularization", regularizationChar); + char outputFunctionChar; + gTools().ReadAttr(netXML, "OutputFunction", outputFunctionChar); + double weightDecay; + gTools().ReadAttr(netXML, "WeightDecay", weightDecay); + + std::cout << "lossfunction is " << lossFunctionChar << std::endl; + + // DeepNetCpu_t is defined in MethodDL.h + + discriminatorFNet = std::unique_ptr(new DeepNetImpl_t(batchSize, inputDepth, inputHeight, inputWidth, batchDepth, + batchHeight, batchWidth, + static_cast(lossFunctionChar), + static_cast(initializationChar), + static_cast(regularizationChar), + weightDecay)); + + fOutputFunction = static_cast(outputFunctionChar); + + //size_t previousWidth = inputWidth; + auto layerXML = gTools().xmlengine().GetChild(netXML); + + // loop on the layer and add them to the network + for (size_t i = 0; i < netDepth; i++) { + + TString layerName = gTools().xmlengine().GetNodeName(layerXML); + + // case of dense layer + if (layerName == "DenseLayer") { + + // read width and activation function and then we can create the layer + size_t width = 0; + gTools().ReadAttr(layerXML, "Width", width); + + // Read activation function. 
+ TString funcString; + gTools().ReadAttr(layerXML, "ActivationFunction", funcString); + EActivationFunction func = static_cast(funcString.Atoi()); + + + discriminatorFNet->AddDenseLayer(width, func, 0.0); // no need to pass dropout probability + } + + // Convolutional Layer + else if (layerName == "ConvLayer") { + + // read width and activation function and then we can create the layer + size_t depth = 0; + gTools().ReadAttr(layerXML, "Depth", depth); + size_t fltHeight, fltWidth = 0; + size_t strideRows, strideCols = 0; + size_t padHeight, padWidth = 0; + gTools().ReadAttr(layerXML, "FilterHeight", fltHeight); + gTools().ReadAttr(layerXML, "FilterWidth", fltWidth); + gTools().ReadAttr(layerXML, "StrideRows", strideRows); + gTools().ReadAttr(layerXML, "StrideCols", strideCols); + gTools().ReadAttr(layerXML, "PaddingHeight", padHeight); + gTools().ReadAttr(layerXML, "PaddingWidth", padWidth); + + // Read activation function. + TString funcString; + gTools().ReadAttr(layerXML, "ActivationFunction", funcString); + EActivationFunction actFunction = static_cast(funcString.Atoi()); + + + discriminatorFNet->AddConvLayer(depth, fltHeight, fltWidth, strideRows, strideCols, + padHeight, padWidth, actFunction); + + } + + // MaxPool Layer + else if (layerName == "MaxPoolLayer") { + + // read maxpool layer info + size_t frameHeight, frameWidth = 0; + size_t strideRows, strideCols = 0; + gTools().ReadAttr(layerXML, "FrameHeight", frameHeight); + gTools().ReadAttr(layerXML, "FrameWidth", frameWidth); + gTools().ReadAttr(layerXML, "StrideRows", strideRows); + gTools().ReadAttr(layerXML, "StrideCols", strideCols); + + discriminatorFNet->AddMaxPoolLayer(frameHeight, frameWidth, strideRows, strideCols); + } + else if (layerName == "ReshapeLayer") { + + // read reshape layer info + size_t depth, height, width = 0; + gTools().ReadAttr(layerXML, "Depth", depth); + gTools().ReadAttr(layerXML, "Height", height); + gTools().ReadAttr(layerXML, "Width", width); + int flattening = 0; + 
gTools().ReadAttr(layerXML, "Flattening",flattening ); + + discriminatorFNet->AddReshapeLayer(depth, height, width, flattening); + + } + else if (layerName == "RNNLayer") { + + std::cout << "add RNN layer " << std::endl; + + // read reshape layer info + size_t stateSize,inputSize, timeSteps = 0; + int rememberState= 0; + gTools().ReadAttr(layerXML, "StateSize", stateSize); + gTools().ReadAttr(layerXML, "InputSize", inputSize); + gTools().ReadAttr(layerXML, "TimeSteps", timeSteps); + gTools().ReadAttr(layerXML, "RememberState", rememberState ); + + discriminatorFNet->AddBasicRNNLayer(stateSize, inputSize, timeSteps, rememberState); + + } + + // read eventually weights and biases + discriminatorFNet->GetLayers().back()->ReadWeightsFromXML(layerXML); + + // read next layer + layerXML = gTools().GetNextChild(layerXML); + } +} + +//////////////////////////////////////////////////////////////////////////////// +void MethodGAN::ReadWeightsFromStream(std::istream & /*istr*/) +{ +} + +//////////////////////////////////////////////////////////////////////////////// +const Ranking *TMVA::MethodGAN::CreateRanking() +{ + // TODO + return NULL; +} + +//////////////////////////////////////////////////////////////////////////////// +void MethodGAN::GetHelpMessage() const +{ + // TODO +} + +} // namespace TMVA diff --git a/tmva/tmva/test/DNN/CNN/CMakeLists.txt b/tmva/tmva/test/DNN/CNN/CMakeLists.txt index 6a7d8cf2b0361..b44e9ad74d749 100644 --- a/tmva/tmva/test/DNN/CNN/CMakeLists.txt +++ b/tmva/tmva/test/DNN/CNN/CMakeLists.txt @@ -71,9 +71,10 @@ ROOT_ADD_TEST(TMVA-DNN-CNN-Minimization-CPU COMMAND testDLMinimizationCpu) ROOT_EXECUTABLE(testConvBackpropagationCpu TestConvBackpropagation.cxx LIBRARIES ${Libraries}) ROOT_ADD_TEST(TMVA-DNN-CNN-Backpropagation-CPU COMMAND testConvBackpropagationCpu) +#ROOT_EXECUTABLE(testMethodDLCpu TestMethodDL.cxx LIBRARIES ${Libraries}) +#ROOT_ADD_TEST(TMVA-DNN-CNN-MethodDL-CPU COMMAND testMethodDLCpu) -ROOT_EXECUTABLE(testMethodDLCpu TestMethodDL.cxx 
LIBRARIES ${Libraries}) -ROOT_ADD_TEST(TMVA-DNN-CNN-MethodDL-CPU COMMAND testMethodDLCpu) - +ROOT_EXECUTABLE(testMethodGAN TestMethodGAN.cxx LIBRARIES ${Libraries}) +ROOT_ADD_TEST(TMVA-DNN-CNN-MethodGAN COMMAND testMethodGAN) endif () diff --git a/tmva/tmva/test/DNN/CNN/TestMethodGAN.cxx b/tmva/tmva/test/DNN/CNN/TestMethodGAN.cxx new file mode 100644 index 0000000000000..b2c3229701f20 --- /dev/null +++ b/tmva/tmva/test/DNN/CNN/TestMethodGAN.cxx @@ -0,0 +1,41 @@ +// @(#)root/tmva/tmva/cnn:$Id$ +// Author: Anushree Rankawat + +/********************************************************************************** + * Project: TMVA - a Root-integrated toolkit for multivariate data analysis * + * Package: TMVA * + * Class : * + * Web : http://tmva.sourceforge.net * + * * + * Description: * + * Testing Method GAN * + * * + * Authors (alphabetical): * + * Anushree Rankawat * + * * + * Copyright (c) 2005-2015: * + * CERN, Switzerland * + * U. of Victoria, Canada * + * MPI-K Heidelberg, Germany * + * U. 
of Bonn, Germany * + * * + * Redistribution and use in source and binary forms, with or without * + * modification, are permitted according to the terms listed in LICENSE * + * (http://tmva.sourceforge.net/LICENSE) * + **********************************************************************************/ + +#include "TestMethodGAN.h" +#include "TString.h" + +int main() +{ + std::cout << "Testing Method GAN for CPU backend: " << std::endl; + + TString archCPU = "CPU"; + + testMethodGAN_DNN(archCPU); + testCreateNoisyMatrices(); + testCreateDiscriminatorFakeData(); + testCombineGAN(); + +} diff --git a/tmva/tmva/test/DNN/CNN/TestMethodGAN.h b/tmva/tmva/test/DNN/CNN/TestMethodGAN.h new file mode 100644 index 0000000000000..71279992ed1fd --- /dev/null +++ b/tmva/tmva/test/DNN/CNN/TestMethodGAN.h @@ -0,0 +1,533 @@ +// @(#)root/tmva/tmva/cnn:$Id$ +// Author: Anushree Rankawat + +/********************************************************************************** + * Project: TMVA - a Root-integrated toolkit for multivariate data analysis * + * Package: TMVA * + * Class : * + * Web : http://tmva.sourceforge.net * + * * + * Description: * + * Testing MethodGAN for Generative Adversarial Networks * + * * + * Authors (alphabetical): * + * Anushree Rankawat * + * * + * Copyright (c) 2005-2015: * + * CERN, Switzerland * + * U. of Victoria, Canada * + * MPI-K Heidelberg, Germany * + * U. 
of Bonn, Germany * + * * + * Redistribution and use in source and binary forms, with or without * + * modification, are permitted according to the terms listed in LICENSE * + * (http://tmva.sourceforge.net/LICENSE) * + **********************************************************************************/ + +#ifndef TMVA_TEST_DNN_TEST_CNN_TEST_METHOD_GAN_H +#define TMVA_TEST_DNN_TEST_CNN_TEST_METHOD_GAN_H + +#include "TFile.h" +#include "TTree.h" +#include "TString.h" +#include "TROOT.h" + +#include "TMVA/MethodGAN.h" +#include "TMVA/DataLoader.h" +#include "TMVA/Factory.h" +#include "TMVA/Config.h" +#include "TMVA/ClassifierFactory.h" + +#include + +using DeepNet_t = TMVA::DNN::TDeepNet; +using ArchitectureImpl_t = TMVA::DNN::TCpu; +using DeepNetImpl_t = TMVA::DNN::TDeepNet; + +/** Testing the entire pipeline of the Method GAN*/ +//______________________________________________________________________________ + + +void testMethodGAN_DNN(TString architectureStr) +{ + TFile *input(0); + TString fname = "/home/anushree/GSoC/DataCreation/mnist_original1.root"; + + input = TFile::Open( fname ); + + TTree *signalTree = (TTree*)input->Get("train_sig"); + TTree *background = (TTree*)input->Get("train_bkg"); + + // Create a ROOT output file where TMVA will store ntuples, histograms, etc. 
+ TString outfileName( "TMVA_MethodGAN.root" ); + TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); + + TMVA::Factory *factory = new TMVA::Factory( "TMVAGAN", outputFile, + "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" ); + + TMVA::DataLoader *dataloader = new TMVA::DataLoader("dataset"); + + // global event weights per tree (see below for setting event-wise weights) + Double_t signalWeight = 1.0; + Double_t backgroundWeight = 1.0; + + // You can add an arbitrary number of signal or background trees + dataloader->AddSignalTree ( signalTree, signalWeight ); + dataloader->AddBackgroundTree( background, backgroundWeight ); + + // Apply additional cuts on the signal and background samples (can be different) + TCut mycuts = ""; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1"; + TCut mycutb = ""; // for example: TCut mycutb = "abs(var1)<0.5"; + + + for (int i = 0; i < 28; ++i) { + for (int j = 0; j < 28; ++j) { + int ivar=i*28+j; + TString varName = TString::Format("x%d",ivar); + dataloader->AddVariable(varName,'F'); + } + } + + dataloader->PrepareTrainingAndTestTree( mycuts, mycutb, + "nTrain_Signal=1000:nTrain_Background=1000:SplitMode=Random:NormMode=NumEvents:!V" ); + + // Input Layout + TString inputLayoutString("InputLayout=1|1|784##1|1|784"); + + // Batch Layout + TString batchLayoutString("BatchLayout=32|1|784##32|1|784"); + + //General Layout + TString layoutString ("Layout=RESHAPE|1|1|784|FLAT,DENSE|256|RELU,DENSE|512|RELU,DENSE|1024|RELU,DENSE|784|TANH##RESHAPE|1|1|784|FLAT,DENSE|512|RELU,DENSE|256|RELU,DENSE|1|SIGMOID"); + + // Training strategies. 
+ TString training0("MaxEpochs=2,GeneratorLearningRate=2e-4,GeneratorMomentum=0.9,GeneratorRepetitions=1," + "GeneratorConvergenceSteps=20,GeneratorBatchSize=32,GeneratorTestRepetitions=10," + "GeneratorWeightDecay=1e-4,GeneratorRegularization=L2," + "GeneratorDropConfig=0.0+0.5+0.5+0.5, GeneratorMultithreading=True," + "DiscriminatorLearningRate=2e-4,DiscriminatorMomentum=0.9,DiscriminatorRepetitions=1," + "DiscriminatorConvergenceSteps=20,DiscriminatorBatchSize=32,DiscriminatorTestRepetitions=10," + "DiscriminatorWeightDecay=1e-4,DiscriminatorRegularization=L2," + "DiscriminatorDropConfig=0.0+0.5+0.5+0.5, DiscriminatorMultithreading=True"); + TString training1("MaxEpochs=2,GeneratorLearningRate=2e-5,GeneratorMomentum=0.9,GeneratorRepetitions=1," + "GeneratorConvergenceSteps=20,GeneratorBatchSize=32,GeneratorTestRepetitions=10," + "GeneratorWeightDecay=2e-5,GeneratorRegularization=L2," + "GeneratorDropConfig=0.0+0.0+0.0+0.0, GeneratorMultithreading=True," + "DiscriminatorLearningRate=1e-5,DiscriminatorMomentum=0.9,DiscriminatorRepetitions=1," + "DiscriminatorConvergenceSteps=20,DiscriminatorBatchSize=32,DiscriminatorTestRepetitions=10," + "DiscriminatorWeightDecay=1e-4,DiscriminatorRegularization=L2," + "DropConfig=0.0+0.0+0.0+0.0, Multithreading=True"); + TString training2("MaxEpochs=2,GeneratorLearningRate=2e-6,GeneratorMomentum=0.0,GeneratorRepetitions=1," + "GeneratorConvergenceSteps=20,GeneratorBatchSize=32,GeneratorTestRepetitions=10," + "GeneratorWeightDecay=2e-6,GeneratorRegularization=L2," + "GeneratorDropConfig=0.0+0.0+0.0+0.0, GeneratorMultithreading=True," + "DiscriminatorLearningRate=1e-6,DiscriminatorMomentum=0.0,DiscriminatorRepetitions=1," + "DiscriminatorConvergenceSteps=20, DiscriminatorBatchSize=32, DiscriminatorTestRepetitions=10," + "DiscriminatorWeightDecay=1e-4, DiscriminatorRegularization=L2," + "DiscriminatorDropConfig=0.0+0.0+0.0+0.0, DiscriminatorMultithreading=True"); + TString trainingStrategyString ("TrainingStrategy="); + 
trainingStrategyString += training0 + "|" + training1 + "|" + training2; + + // General Options.DataSet + TString ganOptions ("!H:V:ErrorStrategy=CROSSENTROPY:VarTransform=N:" + "WeightInitialization=XAVIERUNIFORM"); + + ganOptions.Append(":"); + ganOptions.Append(inputLayoutString); + + ganOptions.Append(":"); + ganOptions.Append(batchLayoutString); + ganOptions.Append (":"); + ganOptions.Append (layoutString); + ganOptions.Append (":"); + ganOptions.Append (trainingStrategyString); + + TString cpuOptions = ganOptions + architectureStr; + factory->BookMethod(dataloader, TMVA::Types::kGAN, "GAN", cpuOptions); + + // Train MVAs using the set of training events + factory->TrainAllMethods(); + + // Save the output + outputFile->Close(); + + std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; + std::cout << "==> TMVAGAN is done!" << std::endl; + + delete factory; + delete dataloader; +} + +TMVA::DNN::TDeepNet* createAlexNet(size_t outputWidth) +{ + /*AlexNet Architecture!!*/ + + printf("Initialize AlexNet\n"); + + /* TDeepNet(size_t BatchSize, size_t inputDepth, size_t inputHeight, size_t inputWidth, size_t batchDepth, size_t batchHeight, size_t batchWidth, ELossFunction fJ, EInitialization fI = EInitialization::kZero, ERegularization fR = ERegularization::kNone, Scalar_t fWeightDecay = 0.0, bool isTraining = false);*/ + TMVA::DNN::TDeepNet *deepNet = new TMVA::DNN::TDeepNet(2, 3, 227, 227, 2, 3, 51529, ELossFunction::kCrossEntropy, EInitialization::kGauss, ERegularization::kL2, 0.0001, false); + + EActivationFunction activationFunction = EActivationFunction::kRelu; + + /* TConvLayer *AddConvLayer(size_t depth, size_t filterHeight, size_t filterWidth, size_t strideRows, size_t strideCols, size_t paddingHeight, size_t paddingWidth, EActivationFunction f, Scalar_t dropoutProbability = 1.0);*/ + TConvLayer *convLayer1 = deepNet->AddConvLayer(96, 11, 11, 4, 4, 0, 0, activationFunction); + convLayer1->Initialize(); + + /* TMaxPoolLayer 
*AddMaxPoolLayer(size_t frameHeight, size_t frameWidth, size_t strideRows, size_t strideCols, Scalar_t dropoutProbability = 1.0); */ + TMaxPoolLayer *maxPoolLayer1 = deepNet->AddMaxPoolLayer(3, 3, 2, 2, 0.0); + maxPoolLayer1->Initialize(); + + /* TConvLayer *AddConvLayer(size_t depth, size_t filterHeight, size_t filterWidth, size_t strideRows, size_t strideCols, size_t paddingHeight, size_t paddingWidth, EActivationFunction f, Scalar_t dropoutProbability = 1.0);*/ + TConvLayer *convLayer2 = deepNet->AddConvLayer(256, 5, 5, 1, 1, 2, 2, activationFunction); + convLayer2->Initialize(); + + /* TMaxPoolLayer *AddMaxPoolLayer(size_t frameHeight, size_t frameWidth, size_t strideRows, size_t strideCols, Scalar_t dropoutProbability = 1.0); */ + TMaxPoolLayer *maxPoolLayer2 = deepNet->AddMaxPoolLayer(3, 3, 2, 2, 0.0); + maxPoolLayer2->Initialize(); + + /* TConvLayer *AddConvLayer(size_t depth, size_t filterHeight, size_t filterWidth, size_t strideRows, size_t strideCols, size_t paddingHeight, size_t paddingWidth, EActivationFunction f, Scalar_t dropoutProbability = 1.0);*/ + TConvLayer *convLayer3 = deepNet->AddConvLayer(384, 3, 3, 1, 1, 1, 1, activationFunction); + convLayer3->Initialize(); + + /* TConvLayer *AddConvLayer(size_t depth, size_t filterHeight, size_t filterWidth, size_t strideRows, size_t strideCols, size_t paddingHeight, size_t paddingWidth, EActivationFunction f, Scalar_t dropoutProbability = 1.0);*/ + TConvLayer *convLayer4 = deepNet->AddConvLayer(384, 3, 3, 1, 1, 1, 1, activationFunction); + convLayer4->Initialize(); + + /* TConvLayer *AddConvLayer(size_t depth, size_t filterHeight, size_t filterWidth, size_t strideRows, size_t strideCols, size_t paddingHeight, size_t paddingWidth, EActivationFunction f, Scalar_t dropoutProbability = 1.0);*/ + TConvLayer *convLayer5 = deepNet->AddConvLayer(256, 3, 3, 1, 1, 1, 1, activationFunction); + convLayer5->Initialize(); + + /* TMaxPoolLayer *AddMaxPoolLayer(size_t frameHeight, size_t frameWidth, size_t strideRows, 
size_t strideCols, Scalar_t dropoutProbability = 1.0); */ + TMaxPoolLayer *maxPoolLayer3 = deepNet->AddMaxPoolLayer(3, 3, 2, 2, 0.0); + maxPoolLayer3->Initialize(); + + size_t depthReshape = 1; + size_t heightReshape = 1; + size_t widthReshape = deepNet->GetLayerAt(deepNet->GetDepth() - 1)->GetDepth() * + deepNet->GetLayerAt(deepNet->GetDepth() - 1)->GetHeight() * + deepNet->GetLayerAt(deepNet->GetDepth() - 1)->GetWidth(); + + deepNet->AddReshapeLayer(depthReshape, heightReshape, widthReshape, true); + + /* TDenseLayer *AddDenseLayer(size_t width, EActivationFunction f, Scalar_t dropoutProbability = 1.0);*/ + TDenseLayer *denseLayer1 = deepNet->AddDenseLayer(512, activationFunction, 0.0); + denseLayer1->Initialize(); + + // Removed since computations taken after inclusion of this layer was high + /* TDenseLayer *AddDenseLayer(size_t width, EActivationFunction f, Scalar_t dropoutProbability = 1.0);*/ + //TDenseLayer *denseLayer2 = deepNet->AddDenseLayer(4096, activationFunction, 0.0); + //denseLayer2->Initialize(); + + /* TDenseLayer *AddDenseLayer(size_t width, EActivationFunction f, Scalar_t dropoutProbability = 1.0);*/ + TDenseLayer *denseLayer3 = deepNet->AddDenseLayer(outputWidth, activationFunction, 0.0); + denseLayer3->Initialize(); + + printf("Initialization of AlexNet ends\n"); + + return deepNet; +} + + +void testCreateNoisyMatrices() +{ + double convergenceSteps = 100; + double batchSize = 2; + double maxEpochs = 2000; + double testInterval = 7; + double learningRate = 1e-5; + double momentum = 0.3; + double weightDecay = 1e-4; + bool multiThreading = false; + + DNN::EInitialization weightInitialization = EInitialization::kGauss; ///< The initialization method + DNN::EOutputFunction outputFunction; ///< The output function for making the predictions + DNN::ELossFunction lossFunction = ELossFunction::kCrossEntropy; + DNN::ERegularization regularization = ERegularization::kL2; + EActivationFunction activationFunction = EActivationFunction::kRelu; + + size_t 
nSamples = 500; + size_t nChannels = 3; + size_t nImgHeight = 227; + size_t nImgWidth = 227; + + size_t inputDepth = 3; + size_t inputHeight = 227; + size_t inputWidth = 227; + + size_t batchDepth = batchSize; + size_t batchHeight = nChannels; + size_t batchWidth = nImgHeight * nImgWidth; + size_t nOutputs = 1; + + //Get model for training + TMVA::DNN::TDeepNet *deepNet = createAlexNet(batchWidth); + + // print the created network + std::cout << "***** Deep Learning Network *****\n"; + deepNet->Print(); + + //Class Label is 0 for fake data + size_t fakeClassLabel = 0.0; + + std::vector> inputTensor; + inputTensor.reserve(nSamples); + + TMatrixT outputMatrix(nSamples, nOutputs); + TMatrixT weights(nSamples, 1); + + TString jobName = "TestCreateNoisyMatrices"; + TString methodTitle = "MethodGAN"; + TString optionString = ""; + TString emptyString = ""; + TString typeString = "kGAN"; + + DataSetInfo dataInfo(emptyString); + + MethodGAN objectGAN(jobName, methodTitle, dataInfo, optionString); + objectGAN.SetupMethod(); + objectGAN.CreateNoisyMatrices(inputTensor, outputMatrix, weights, *deepNet, nSamples, fakeClassLabel); + + size_t weightRowVal = weights.GetNrows(); + size_t weightColVal = weights.GetNcols(); + size_t outputMatrixRowVal = outputMatrix.GetNrows(); + size_t outputMatrixColVal = outputMatrix.GetNcols(); + + for(int row = 0; row < outputMatrixRowVal; row++){ + for(int col = 0; col < outputMatrixColVal; col++){ + R__ASSERT(outputMatrix(row,col) == fakeClassLabel); + } + } + + for(int row = 0; row < weightRowVal; row++){ + for(int col = 0; col < weightColVal; col++){ + R__ASSERT(weights(row,col) == 1); + } + } +} + +void testCreateDiscriminatorFakeData() +{ + double convergenceSteps = 100; + double batchSize = 2; + double maxEpochs = 2000; + double testInterval = 7; + double learningRate = 1e-5; + double momentum = 0.3; + double weightDecay = 1e-4; + bool multiThreading = false; + + DNN::EInitialization weightInitialization = EInitialization::kGauss; ///< 
The initialization method + DNN::EOutputFunction outputFunction = EOutputFunction::kSigmoid; ///< The output function for making the predictions + DNN::ELossFunction lossFunction = ELossFunction::kCrossEntropy; + DNN::ERegularization regularization = ERegularization::kL2; + EActivationFunction activationFunction = EActivationFunction::kRelu; + + size_t nSamples = 1; + size_t nChannels = 3; + size_t nImgHeight = 227; + size_t nImgWidth = 227; + + size_t inputDepth = 3; + size_t inputHeight = 227; + size_t inputWidth = 227; + + size_t outputWidth = 1; + + size_t batchDepth = batchSize; + size_t batchHeight = nChannels; + size_t batchWidth = nImgHeight * nImgWidth; + size_t nOutputs = 1; + size_t nThreads = 1; + size_t epoch = 1; + + //Class Label is 0 for fake data + size_t fakeClassLabel = 0.0; + + //Get model for training + TMVA::DNN::TDeepNet *genDeepNet = createAlexNet(batchWidth); + + // print the created network + std::cout << "*****Generator Deep Learning Network *****\n"; + genDeepNet->Print(); + + std::vector> genInputTensor; + genInputTensor.reserve(nSamples); + + TMatrixT genOutputMatrix(nSamples, nOutputs); + TMatrixT genWeights(nSamples, 1); + + for (size_t i = 0; i < nSamples; i++) + { + genInputTensor.emplace_back(batchHeight, batchWidth); + } + + size_t m, n; + TRandom rand(clock()); + Double_t sigma = sqrt(10.0); + + for (size_t i = 0; i < nSamples; i++) + { + m = genInputTensor[0].GetNrows(); + n = genInputTensor[0].GetNcols(); + for (size_t j = 0; j < m; j++) { + for (size_t k = 0; k < n; k++) { + genInputTensor[0](j, k) = rand.Gaus(0.0, sigma); + } + } + } + + // Create the output + for (size_t i = 0; i < nSamples; i++) + { + // Class of fake data is 1 + genOutputMatrix(i, 0) = fakeClassLabel; + } + + // Create the weights + for (size_t i = 0; i < nSamples; i++) + { + genWeights(i, 0) = 1; + } + + TensorInput generatorTuple(genInputTensor, genOutputMatrix, genWeights); + + // Loading the training and testing datasets + TTensorDataLoader 
generatorData(generatorTuple, nSamples, batchSize, + batchDepth, batchHeight, batchWidth, outputWidth, nThreads); + + TMatrixT disOutputMatrix(nSamples, nOutputs); + TMatrixT disWeights(nSamples, 1); + + std::vector> disPredTensor; + disPredTensor.reserve(nSamples); + + //Get model for training + TMVA::DNN::TDeepNet *disDeepNet = createAlexNet(nOutputs); + + // print the created network + std::cout << "*****Discriminator Deep Learning Network *****\n"; + disDeepNet->Print(); + + TString jobName = "TestCreateDiscriminatorFakeData"; + TString methodTitle = "MethodGAN"; + TString optionString = ""; + TString emptyString = ""; + TString typeString = "kGAN"; + + DataSetInfo dataInfo(emptyString); + + MethodGAN objectGAN(jobName, methodTitle, dataInfo, optionString); + objectGAN.SetupMethod(); + objectGAN.CreateDiscriminatorFakeData(disPredTensor, disOutputMatrix, disWeights, + generatorData, *genDeepNet, *disDeepNet, outputFunction, nSamples, fakeClassLabel, epoch); + + size_t disWeightRowVal = disWeights.GetNrows(); + size_t disWeightColVal = disWeights.GetNcols(); + size_t disOutputMatrixRowVal = disOutputMatrix.GetNrows(); + size_t disOutputMatrixColVal = disOutputMatrix.GetNcols(); + size_t sizeDisPredTensor = disPredTensor.size(); + size_t disPredTensorRowVal = disPredTensor[0].GetNrows(); + size_t disPredTensorColVal = disPredTensor[0].GetNcols(); + + R__ASSERT(sizeDisPredTensor == nSamples); + R__ASSERT(disPredTensorRowVal == genDeepNet->GetBatchSize()); + R__ASSERT(disPredTensorColVal == genDeepNet->GetOutputWidth()); + + for(int row = 0; row < disOutputMatrixRowVal; row++){ + for(int col = 0; col < disOutputMatrixColVal; col++){ + R__ASSERT(disOutputMatrix(row,col) == fakeClassLabel); + } + } + + for(int row = 0; row < disWeightRowVal; row++){ + for(int col = 0; col < disWeightColVal; col++){ + R__ASSERT(disWeights(row,col) == 1); + } + } +} + +void testCombineGAN() +{ + double convergenceSteps = 100; + double batchSize = 2; + double maxEpochs = 2000; + double 
testInterval = 7; + double learningRate = 1e-5; + double momentum = 0.3; + double weightDecay = 1e-4; + bool multiThreading = false; + + DNN::EInitialization weightInitialization = EInitialization::kGauss; ///< The initialization method + DNN::EOutputFunction outputFunction = EOutputFunction::kSigmoid; ///< The output function for making the predictions + DNN::ELossFunction lossFunction = ELossFunction::kCrossEntropy; + DNN::ERegularization regularization = ERegularization::kL2; + EActivationFunction activationFunction = EActivationFunction::kRelu; + + size_t nSamples = 1; + size_t nChannels = 3; + size_t nImgHeight = 227; + size_t nImgWidth = 227; + + size_t inputDepth = 3; + size_t inputHeight = 227; + size_t inputWidth = 227; + + size_t outputWidth = 1; + + size_t batchDepth = batchSize; + size_t batchHeight = nChannels; + size_t batchWidth = nImgHeight * nImgWidth; + size_t nOutputs = 1; + size_t nThreads = 1; + size_t combineLayerNum = 0; + + //Class Label is 0 for fake data + size_t fakeClassLabel = 0.0; + + //Get model for training + TMVA::DNN::TDeepNet *genDeepNet = createAlexNet(batchWidth); + + // print the created network + std::cout << "*****Discriminator Deep Learning Network *****\n"; + genDeepNet->Print(); + + //Get model for training + TMVA::DNN::TDeepNet *disDeepNet = createAlexNet(nOutputs); + + // print the created network + std::cout << "*****Discriminator Deep Learning Network *****\n"; + disDeepNet->Print(); + + DeepNet_t combinedDeepNet(genDeepNet->GetBatchSize(), genDeepNet->GetInputDepth(), genDeepNet->GetInputHeight(), genDeepNet->GetInputWidth(), + genDeepNet->GetBatchDepth(), genDeepNet->GetBatchHeight(), genDeepNet->GetBatchWidth(), lossFunction, weightInitialization, + regularization, weightDecay); + + std::unique_ptr>> combinedNet = std::unique_ptr>> + (new TMVA::DNN::TDeepNet>(1, genDeepNet->GetInputDepth(), + genDeepNet->GetInputHeight(), genDeepNet->GetInputWidth(), + genDeepNet->GetBatchDepth(), genDeepNet->GetBatchHeight(), 
genDeepNet->GetBatchWidth(), + lossFunction, weightInitialization, regularization, weightDecay)); + + TString jobName = "TestCombineGAN"; + TString methodTitle = "MethodGAN"; + TString optionString = ""; + TString emptyString = ""; + TString typeString = "kGAN"; + + DataSetInfo dataInfo(emptyString); + + MethodGAN objectGAN(jobName, methodTitle, dataInfo, optionString); + objectGAN.SetupMethod(); + objectGAN.CombineGAN(combinedDeepNet, *genDeepNet, *disDeepNet, combinedNet); + + // print the created network + std::cout << "*****Combined Deep Learning Network *****\n"; + combinedDeepNet.Print(); + + combineLayerNum = genDeepNet->GetDepth()+disDeepNet->GetDepth(); + + R__ASSERT(combinedDeepNet.GetDepth() == (combineLayerNum-1)); + +} + +#endif diff --git a/tutorials/mnist.root b/tutorials/mnist.root new file mode 100644 index 0000000000000..aeba8858fd649 Binary files /dev/null and b/tutorials/mnist.root differ diff --git a/tutorials/tmva/TMVAGeneration.C b/tutorials/tmva/TMVAGeneration.C new file mode 100644 index 0000000000000..250a92d78497e --- /dev/null +++ b/tutorials/tmva/TMVAGeneration.C @@ -0,0 +1,355 @@ +/// \file +/// \ingroup tutorial_tmva +/// \notebook -nodraw +/// This macro provides examples for the training and testing of the +/// TMVA generative models. This is specifically for unsupervised learning +/// based models. +/// +/// If the user wants to train a Generative Adversarial Network for unsupervised learning, +/// the following command can be used: +/// +/// root -l ./TMVAGeneration.C\(\"GAN\"\) +/// +/// (note that the backslashes are mandatory) +/// If no method given, a default set of classifiers is used. +/// The output file "TMVA.root" can be analysed with the use of dedicated +/// macros (simply say: root -l ), which can be conveniently +/// invoked through a GUI that will appear at the end of the run of this macro. 
+/// Launch the GUI via the command: +/// +/// root -l ./TMVAGui.C +/// +/// You can also compile and run the example with the following commands +/// +/// make +/// ./TMVAGeneration +/// +/// where: ` = "method1 method2"` are the TMVA generative model names +/// example: +/// +/// ./TMVAGeneration GAN +/// +/// If no method given, a default set is of classifiers is used +/// +/// - Project : TMVA - a ROOT-integrated toolkit for multivariate data analysis +/// - Package : TMVA +/// - Root Macro: TMVAGeneration +/// +/// \macro_output +/// \macro_code +/// \author Anushree Rankawat + + +#include +#include +#include +#include + +#include "TChain.h" +#include "TFile.h" +#include "TTree.h" +#include "TString.h" +#include "TObjString.h" +#include "TSystem.h" +#include "TROOT.h" + +#include "TMVA/Factory.h" +#include "TMVA/DataLoader.h" +#include "TMVA/Tools.h" +#include "TMVA/TMVAGui.h" + +int TMVAGeneration( TString myMethodList = "" ) +{ + // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc + // if you use your private .rootrc, or run from a different directory, please copy the + // corresponding lines from .rootrc + + // Methods to be processed can be given as an argument; use format: + // + // mylinux~> root -l TMVAGeneration.C\(\"myMethod1,myMethod2,myMethod3\"\) + // Currently there is just a single unsupervised method to be tested i.e., GAN + + //--------------------------------------------------------------- + // This loads the library + TMVA::Tools::Instance(); + + // Default MVA methods to be trained + tested + std::map Use; + + + // + // Neural Networks (all are feed-forward Multilayer Perceptrons) + Use["GAN"] = 0; // Generative Adversarial Networks + // --------------------------------------------------------------- + + std::cout << std::endl; + std::cout << "==> Start TMVAGeneration" << std::endl; + + // Select methods (don't look at this code - not of interest) + if (myMethodList != "") { + for (std::map::iterator it = 
Use.begin(); it != Use.end(); it++) it->second = 0; + + std::vector mlist = TMVA::gTools().SplitString( myMethodList, ',' ); + for (UInt_t i=0; i::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " "; + std::cout << std::endl; + return 1; + } + Use[regMethod] = 1; + } + } + + // -------------------------------------------------------------------------------------------------- + + // Here the preparation phase begins + + // Read training and test data + // (it is also possible to use ASCII format as input -> see TMVA Users Guide) + TFile *input(0); + TString fname = "~/root/tutorials/mnist.root"; + //if (!gSystem->AccessPathName( fname )) { + input = TFile::Open( fname ); // check if file in local directory exists + /*} + else { + TFile::SetCacheFileDir("."); + //input = TFile::Open("http://root.cern.ch/files/tmva_class_example.root", "CACHEREAD"); + }*/ + if (!input) { + std::cout << "ERROR: could not open data file" << std::endl; + exit(1); + } + std::cout << "--- TMVAGeneration : Using input file: " << input->GetName() << std::endl; + + // Register the training and test trees + + TTree *signalTree = (TTree*)input->Get("train_sig"); + TTree *background = (TTree*)input->Get("train_bkg"); + + // Create a ROOT output file where TMVA will store ntuples, histograms, etc. + TString outfileName( "TMVA.root" ); + TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); + + // Create the factory object. Later you can choose the methods + // whose performance you'd like to investigate. The factory is + // the only TMVA object you have to interact with + // + // The first argument is the base of the name of all the + // weightfiles in the directory weight/ + // + // The second argument is the output file for the training results + // All TMVA output can be suppressed by removing the "!" 
(not) in + // front of the "Silent" argument in the option string + TMVA::Factory *factory = new TMVA::Factory( "TMVAGeneration", outputFile, + "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" ); + + TMVA::DataLoader *dataloader=new TMVA::DataLoader("dataset"); + // If you wish to modify default settings + // (please check "src/Config.h" to see all available global options) + // + // (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0; + // (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory"; + + // Define the input variables that shall be used for the MVA training + // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)" + // [all types of expressions that can also be parsed by TTree::Draw( "expression" )] + //dataloader->AddVariable( "myvar1 := var1+var2", 'F' ); + //dataloader->AddVariable( "myvar2 := var1-var2", "Expression 2", "", 'F' ); + //dataloader->AddVariable( "var3", "Variable 3", "units", 'F' ); + //dataloader->AddVariable( "var4", "Variable 4", "units", 'F' ); + + // You can add so-called "Spectator variables", which are not used in the MVA training, + // but will appear in the final "TestTree" produced by TMVA. 
This TestTree will contain the + // input variables, the response values of all trained MVAs, and the spectator variables + + + + // global event weights per tree (see below for setting event-wise weights) + Double_t signalWeight = 1.0; + Double_t backgroundWeight = 1.0; + + // You can add an arbitrary number of signal or background trees + dataloader->AddSignalTree ( signalTree, signalWeight ); + dataloader->AddBackgroundTree( background, backgroundWeight ); + + // To give different trees for training and testing, do as follows: + // + // dataloader->AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" ); + // dataloader->AddSignalTree( signalTestTree, signalTestWeight, "Test" ); + + // Use the following code instead of the above two or four lines to add signal and background + // training and test events "by hand" + // NOTE that in this case one should not give expressions (such as "var1+var2") in the input + // variable definition, but simply compute the expression before adding the event + // ```cpp + // // --- begin ---------------------------------------------------------- + // std::vector vars( 4 ); // vector has size of number of input variables + // Float_t treevars[4], weight; + // + // // Signal + // for (UInt_t ivar=0; ivar<4; ivar++) signalTree->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) ); + // for (UInt_t i=0; iGetEntries(); i++) { + // signalTree->GetEntry(i); + // for (UInt_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar]; + // // add training and test events; here: first half is training, second is testing + // // note that the weight can also be event-wise + // if (i < signalTree->GetEntries()/2.0) dataloader->AddSignalTrainingEvent( vars, signalWeight ); + // else dataloader->AddSignalTestEvent ( vars, signalWeight ); + // } + // + // // Background (has event weights) + // background->SetBranchAddress( "weight", &weight ); + // for (UInt_t ivar=0; ivar<4; ivar++) background->SetBranchAddress( Form( "var%i", 
ivar+1 ), &(treevars[ivar]) ); + // for (UInt_t i=0; iGetEntries(); i++) { + // background->GetEntry(i); + // for (UInt_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar]; + // // add training and test events; here: first half is training, second is testing + // // note that the weight can also be event-wise + // if (i < background->GetEntries()/2) dataloader->AddBackgroundTrainingEvent( vars, backgroundWeight*weight ); + // else dataloader->AddBackgroundTestEvent ( vars, backgroundWeight*weight ); + // } + // // --- end ------------------------------------------------------------ + // ``` + // End of tree registration + + // Set individual event weights (the variables must exist in the original TTree) + // - for signal : `dataloader->SetSignalWeightExpression ("weight1*weight2");` + // - for background: `dataloader->SetBackgroundWeightExpression("weight1*weight2");` + //dataloader->SetBackgroundWeightExpression( "weight" ); + + // Apply additional cuts on the signal and background samples (can be different) + TCut mycuts = ""; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1"; + TCut mycutb = ""; // for example: TCut mycutb = "abs(var1)<0.5"; + + + for (int i = 0; i < 28; ++i) { + for (int j = 0; j < 28; ++j) { + int ivar=i*28+j; + TString varName = TString::Format("x%d",ivar); + dataloader->AddVariable(varName,'F'); + } + } + + //dataloader->AddTarget("y",'F'); + // Tell the dataloader how to use the training and testing events + // + // If no numbers of events are given, half of the events in the tree are used + // for training, and the other half for testing: + // + // dataloader->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" ); + // + // To also specify the number of testing events, use: + // + // dataloader->PrepareTrainingAndTestTree( mycut, + // "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" ); + dataloader->PrepareTrainingAndTestTree( mycuts, mycutb, + 
"nTrain_Signal=1000:nTrain_Background=1000:SplitMode=Random:NormMode=NumEvents:!V" ); + + // ### Book MVA methods + // + // Please lookup the various method configuration options in the corresponding cxx files, eg: + // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html + // it is possible to preset ranges in the option string in which the cut optimisation should be done: + // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable + + + if(Use["GAN"]) { + + // Input Layout + TString inputLayoutString("InputLayout=1|1|784##1|1|784"); + + // Batch Layout + TString batchLayoutString("BatchLayout=32|1|784##32|1|784"); + + //General Layout + TString layoutString ("Layout=RESHAPE|1|1|784|FLAT,DENSE|256|RELU,DENSE|512|RELU,DENSE|1024|RELU,DENSE|784|TANH##RESHAPE|1|1|784|FLAT,DENSE|512|RELU,DENSE|256|RELU,DENSE|1|SIGMOID"); + + // Training strategies. + TString training0("MaxEpochs=10000,GeneratorLearningRate=2e-4,GeneratorMomentum=0.9,GeneratorRepetitions=1," + "GeneratorConvergenceSteps=20,GeneratorBatchSize=32,GeneratorTestRepetitions=10," + "GeneratorWeightDecay=1e-4,GeneratorRegularization=L2," + "GeneratorDropConfig=0.0+0.5+0.5+0.5, GeneratorMultithreading=True," + "DiscriminatorLearningRate=2e-4,DiscriminatorMomentum=0.9,DiscriminatorRepetitions=1," + "DiscriminatorConvergenceSteps=20,DiscriminatorBatchSize=32,DiscriminatorTestRepetitions=10," + "DiscriminatorWeightDecay=1e-4,DiscriminatorRegularization=L2," + "DiscriminatorDropConfig=0.0+0.5+0.5+0.5, DiscriminatorMultithreading=True"); + TString training1("MaxEpochs=10000,GeneratorLearningRate=2e-5,GeneratorMomentum=0.9,GeneratorRepetitions=1," + "GeneratorConvergenceSteps=20,GeneratorBatchSize=32,GeneratorTestRepetitions=10," + "GeneratorWeightDecay=2e-5,GeneratorRegularization=L2," + "GeneratorDropConfig=0.0+0.0+0.0+0.0, GeneratorMultithreading=True," + "DiscriminatorLearningRate=1e-5,DiscriminatorMomentum=0.9,DiscriminatorRepetitions=1," + 
"DiscriminatorConvergenceSteps=20,DiscriminatorBatchSize=32,DiscriminatorTestRepetitions=10," + "DiscriminatorWeightDecay=1e-4,DiscriminatorRegularization=L2," + "DropConfig=0.0+0.0+0.0+0.0, Multithreading=True"); + TString training2("MaxEpochs=10000,GeneratorLearningRate=2e-6,GeneratorMomentum=0.0,GeneratorRepetitions=1," + "GeneratorConvergenceSteps=20,GeneratorBatchSize=32,GeneratorTestRepetitions=10," + "GeneratorWeightDecay=2e-6,GeneratorRegularization=L2," + "GeneratorDropConfig=0.0+0.0+0.0+0.0, GeneratorMultithreading=True," + "DiscriminatorLearningRate=1e-6,DiscriminatorMomentum=0.0,DiscriminatorRepetitions=1," + "DiscriminatorConvergenceSteps=20, DiscriminatorBatchSize=32, DiscriminatorTestRepetitions=10," + "DiscriminatorWeightDecay=1e-4, DiscriminatorRegularization=L2," + "DiscriminatorDropConfig=0.0+0.0+0.0+0.0, DiscriminatorMultithreading=True"); + TString trainingStrategyString ("TrainingStrategy="); + trainingStrategyString += training0 + "|" + training1 + "|" + training2; + + // General Options. 
+ TString ganOptions ("!H:V:ErrorStrategy=CROSSENTROPY:VarTransform=N:" + "WeightInitialization=XAVIERUNIFORM"); + + ganOptions.Append(":"); + ganOptions.Append(inputLayoutString); + + ganOptions.Append(":"); + ganOptions.Append(batchLayoutString); + ganOptions.Append (":"); + ganOptions.Append (layoutString); + ganOptions.Append (":"); + ganOptions.Append (trainingStrategyString); + + TString cpuOptions = ganOptions + ":Architecture=CPU"; + factory->BookMethod(dataloader, TMVA::Types::kGAN, "GAN", cpuOptions); + } + + + // Now you can tell the factory to train, test, and evaluate the MVAs + // + // Train MVAs using the set of training events + factory->TrainAllMethods(); + + // Evaluate all MVAs using the set of test events + factory->TestAllMethods(); + + // Evaluate and compare performance of all configured MVAs + factory->EvaluateAllMethods(); + + // -------------------------------------------------------------- + + // Save the output + outputFile->Close(); + + std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; + std::cout << "==> TMVAGeneration is done!" << std::endl; + + delete factory; + delete dataloader; + // Launch the GUI for the root macros + if (!gROOT->IsBatch()) TMVA::TMVAGui( outfileName ); + + return 0; +} + +int main( int argc, char** argv ) +{ + // Select methods (don't look at this code - not of interest) + TString methodList; + for (int i=1; i