diff --git a/source/auxiliary/confusion-matrix.cpp b/source/auxiliary/confusion-matrix.cpp index 5779510..3d2b66a 100644 --- a/source/auxiliary/confusion-matrix.cpp +++ b/source/auxiliary/confusion-matrix.cpp @@ -4,10 +4,26 @@ #include #include #include +#include #include "../auxiliary/confusion-matrix.h" #include "../auxiliary/mathematics.h" #include "../service/debug.h" +#include "../common/result.h" + +double ksi::confusion_matrix::TrainF1score (const ksi::result & r) +{ + double recall = 1.0 * (r.TrainPositive2Positive) / (r.TrainPositive2Positive + r.TrainPositive2Negative); + double precision = 1.0 * (r.TrainPositive2Positive) / (r.TrainPositive2Positive + r.TrainNegative2Positive); + return (2.0 * recall * precision) / (recall + precision); +} + +double ksi::confusion_matrix::TestF1score (const ksi::result & r) +{ + double recall = 1.0 * (r.TestPositive2Positive) / (r.TestPositive2Positive + r.TestPositive2Negative); + double precision = 1.0 * (r.TestPositive2Positive) / (r.TestPositive2Positive + r.TestNegative2Positive); + return (2.0 * recall * precision) / (recall + precision); +} std::string ksi::confusion_matrix::ca(int n) @@ -157,6 +173,26 @@ void ksi::confusion_matrix::calculate_statistics ( } } +std::string ksi::confusion_matrix::print(const ksi::result results, const bool print_for_test) +{ + if (print_for_test) + return ksi::confusion_matrix::print(results.TestPositive2Positive, results.TestNegative2Negative, results.TestNegative2Positive, results.TestPositive2Negative); + else + return ksi::confusion_matrix::print(results.TrainPositive2Positive, results.TrainNegative2Negative, results.TrainNegative2Positive, results.TrainPositive2Negative); +} + +double ksi::confusion_matrix::safe_division(const double number, const double divisor) +{ + if (divisor == 0) + return std::numeric_limits::quiet_NaN(); + else + return number / divisor; +} + +double ksi::confusion_matrix::safe(const double d) +{ + return d != 0 ? 
d : std::numeric_limits<double>::quiet_NaN(); +} std::string ksi::confusion_matrix::print( int nTruePositives, @@ -164,13 +200,11 @@ std::string ksi::confusion_matrix::print( int nFalsePositives, int nFalseNegatives) { - std::stringstream ss; - int CP; // conditional (original) positives int CN; // conditional (original) negatives - int OP; // TestOutcomePositives + int OP; // TestOutcomePositives int ON; // TestOutcomeNegatives; int To; // TotalPopulation @@ -186,34 +220,36 @@ std::string ksi::confusion_matrix::print( int TN = nTrueNegatives; int FP = nFalsePositives; int FN = nFalseNegatives; - double PRE, PPV, FDR, FOR, NPV, LRP, TPR, FPR, ACC, FNR, TNR, DOR, LRN, F1S, BA, TS, NA, FM, dP, BM, MCC, PT, P4; - PRE = (double) CP / To; - PPV = (double) nTruePositives / OP; - FDR = (double) nFalsePositives / OP; - FOR = (double) nFalseNegatives / ON; - NPV = (double) nTrueNegatives / ON; - TPR = (double) nTruePositives / CP; - FNR = (double) nFalsePositives / CP; - FPR = (double) nFalsePositives / CN; - TNR = (double) nTrueNegatives / CN; - ACC = (double) (nTruePositives + nTrueNegatives) / To; + PRE = (double) CP / ksi::confusion_matrix::safe(To); + PPV = (double) nTruePositives / ksi::confusion_matrix::safe(OP); + FDR = (double) nFalsePositives / ksi::confusion_matrix::safe(OP); + FOR = (double) nFalseNegatives / ksi::confusion_matrix::safe(ON); + NPV = (double) nTrueNegatives / ksi::confusion_matrix::safe(ON); + TPR = (double) nTruePositives / ksi::confusion_matrix::safe(CP); + FNR = (double) nFalseNegatives / ksi::confusion_matrix::safe(CP); /* false negative rate = FN / (TP + FN); the numerator used to be FP by mistake */ + FPR = (double) nFalsePositives / ksi::confusion_matrix::safe(CN); + TNR = (double) nTrueNegatives / ksi::confusion_matrix::safe(CN); + ACC = (double) (nTruePositives + nTrueNegatives) / ksi::confusion_matrix::safe(To); - F1S = 2 * TPR * PPV / (TPR + PPV); - LRP = TPR / FPR; - LRN = FNR / TNR; - DOR = LRP / LRN; + F1S = 2 * TPR * PPV / ksi::confusion_matrix::safe((TPR + PPV)); + LRP = TPR / ksi::confusion_matrix::safe(FPR); + LRN = 
FNR / ksi::confusion_matrix::safe(TNR); + DOR = LRP / ksi::confusion_matrix::safe(LRN); - TS = TP / (TP + FN + FP); + TS = TP / ksi::confusion_matrix::safe((TP + FN + FP)); BA = (TPR + TNR) / 2.0; FM = sqrt(PPV * TPR); dP = PPV + NPV - 1; BM = TPR + TNR - 1; - MCC = (TP * TN - FP * FN) / (sqrt((TP + FP) * (TP + FN) * (TN + FP) * (TN + FN))); - PT = (sqrt(TPR * FPR) - FPR) / (TPR - FPR); - P4 = (4 * TP * TN) / (4 * TP * TN + (TP + TN)*(FP + FN)); + MCC = (1.0 * TP * TN - 1.0 * FP * FN) / ksi::confusion_matrix::safe((sqrt(1.0 * (TP + FP) * (TP + FN) * (TN + FP) * (TN + FN)))); /* products are evaluated in double: four int factors overflow int for counts of a few hundred */ + PT = (sqrt(TPR * FPR) - FPR) / ksi::confusion_matrix::safe((TPR - FPR)); + if ((4.0 * TP * TN + 1.0 * (TP + TN)*(FP + FN)) == 0) + P4 = std::numeric_limits<double>::quiet_NaN(); + else + P4 = (4.0 * TP * TN) / (4.0 * TP * TN + 1.0 * (TP + TN)*(FP + FN)); /* floating-point division: with int operands the quotient was truncated to 0 or 1 */ ss << "+------------------+------------------+--------------------+-----------------------+---------------------+" << std::endl << @@ -306,7 +342,7 @@ std::string ksi::confusion_matrix::print( ss << "Positive likelihood ratio (LR+) = TPR / FPR: " << ul(LRP) << std::endl; ss << "Negative likelihood ratio (LR−) = FNR / TNR: " << ul(LRN) << std::endl; ss << "Diagnostic odds ratio (DOR) = LR+ / LR−: " << ul(DOR) << std::endl; - ss << "F1 score = recall * precision / (recall + precision): " << ul(F1S) << std::endl; + ss << "F1 score = 2 * recall * precision / (recall + precision): " << ul(F1S) << std::endl; ss << "Threat score (TS, critical success index CSI, Jaccard index) = TP / (TP + FN + FP): " << ul(TS) << std::endl; ss << "Balanced accuracy (BA) = (TPR + TNR) / 2 : " << ul(BA) << std::endl; ss << "Fowlkes–Mallows index (FM) = sqrt(PPV * TPR): " << ul(FM) << std::endl; diff --git a/source/auxiliary/confusion-matrix.h b/source/auxiliary/confusion-matrix.h index 4f931d1..73bf9f0 100644 --- a/source/auxiliary/confusion-matrix.h +++ b/source/auxiliary/confusion-matrix.h @@ -6,6 +6,8 @@ #include #include +#include "../common/result.h" + namespace ksi { /** This class calculates statistics 
and prints confusion matrix. */ @@ -66,6 +68,13 @@ namespace ksi int nFalsePositives, int nFalseNegatives); + /** The method prints values of a confusion matrix. It prints all values and a nice matrix. + * @param results results holding the train and test confusion-matrix counts (Positive2Positive, Negative2Negative, Negative2Positive, Positive2Negative) + * @param print_for_test if true, the method prints a confusion matrix for test data, otherwise -- for train data. + * @date 2024-03-12 + */ + std::string print (const ksi::result results, const bool print_for_test = true); + protected: /** prints a double values with precision to fit the table */ std::string ul(double d); @@ -75,6 +84,27 @@ namespace ksi /** @return true if abs(left - right) < EPSILON EPSILON = 0.0001 */ bool equal (const double left, const double right); + + /** @return quiet_NaN if the divisor is zero, otherwise the quotient number / divisor + * @date 2024-04-01 */ + double safe_division(const double number, const double divisor); + + /** @return quiet_NaN if number is zero, otherwise the number itself + * @date 2024-04-01 */ + double safe(const double number); + + public: + /** @return f1-score for the test results + * @date 2024-03-28 + * @author Krzysztof Siminski */ + static double TestF1score (const ksi::result & r); + + /** @return f1-score for the train results + * @date 2024-03-28 + * @author Krzysztof Siminski */ + static double TrainF1score (const ksi::result & r); + + }; } diff --git a/source/auxiliary/directory.cpp b/source/auxiliary/directory.cpp index 611eb82..9889539 100644 --- a/source/auxiliary/directory.cpp +++ b/source/auxiliary/directory.cpp @@ -8,11 +8,15 @@ bool ksi::directory::create_directory_for_file(const std::string & file_path) { - std::filesystem::path sciezka_pliku (file_path); - auto sciezka_katalogu = sciezka_pliku.parent_path(); - if (not std::filesystem::exists(sciezka_katalogu)) - return std::filesystem::create_directories(sciezka_katalogu); - else - return true; + try + { + std::filesystem::path sciezka_pliku (file_path); + auto sciezka_katalogu = sciezka_pliku.parent_path(); + if (not 
std::filesystem::exists(sciezka_katalogu)) + return std::filesystem::create_directories(sciezka_katalogu); + else + return true; + } + CATCH; } diff --git a/source/auxiliary/utility-math.h b/source/auxiliary/utility-math.h index aa48b48..ce01cff 100644 --- a/source/auxiliary/utility-math.h +++ b/source/auxiliary/utility-math.h @@ -186,7 +186,6 @@ namespace ksi /** @return The function returns standard deviation and median of elements in a vector. * @param first iterator to the first element * @param last iterator to the past-the-end element in the vector - * @param k k-th smallest (starts with 0) * @date 2023-11-21 * @author Konrad Wnuk * @throw std::string if the array has no items @@ -259,21 +258,21 @@ namespace ksi * @brief Calculates the equation of a line given two points. * * This function calculates the equation of a line in the form \f$y = ax + b\f$, - * where \f$a\f$ is the slope and \f$b\f$ is the y-intercept, based on two given points. + * where \f$a\f$ is the slope and \f$b\f$ is the constant term (y-intercept), based on two given points. 
* * @tparam T The data type of the coordinates (default is double) * @param p1 The first point \f$x, y\f$ * @param p2 The second point \f$x, y\f$ - * @return A pair representing the slope and y-intercept of the line + * @return A pair representing the slope and the constant term (y-intercept) of the line * * The slope \f$a\f$ is calculated as: * \f[ - * \text{slope} = \frac{{y_2 - y_1}}{{x_2 - x_1}} + * a = \frac{{y_2 - y_1}}{{x_2 - x_1}} * \f] * - * The y-intercept \f$b\f$ is calculated as: + * The constant term (y-intercept) \f$b\f$ is calculated as: * \f[ - * \text{intercept} = y_1 - \text{slope} \cdot x_1 + * b = y_1 - a \cdot x_1 * \f] * * @date 2023-12-26 @@ -283,9 +282,9 @@ namespace ksi static std::pair calculateLineEquation(const std::pair& p1, const std::pair& p2) { const double slope = (p2.second - p1.second) / (p2.first - p1.first); - const double intercept = p1.second - slope * p1.first; + const double constant_term = p1.second - slope * p1.first; - return std::make_pair(slope, intercept); + return std::make_pair(slope, constant_term); } /** @@ -310,7 +309,7 @@ namespace ksi T calculateLinearDefiniteIntegralValue(const T& x1, const T& x2, const std::pair& params, const T& expected) { auto f = [params] (const auto& x, const auto& expected) { - return (params.first * pow(x, 4)) / 4 + (params.second * pow(x, 3) - 2 * expected * params.first * pow(x, 3)) / 3 - expected * params.second * pow(x, 2) + (pow(expected, 2) * params.first * pow(x, 2)) / 2 + pow(expected, 2) * params.second * x; + return (params.first * pow(x, 4)) / 4 + (params.second * pow(x, 3) - 2 * expected * params.first * pow(x, 3)) / 3 - expected * params.second * pow(x, 2) + (pow(expected, 2) * params.first * pow(x, 2)) / 2 + pow(expected, 2) * params.second * x; }; return f(x2, expected) - f(x1, expected); @@ -339,10 +338,8 @@ namespace ksi return f(x2, expected) - f(x1, expected); } - }; - - + /** Class for representation of a pair: double, std::size_t with operator< .*/ class distance_index { @@ -350,8 
+347,8 @@ namespace ksi double distance; std::size_t index; - bool operator < (const distance_index & right); - + bool operator < (const distance_index & right); }; } #endif + diff --git a/source/auxiliary/utility-string.cpp b/source/auxiliary/utility-string.cpp index adfa0c6..684cde7 100644 --- a/source/auxiliary/utility-string.cpp +++ b/source/auxiliary/utility-string.cpp @@ -49,8 +49,6 @@ std::vector ksi::utility_string::splitString(std::string s, const c return slowa; } - - std::string ksi::utility_string::trimString(std::string s) { std::string white (" \t\f\v\n\r"); diff --git a/source/common/dataset.cpp b/source/common/dataset.cpp index 0c929a7..f493902 100644 --- a/source/common/dataset.cpp +++ b/source/common/dataset.cpp @@ -203,6 +203,18 @@ std::size_t ksi::dataset::size() const return data.size(); } +double ksi::dataset::get_cardinality() const +{ + double cardinality {0.0}; + + for (const auto & p : data) + { + cardinality += p->getWeight(); + } + return cardinality; +} + + std::size_t ksi::dataset::getNumberOfAttributes() const { diff --git a/source/common/dataset.h b/source/common/dataset.h index 1735575..4569ba7 100644 --- a/source/common/dataset.h +++ b/source/common/dataset.h @@ -64,6 +64,10 @@ namespace ksi /** @return returns number of data items in the dataset */ std::size_t size() const; + /** @return sum of weights of all items in the train dataset + @date 2024-03-08 */ + double get_cardinality() const ; + /** @return returns number of attributes in a datum */ std::size_t getNumberOfAttributes() const; @@ -117,8 +121,7 @@ namespace ksi * @date 2019-01-22 */ std::vector> getMatrix(ext_fuzzy_number_gaussian) const ; - - + /** A method get r-th datum from the dataset. The method does not copy the datum! 
* @return a pointer to the datum or nullptr if r is illegal * @param r index of datum diff --git a/source/descriptors/descriptor-gaussian.cpp b/source/descriptors/descriptor-gaussian.cpp index 410dd62..9784190 100644 --- a/source/descriptors/descriptor-gaussian.cpp +++ b/source/descriptors/descriptor-gaussian.cpp @@ -70,9 +70,10 @@ double ksi::descriptor_gaussian::getMembership(double x) { if (_stddev <= 0.0) { - std::stringstream ss; - ss << "illegal value of fuzzyfication of a gaussian set: " << NAZWA(_stddev) << " == " << _stddev; - throw ss.str(); + // std::stringstream ss; + // ss << "illegal value of fuzzyfication of a gaussian set: " << NAZWA(_stddev) << " == " << _stddev; + // throw ss.str(); + _stddev = 0.000'001; // tiny positive value. } double diff = x - _mean; return last_membership = std::exp(-(diff * diff) / (2 * _stddev * _stddev)); diff --git a/source/descriptors/descriptor-sigmoidal.cpp b/source/descriptors/descriptor-sigmoidal.cpp index 9ba6314..8674d10 100644 --- a/source/descriptors/descriptor-sigmoidal.cpp +++ b/source/descriptors/descriptor-sigmoidal.cpp @@ -56,7 +56,6 @@ double ksi::descriptor_sigmoidal::getCoreMean() const return std::nan(""); } - double ksi::descriptor_sigmoidal::getMembership (double x) { try @@ -82,7 +81,6 @@ ksi::descriptor * ksi::descriptor_sigmoidal::clone() const ksi::descriptor_sigmoidal::~descriptor_sigmoidal() { - } std::ostream& ksi::descriptor_sigmoidal::print(std::ostream& ss) const diff --git a/source/descriptors/descriptor-triangular.cpp b/source/descriptors/descriptor-triangular.cpp index 303a5bf..015d592 100644 --- a/source/descriptors/descriptor-triangular.cpp +++ b/source/descriptors/descriptor-triangular.cpp @@ -147,7 +147,7 @@ std::ostream& ksi::descriptor_triangular::printLinguisticDescription(std::ostrea return ss; } -std::vector< double > ksi::descriptor_triangular::getMAconsequenceParameters() const +std::vector ksi::descriptor_triangular::getMAconsequenceParameters() const { return { _support_min, 
_core, _support_max }; } diff --git a/source/experiments/exp-005.cpp b/source/experiments/exp-005.cpp index a2ca6ca..7561057 100644 --- a/source/experiments/exp-005.cpp +++ b/source/experiments/exp-005.cpp @@ -1,38 +1,35 @@ /** @file */ -#include +#include #include -#include -#include +#include #include -#include -#include #include - - -#include "../service/debug.h" -#include "../implications/imp-reichenbach.h" -#include "../tnorms/t-norm-product.h" - -#include "../partitions/gk.h" +#include +#include +#include -#include "../neuro-fuzzy/neuro-fuzzy-system.h" -#include "../neuro-fuzzy/ma.h" -#include "../neuro-fuzzy/tsk.h" +#include "../auxiliary/roc.h" +#include "../auxiliary/tempus.h" +#include "../auxiliary/to_string.h" +#include "../auxiliary/utility-math.h" +#include "../implications/imp-reichenbach.h" #include "../neuro-fuzzy/annbfis.h" -#include "../neuro-fuzzy/subspace-annbfis.h" -#include "../neuro-fuzzy/fubi-annbfis.h" -#include "../neuro-fuzzy/tsk_prototype.h" #include "../neuro-fuzzy/annbfis_prototype.h" -#include "../neuro-fuzzy/fac_prototype_minkowski_regression.h" -#include "../neuro-fuzzy/fac_prototype_minkowski_classification.h" -#include "../neuro-fuzzy/fac_prototype_mahalanobis_regression.h" #include "../neuro-fuzzy/fac_prototype_mahalanobis_classification.h" +#include "../neuro-fuzzy/fac_prototype_mahalanobis_regression.h" +#include "../neuro-fuzzy/fac_prototype_minkowski_classification.h" +#include "../neuro-fuzzy/fac_prototype_minkowski_regression.h" +#include "../neuro-fuzzy/fubi-annbfis.h" +#include "../neuro-fuzzy/ma.h" +#include "../neuro-fuzzy/neuro-fuzzy-system.h" +#include "../neuro-fuzzy/subspace-annbfis.h" #include "../neuro-fuzzy/three_way_decision_nfs.h" - -#include "../auxiliary/roc.h" -#include "../auxiliary/tempus.h" -#include "../auxiliary/utility-math.h" +#include "../neuro-fuzzy/tsk.h" +#include "../neuro-fuzzy/tsk_prototype.h" +#include "../partitions/gk.h" +#include "../service/debug.h" +#include "../tnorms/t-norm-product.h" 
#include "../experiments/exp-005.h" @@ -42,517 +39,579 @@ ksi::exp_005::exp_005() void ksi::exp_005::classification() { - std::cout << "classification" << std::endl; - ksi::imp_reichenbach implication; - ksi::t_norm_product Tnorm; - std::string RESULT_EXTENSION {".txt"}; - - - std::vector thresholds { ksi::roc_threshold::mean, - ksi::roc_threshold::minimal_distance, - ksi::roc_threshold::youden - }; - - const std::string EXPERIMENT ("exp-005"); - const std::string TYPE ("classification"); - const std::string DATA_DIRECTORY ("../data/" + EXPERIMENT + "/" + TYPE); - const std::string RESULTS_DIRECTORY ("../results/" + EXPERIMENT + "/" + TYPE); - - const int NUMBER_OF_RULES = 5; - const int NUMBER_OF_CLUSTERING_ITERATIONS = 100; - const int NUMBER_OF_TUNING_ITERATIONS = 100; - - const bool NORMALISATION = false; - - const double ETA = 0.001; - const double POSITIVE_CLASS_LABEL = 1.0; - const double NEGATIVE_CLASS_LABEL = 0.0; - - // dataset - std::string dataset_name { "haberman" }; - - std::cout << "data set: " << dataset_name << std::endl; - std::string dataset {DATA_DIRECTORY + "/" + dataset_name}; - - std::string results_dir {RESULTS_DIRECTORY + "/" + dataset_name}; - std::string TRAIN (dataset + "/" + dataset_name + ".train"); - std::string TEST (dataset + "/" + dataset_name + ".test"); - std::string RESULTS (results_dir + "/results-" + dataset_name); - - // MA - { - for (auto th : thresholds) // for all thresholds - { - - ksi::ma system (NUMBER_OF_RULES, NUMBER_OF_CLUSTERING_ITERATIONS, NUMBER_OF_TUNING_ITERATIONS, ETA, NORMALISATION, Tnorm, POSITIVE_CLASS_LABEL, NEGATIVE_CLASS_LABEL, th); - - std::string threshold_name; - switch(th) - { - case ksi::roc_threshold::mean : threshold_name = "mean"; break; - case ksi::roc_threshold::minimal_distance : threshold_name = "minimal_distance"; break; - case ksi::roc_threshold::youden : threshold_name = "youden"; break; - default : threshold_name = "something-wrong-has-happened"; break; - } - - std::string result_file { 
RESULTS + "-" + system.get_nfs_name() + "-" + threshold_name + RESULT_EXTENSION }; - std::cout << "\tmethod: " << system.get_nfs_name() << std::endl; - std::cout << "\tthreshold: " << threshold_name << std::endl; - system.experiment_classification(TRAIN, TEST, result_file); - std::cout << "\tResults saved to file " << result_file << std::endl; - std::cout << std::endl; - } - } - - // TSK - { - for (auto th : thresholds) // for all thresholds - { - - ksi::tsk system (NUMBER_OF_RULES, NUMBER_OF_CLUSTERING_ITERATIONS, NUMBER_OF_TUNING_ITERATIONS, ETA, NORMALISATION, Tnorm, POSITIVE_CLASS_LABEL, NEGATIVE_CLASS_LABEL, th); - - std::string threshold_name; - switch(th) - { - case ksi::roc_threshold::mean : threshold_name = "mean"; break; - case ksi::roc_threshold::minimal_distance : threshold_name = "minimal_distance"; break; - case ksi::roc_threshold::youden : threshold_name = "youden"; break; - default : threshold_name = "something-wrong-has-happened"; break; - } - - - std::string result_file { RESULTS + "-" + system.get_nfs_name() + "-" + threshold_name + RESULT_EXTENSION }; - std::cout << "\tmethod: " << system.get_nfs_name() << std::endl; - std::cout << "\tthreshold: " << threshold_name << std::endl; - system.experiment_classification(TRAIN, TEST, result_file); - std::cout << "\tResults saved to file " << result_file << std::endl; - std::cout << std::endl; - } - } - - // ANNBFIS - { - for (auto th : thresholds) // for all thresholds - { - - ksi::annbfis system (NUMBER_OF_RULES, NUMBER_OF_CLUSTERING_ITERATIONS, NUMBER_OF_TUNING_ITERATIONS, ETA, NORMALISATION, Tnorm, implication, POSITIVE_CLASS_LABEL, NEGATIVE_CLASS_LABEL, th); - - std::string threshold_name; - switch(th) - { - case ksi::roc_threshold::mean : threshold_name = "mean"; break; - case ksi::roc_threshold::minimal_distance : threshold_name = "minimal_distance"; break; - case ksi::roc_threshold::youden : threshold_name = "youden"; break; - default : threshold_name = "something-wrong-has-happened"; break; - } 
- - - std::string result_file { RESULTS + "-" + system.get_nfs_name() + "-" + threshold_name + RESULT_EXTENSION }; - std::cout << "\tmethod: " << system.get_nfs_name() << std::endl; - std::cout << "\tthreshold: " << threshold_name << std::endl; - system.experiment_classification(TRAIN, TEST, result_file); - std::cout << "\tResults saved to file " << result_file << std::endl; - std::cout << std::endl; - } - } - - // SUBSPACE_ANNBFIS - { - for (auto th : thresholds) // for all thresholds - { - ksi::subspace_annbfis system (NUMBER_OF_RULES, NUMBER_OF_CLUSTERING_ITERATIONS, NUMBER_OF_TUNING_ITERATIONS, ETA, NORMALISATION, Tnorm, implication, POSITIVE_CLASS_LABEL, NEGATIVE_CLASS_LABEL, th); - - std::string threshold_name; - switch(th) - { - case ksi::roc_threshold::mean : threshold_name = "mean"; break; - case ksi::roc_threshold::minimal_distance : threshold_name = "minimal_distance"; break; - case ksi::roc_threshold::youden : threshold_name = "youden"; break; - default : threshold_name = "something-wrong-has-happened"; break; - } - - - std::string result_file { RESULTS + "-" + system.get_nfs_name() + "-" + threshold_name + RESULT_EXTENSION }; - std::cout << "\tmethod: " << system.get_nfs_name() << std::endl; - std::cout << "\tthreshold: " << threshold_name << std::endl; - system.experiment_classification(TRAIN, TEST, result_file); - std::cout << "\tResults saved to file " << result_file << std::endl; - std::cout << std::endl; - } - } - - // FUBI_ANNBFIS - { - for (auto th : thresholds) // for all thresholds - { - - ksi::fubi_annbfis system (NUMBER_OF_RULES, NUMBER_OF_CLUSTERING_ITERATIONS, NUMBER_OF_TUNING_ITERATIONS, ETA, NORMALISATION, Tnorm, implication, POSITIVE_CLASS_LABEL, NEGATIVE_CLASS_LABEL, th); - - - std::string threshold_name; - switch(th) - { - case ksi::roc_threshold::mean : threshold_name = "mean"; break; - case ksi::roc_threshold::minimal_distance : threshold_name = "minimal_distance"; break; - case ksi::roc_threshold::youden : threshold_name = 
"youden"; break; - default : threshold_name = "something-wrong-has-happened"; break; - } - - std::string result_file { RESULTS + "-" + system.get_nfs_name() + "-" + threshold_name + RESULT_EXTENSION }; - std::cout << "\tmethod: " << system.get_nfs_name() << std::endl; - std::cout << "\tthreshold: " << threshold_name << std::endl; - system.experiment_classification(TRAIN, TEST, result_file); - std::cout << "\tResults saved to file " << result_file << std::endl; - std::cout << std::endl; - } - } - - // MINKOWSKI PROTOTYPE PROTO_TSK NEURO-FUZZY CLASSIFIER - { - const double POSITIVE { 1 }; - const double NEGATIVE { 0 }; - - for (auto th : thresholds) // for all thresholds - { - double minkowski_coefficient = 2.0; - ksi::fac_prototype_minkowski_classification factory (minkowski_coefficient, POSITIVE, NEGATIVE); - - ksi::tsk_prototype system (NUMBER_OF_RULES, NUMBER_OF_CLUSTERING_ITERATIONS, NUMBER_OF_TUNING_ITERATIONS, ETA, NORMALISATION, factory, POSITIVE, NEGATIVE, th); - - - std::string threshold_name; - switch(th) - { - case ksi::roc_threshold::mean : threshold_name = "mean"; break; - case ksi::roc_threshold::minimal_distance : threshold_name = "minimal_distance"; break; - case ksi::roc_threshold::youden : threshold_name = "youden"; break; - default : threshold_name = "something-wrong-has-happened"; break; - } - - - std::string result_file { RESULTS + "-" + system.get_nfs_name() + "-" + threshold_name + RESULT_EXTENSION }; - std::cout << "\tmethod: " << system.get_nfs_name() << std::endl; - std::cout << "\tthreshold: " << threshold_name << std::endl; - system.experiment_classification(TRAIN, TEST, result_file); - std::cout << "\tResults saved to file " << result_file << std::endl; - std::cout << std::endl; - } - } - // MINKOWSKI PROTOTYPE ANNBFIS NEURO-FUZZY CLASSIFIER - { - const double POSITIVE { 1 }; - const double NEGATIVE { 0 }; - const ksi::imp_reichenbach IMPLICATION; - - for (auto th : thresholds) // for all thresholds - { - double minkowski_coefficient = 
2.0; - ksi::fac_prototype_minkowski_classification factory (minkowski_coefficient, POSITIVE, NEGATIVE); - - ksi::annbfis_prototype system (NUMBER_OF_RULES, NUMBER_OF_CLUSTERING_ITERATIONS, NUMBER_OF_TUNING_ITERATIONS, ETA, NORMALISATION, IMPLICATION, factory, POSITIVE, NEGATIVE, th); - - - std::string threshold_name; - switch(th) - { - case ksi::roc_threshold::mean : threshold_name = "mean"; break; - case ksi::roc_threshold::minimal_distance : threshold_name = "minimal_distance"; break; - case ksi::roc_threshold::youden : threshold_name = "youden"; break; - default : threshold_name = "something-wrong-has-happened"; break; - } - - std::string result_file { RESULTS + "-" + system.get_nfs_name() + "-" + threshold_name + RESULT_EXTENSION }; - std::cout << "\tmethod: " << system.get_nfs_name() << std::endl; - std::cout << "\tthreshold: " << threshold_name << std::endl; - system.experiment_classification(TRAIN, TEST, result_file); - std::cout << "\tResults saved to file " << result_file << std::endl; - std::cout << std::endl; - } - } - - // MAHALANOBIS PROTOTYPE TSK NEURO-FUZZY CLASSIFIER - { - const int NUMBER_OF_RULES = 3; - const double POSITIVE { 1 }; - const double NEGATIVE { 0 }; - - for (auto th : thresholds) // for all thresholds - { - ksi::fac_prototype_mahalanobis_classification factory; - - ksi::gk algorithm; - algorithm.setNumberOfIterations(NUMBER_OF_CLUSTERING_ITERATIONS); - algorithm.setNumberOfClusters(NUMBER_OF_RULES); - - ksi::tsk_prototype system (algorithm, NUMBER_OF_TUNING_ITERATIONS, ETA, NORMALISATION, factory, POSITIVE, NEGATIVE, th); - - std::string threshold_name; - switch(th) - { - case ksi::roc_threshold::mean : threshold_name = "mean"; break; - case ksi::roc_threshold::minimal_distance : threshold_name = "minimal_distance"; break; - case ksi::roc_threshold::youden : threshold_name = "youden"; break; - default : threshold_name = "something-wrong-has-happened"; break; - } - - std::string result_file { RESULTS + "-" + system.get_nfs_name() + "-" 
+ threshold_name + RESULT_EXTENSION }; - std::cout << "\tmethod: " << system.get_nfs_name() << std::endl; - std::cout << "\tthreshold: " << threshold_name << std::endl; - system.experiment_classification(TRAIN, TEST, result_file); - std::cout << "\tResults saved to file " << result_file << std::endl; - std::cout << std::endl; - } - } - - // MAHALANOBIS PROTOTYPE ANNBFIS NEURO-FUZZY CLASSIFIER - { - const int NUMBER_OF_RULES = 3; - const double POSITIVE { 1 }; - const double NEGATIVE { 0 }; - const ksi::imp_reichenbach IMPLICATION; - - // auto th = ksi::roc_threshold::mean; /// @todo przywróć pętlę - for (auto th : thresholds) // for all thresholds - { - ksi::fac_prototype_mahalanobis_classification factory; - - //double rho { 1.0 }; - //ksi::gk algorithm(rho); - ksi::gk algorithm; - algorithm.setNumberOfIterations(NUMBER_OF_CLUSTERING_ITERATIONS); - algorithm.setNumberOfClusters(NUMBER_OF_RULES); - - // debug(NUMBER_OF_RULES); - ksi::annbfis_prototype system (algorithm, NUMBER_OF_TUNING_ITERATIONS, ETA, NORMALISATION, IMPLICATION, factory, POSITIVE, NEGATIVE, th); - - std::string threshold_name; - switch(th) - { - case ksi::roc_threshold::mean : threshold_name = "mean"; break; - case ksi::roc_threshold::minimal_distance : threshold_name = "minimal_distance"; break; - case ksi::roc_threshold::youden : threshold_name = "youden"; break; - default : threshold_name = "something-wrong-has-happened"; break; - } - - - std::string result_file { RESULTS + "-" + system.get_nfs_name() + "-" + threshold_name + RESULT_EXTENSION }; - std::cout << "\tmethod: " << system.get_nfs_name() << std::endl; - std::cout << "\tthreshold: " << threshold_name << std::endl; - system.experiment_classification(TRAIN, TEST, result_file); - std::cout << "\tResults saved to file " << result_file << std::endl; - std::cout << std::endl; - } - } - - // THREE-WAY DECISION NEURO-FUZZY CLASSIFIER - { - double noncommitment_value = 0.1; // half of width of the noncommitment interval - - for (auto th : 
thresholds) // for all thresholds - { - // We compose several types of cascades built of neuro-fuzzy systems: - std::vector> systems - { - // ANNBFIS - std::shared_ptr (new ksi::annbfis (NUMBER_OF_RULES, NUMBER_OF_CLUSTERING_ITERATIONS, NUMBER_OF_TUNING_ITERATIONS, ETA, NORMALISATION, Tnorm, implication, POSITIVE_CLASS_LABEL, NEGATIVE_CLASS_LABEL, th)), - - // TSK - std::shared_ptr (new ksi::tsk (NUMBER_OF_RULES, NUMBER_OF_CLUSTERING_ITERATIONS, NUMBER_OF_TUNING_ITERATIONS, ETA, NORMALISATION, Tnorm, POSITIVE_CLASS_LABEL, NEGATIVE_CLASS_LABEL, th)), - - // subspace ANNBFIS - std::shared_ptr (new ksi::subspace_annbfis (NUMBER_OF_RULES, NUMBER_OF_CLUSTERING_ITERATIONS, NUMBER_OF_TUNING_ITERATIONS, ETA, NORMALISATION, Tnorm, implication, POSITIVE_CLASS_LABEL, NEGATIVE_CLASS_LABEL, th)), - - // FuBi ANNBFIS - std::shared_ptr (new ksi::fubi_annbfis (NUMBER_OF_RULES, NUMBER_OF_CLUSTERING_ITERATIONS, NUMBER_OF_TUNING_ITERATIONS, ETA, NORMALISATION, Tnorm, implication, POSITIVE_CLASS_LABEL, NEGATIVE_CLASS_LABEL, th)) - }; - - - // for all systems we compose cascades: - for (auto & nfs : systems) - { - std::vector> cascade_of_nfs - { - std::shared_ptr (nfs->clone()), - std::shared_ptr (nfs->clone()), - std::shared_ptr (nfs->clone()) - }; - - std::string threshold_name; - switch(th) - { - case ksi::roc_threshold::mean : threshold_name = "mean"; break; - case ksi::roc_threshold::minimal_distance : threshold_name = "minimal_distance"; break; - case ksi::roc_threshold::youden : threshold_name = "youden"; break; - default : threshold_name = "something-wrong-has-happened"; break; - } - - // And we run experiments: - std::string cascade_name; - for (const auto & p : cascade_of_nfs) - cascade_name += std::string{"-"} + p->get_nfs_name(); - - std::string result_file { RESULTS + "-3WDNFS-" + cascade_name + "-" + threshold_name + RESULT_EXTENSION }; - ksi::three_way_decision_nfs system (cascade_of_nfs, TRAIN, TEST, result_file, noncommitment_value); - std::cout << "\tmethod: " << 
system.get_nfs_name() << std::endl; - std::cout << "\tthreshold: " << threshold_name << std::endl; - system.experiment_classification(TRAIN, TEST, result_file); - std::cout << "\tResults saved to file " << result_file << std::endl; - std::cout << std::endl; - - - } - } - } + std::cout << "classification" << std::endl; + ksi::imp_reichenbach implication; + ksi::t_norm_product Tnorm; + std::string RESULT_EXTENSION {".txt"}; + + + std::vector thresholds { ksi::roc_threshold::mean, + ksi::roc_threshold::minimal_distance, + ksi::roc_threshold::youden + }; + + const std::string EXPERIMENT ("exp-005"); + const std::string TYPE ("classification"); + const std::string DATA_DIRECTORY ("../data/" + EXPERIMENT + "/" + TYPE); + const std::string RESULTS_DIRECTORY ("../results/" + EXPERIMENT + "/" + TYPE); + + const int NUMBER_OF_RULES = 5; + const int NUMBER_OF_CLUSTERING_ITERATIONS = 100; + const int NUMBER_OF_TUNING_ITERATIONS = 100; + + const bool NORMALISATION = false; + + const double ETA = 0.001; + const double POSITIVE_CLASS_LABEL = 1.0; + const double NEGATIVE_CLASS_LABEL = 0.0; + + // dataset + std::string dataset_name { "haberman" }; + + std::cout << "data set: " << dataset_name << std::endl; + std::string dataset {DATA_DIRECTORY + "/" + dataset_name}; + + std::string results_dir {RESULTS_DIRECTORY + "/" + dataset_name}; + std::string TRAIN (dataset + "/" + dataset_name + ".train"); + std::string TEST (dataset + "/" + dataset_name + ".test"); + std::string RESULTS (results_dir + "/results-" + dataset_name); + + // MA + { + for (auto th : thresholds) // for all thresholds + { + + ksi::ma system (NUMBER_OF_RULES, NUMBER_OF_CLUSTERING_ITERATIONS, NUMBER_OF_TUNING_ITERATIONS, ETA, NORMALISATION, Tnorm, POSITIVE_CLASS_LABEL, NEGATIVE_CLASS_LABEL, th); + + std::string threshold_name; + switch(th) + { + case ksi::roc_threshold::mean : threshold_name = "mean"; break; + case ksi::roc_threshold::minimal_distance : threshold_name = "minimal_distance"; break; + case 
ksi::roc_threshold::youden : threshold_name = "youden"; break; + default : threshold_name = "something-wrong-has-happened"; break; + } + + std::string result_file { RESULTS + "-" + system.get_nfs_name() + "-" + threshold_name + RESULT_EXTENSION }; + std::cout << "\tmethod: " << system.get_nfs_name() << std::endl; + std::cout << "\tthreshold: " << threshold_name << std::endl; + system.experiment_classification(TRAIN, TEST, result_file); + std::cout << "\tResults saved to file " << result_file << std::endl; + std::cout << std::endl; + } + } + + // TSK + { + for (auto th : thresholds) // for all thresholds + { + + ksi::tsk system (NUMBER_OF_RULES, NUMBER_OF_CLUSTERING_ITERATIONS, NUMBER_OF_TUNING_ITERATIONS, ETA, NORMALISATION, Tnorm, POSITIVE_CLASS_LABEL, NEGATIVE_CLASS_LABEL, th); + + std::string threshold_name; + switch(th) + { + case ksi::roc_threshold::mean : threshold_name = "mean"; break; + case ksi::roc_threshold::minimal_distance : threshold_name = "minimal_distance"; break; + case ksi::roc_threshold::youden : threshold_name = "youden"; break; + default : threshold_name = "something-wrong-has-happened"; break; + } + + + std::string result_file { RESULTS + "-" + system.get_nfs_name() + "-" + threshold_name + RESULT_EXTENSION }; + std::cout << "\tmethod: " << system.get_nfs_name() << std::endl; + std::cout << "\tthreshold: " << threshold_name << std::endl; + system.experiment_classification(TRAIN, TEST, result_file); + std::cout << "\tResults saved to file " << result_file << std::endl; + std::cout << std::endl; + } + } + + // ANNBFIS + { + for (auto th : thresholds) // for all thresholds + { + + ksi::annbfis system (NUMBER_OF_RULES, NUMBER_OF_CLUSTERING_ITERATIONS, NUMBER_OF_TUNING_ITERATIONS, ETA, NORMALISATION, Tnorm, implication, POSITIVE_CLASS_LABEL, NEGATIVE_CLASS_LABEL, th); + + std::string threshold_name; + switch(th) + { + case ksi::roc_threshold::mean : threshold_name = "mean"; break; + case ksi::roc_threshold::minimal_distance : threshold_name = 
"minimal_distance"; break; + case ksi::roc_threshold::youden : threshold_name = "youden"; break; + default : threshold_name = "something-wrong-has-happened"; break; + } + + + std::string result_file { RESULTS + "-" + system.get_nfs_name() + "-" + threshold_name + RESULT_EXTENSION }; + std::cout << "\tmethod: " << system.get_nfs_name() << std::endl; + std::cout << "\tthreshold: " << threshold_name << std::endl; + system.experiment_classification(TRAIN, TEST, result_file); + std::cout << "\tResults saved to file " << result_file << std::endl; + std::cout << std::endl; + } + } + + // SUBSPACE_ANNBFIS + { + for (auto th : thresholds) // for all thresholds + { + ksi::subspace_annbfis system (NUMBER_OF_RULES, NUMBER_OF_CLUSTERING_ITERATIONS, NUMBER_OF_TUNING_ITERATIONS, ETA, NORMALISATION, Tnorm, implication, POSITIVE_CLASS_LABEL, NEGATIVE_CLASS_LABEL, th); + + std::string threshold_name; + switch(th) + { + case ksi::roc_threshold::mean : threshold_name = "mean"; break; + case ksi::roc_threshold::minimal_distance : threshold_name = "minimal_distance"; break; + case ksi::roc_threshold::youden : threshold_name = "youden"; break; + default : threshold_name = "something-wrong-has-happened"; break; + } + + + std::string result_file { RESULTS + "-" + system.get_nfs_name() + "-" + threshold_name + RESULT_EXTENSION }; + std::cout << "\tmethod: " << system.get_nfs_name() << std::endl; + std::cout << "\tthreshold: " << threshold_name << std::endl; + system.experiment_classification(TRAIN, TEST, result_file); + std::cout << "\tResults saved to file " << result_file << std::endl; + std::cout << std::endl; + } + } + + // FUBI_ANNBFIS + { + for (auto th : thresholds) // for all thresholds + { + + ksi::fubi_annbfis system (NUMBER_OF_RULES, NUMBER_OF_CLUSTERING_ITERATIONS, NUMBER_OF_TUNING_ITERATIONS, ETA, NORMALISATION, Tnorm, implication, POSITIVE_CLASS_LABEL, NEGATIVE_CLASS_LABEL, th); + + + std::string threshold_name; + switch(th) + { + case ksi::roc_threshold::mean : threshold_name 
= "mean"; break; + case ksi::roc_threshold::minimal_distance : threshold_name = "minimal_distance"; break; + case ksi::roc_threshold::youden : threshold_name = "youden"; break; + default : threshold_name = "something-wrong-has-happened"; break; + } + + std::string result_file { RESULTS + "-" + system.get_nfs_name() + "-" + threshold_name + RESULT_EXTENSION }; + std::cout << "\tmethod: " << system.get_nfs_name() << std::endl; + std::cout << "\tthreshold: " << threshold_name << std::endl; + system.experiment_classification(TRAIN, TEST, result_file); + std::cout << "\tResults saved to file " << result_file << std::endl; + std::cout << std::endl; + } + } + + // MINKOWSKI PROTOTYPE PROTO_TSK NEURO-FUZZY CLASSIFIER + { + const double POSITIVE { 1 }; + const double NEGATIVE { 0 }; + + for (auto th : thresholds) // for all thresholds + { + double minkowski_coefficient = 2.0; + ksi::fac_prototype_minkowski_classification factory (minkowski_coefficient, POSITIVE, NEGATIVE); + + ksi::tsk_prototype system (NUMBER_OF_RULES, NUMBER_OF_CLUSTERING_ITERATIONS, NUMBER_OF_TUNING_ITERATIONS, ETA, NORMALISATION, factory, POSITIVE, NEGATIVE, th); + + + std::string threshold_name; + switch(th) + { + case ksi::roc_threshold::mean : threshold_name = "mean"; break; + case ksi::roc_threshold::minimal_distance : threshold_name = "minimal_distance"; break; + case ksi::roc_threshold::youden : threshold_name = "youden"; break; + default : threshold_name = "something-wrong-has-happened"; break; + } + + std::string result_file { RESULTS + "-" + system.get_nfs_name() + "-" + threshold_name + RESULT_EXTENSION }; + std::cout << "\tmethod: " << system.get_nfs_name() << std::endl; + std::cout << "\tthreshold: " << threshold_name << std::endl; + system.experiment_classification(TRAIN, TEST, result_file); + std::cout << "\tResults saved to file " << result_file << std::endl; + std::cout << std::endl; + } + } + + // MINKOWSKI PROTOTYPE ANNBFIS NEURO-FUZZY CLASSIFIER + { + const double POSITIVE { 1 }; + 
const double NEGATIVE { 0 }; + const ksi::imp_reichenbach IMPLICATION; + + for (auto th : thresholds) // for all thresholds + { + double minkowski_coefficient = 2.0; + ksi::fac_prototype_minkowski_classification factory (minkowski_coefficient, POSITIVE, NEGATIVE); + + ksi::annbfis_prototype system (NUMBER_OF_RULES, NUMBER_OF_CLUSTERING_ITERATIONS, NUMBER_OF_TUNING_ITERATIONS, ETA, NORMALISATION, IMPLICATION, factory, POSITIVE, NEGATIVE, th); + + + std::string threshold_name; + switch(th) + { + case ksi::roc_threshold::mean : threshold_name = "mean"; break; + case ksi::roc_threshold::minimal_distance : threshold_name = "minimal_distance"; break; + case ksi::roc_threshold::youden : threshold_name = "youden"; break; + default : threshold_name = "something-wrong-has-happened"; break; + } + + std::string result_file { RESULTS + "-" + system.get_nfs_name() + "-" + threshold_name + RESULT_EXTENSION }; + std::cout << "\tmethod: " << system.get_nfs_name() << std::endl; + std::cout << "\tthreshold: " << threshold_name << std::endl; + system.experiment_classification(TRAIN, TEST, result_file); + std::cout << "\tResults saved to file " << result_file << std::endl; + std::cout << std::endl; + } + } + + // MAHALANOBIS PROTOTYPE TSK NEURO-FUZZY CLASSIFIER + { + const int NUMBER_OF_RULES = 3; + const double POSITIVE { 1 }; + const double NEGATIVE { 0 }; + + for (auto th : thresholds) // for all thresholds + { + ksi::fac_prototype_mahalanobis_classification factory; + + ksi::gk algorithm; + algorithm.setNumberOfIterations(NUMBER_OF_CLUSTERING_ITERATIONS); + algorithm.setNumberOfClusters(NUMBER_OF_RULES); + + ksi::tsk_prototype system (algorithm, NUMBER_OF_TUNING_ITERATIONS, ETA, NORMALISATION, factory, POSITIVE, NEGATIVE, th); + + std::string threshold_name; + switch(th) + { + case ksi::roc_threshold::mean : threshold_name = "mean"; break; + case ksi::roc_threshold::minimal_distance : threshold_name = "minimal_distance"; break; + case ksi::roc_threshold::youden : threshold_name = 
"youden"; break; + default : threshold_name = "something-wrong-has-happened"; break; + } + + std::string result_file { RESULTS + "-" + system.get_nfs_name() + "-" + threshold_name + RESULT_EXTENSION }; + std::cout << "\tmethod: " << system.get_nfs_name() << std::endl; + std::cout << "\tthreshold: " << threshold_name << std::endl; + system.experiment_classification(TRAIN, TEST, result_file); + std::cout << "\tResults saved to file " << result_file << std::endl; + std::cout << std::endl; + } + } + + // MAHALANOBIS PROTOTYPE ANNBFIS NEURO-FUZZY CLASSIFIER + { + const int NUMBER_OF_RULES = 3; + const double POSITIVE { 1 }; + const double NEGATIVE { 0 }; + const ksi::imp_reichenbach IMPLICATION; + + // auto th = ksi::roc_threshold::mean; /// @todo przywróć pętlę + for (auto th : thresholds) // for all thresholds + { + ksi::fac_prototype_mahalanobis_classification factory; + + //double rho { 1.0 }; + //ksi::gk algorithm(rho); + ksi::gk algorithm; + algorithm.setNumberOfIterations(NUMBER_OF_CLUSTERING_ITERATIONS); + algorithm.setNumberOfClusters(NUMBER_OF_RULES); + + // debug(NUMBER_OF_RULES); + ksi::annbfis_prototype system (algorithm, NUMBER_OF_TUNING_ITERATIONS, ETA, NORMALISATION, IMPLICATION, factory, POSITIVE, NEGATIVE, th); + + std::string threshold_name; + switch(th) + { + case ksi::roc_threshold::mean : threshold_name = "mean"; break; + case ksi::roc_threshold::minimal_distance : threshold_name = "minimal_distance"; break; + case ksi::roc_threshold::youden : threshold_name = "youden"; break; + default : threshold_name = "something-wrong-has-happened"; break; + } + + + std::string result_file { RESULTS + "-" + system.get_nfs_name() + "-" + threshold_name + RESULT_EXTENSION }; + std::cout << "\tmethod: " << system.get_nfs_name() << std::endl; + std::cout << "\tthreshold: " << threshold_name << std::endl; + system.experiment_classification(TRAIN, TEST, result_file); + std::cout << "\tResults saved to file " << result_file << std::endl; + std::cout << std::endl; + } 
+ } + + // THREE-WAY DECISION NEURO-FUZZY CLASSIFIER (single noncommitment value) + { + double noncommitment_value = 0.1; // half of width of the noncommitment interval + + for (auto th : thresholds) // for all thresholds + { + // We compose several types of cascades built of neuro-fuzzy systems: + std::vector> systems + { + // ANNBFIS + std::shared_ptr (new ksi::annbfis (NUMBER_OF_RULES, NUMBER_OF_CLUSTERING_ITERATIONS, NUMBER_OF_TUNING_ITERATIONS, ETA, NORMALISATION, Tnorm, implication, POSITIVE_CLASS_LABEL, NEGATIVE_CLASS_LABEL, th)), + + // TSK + std::shared_ptr (new ksi::tsk (NUMBER_OF_RULES, NUMBER_OF_CLUSTERING_ITERATIONS, NUMBER_OF_TUNING_ITERATIONS, ETA, NORMALISATION, Tnorm, POSITIVE_CLASS_LABEL, NEGATIVE_CLASS_LABEL, th)), + + // subspace ANNBFIS + std::shared_ptr (new ksi::subspace_annbfis (NUMBER_OF_RULES, NUMBER_OF_CLUSTERING_ITERATIONS, NUMBER_OF_TUNING_ITERATIONS, ETA, NORMALISATION, Tnorm, implication, POSITIVE_CLASS_LABEL, NEGATIVE_CLASS_LABEL, th)), + + // FuBi ANNBFIS + std::shared_ptr (new ksi::fubi_annbfis (NUMBER_OF_RULES, NUMBER_OF_CLUSTERING_ITERATIONS, NUMBER_OF_TUNING_ITERATIONS, ETA, NORMALISATION, Tnorm, implication, POSITIVE_CLASS_LABEL, NEGATIVE_CLASS_LABEL, th)) + }; + + + // for all systems we compose cascades: + for (auto & nfs : systems) + { + std::vector> cascade_of_nfs + { + std::shared_ptr (nfs->clone()), + std::shared_ptr (nfs->clone()), + std::shared_ptr (nfs->clone()) + }; + + std::string threshold_name; + switch(th) + { + case ksi::roc_threshold::mean : threshold_name = "mean"; break; + case ksi::roc_threshold::minimal_distance : threshold_name = "minimal_distance"; break; + case ksi::roc_threshold::youden : threshold_name = "youden"; break; + default : threshold_name = "something-wrong-has-happened"; break; + } + + // And we run experiments: + std::string cascade_name; + for (const auto & p : cascade_of_nfs) + cascade_name += std::string{"-"} + p->get_nfs_name(); + cascade_name += std::string{"-"} + 
std::to_string(noncommitment_value); + + std::string result_file { RESULTS + "-3WDNFS-" + cascade_name + "-" + threshold_name + RESULT_EXTENSION }; + ksi::three_way_decision_nfs system (cascade_of_nfs, TRAIN, TEST, result_file, noncommitment_value); + std::cout << "\tmethod: " << system.get_nfs_name() << std::endl; + std::cout << "\tthreshold: " << threshold_name << std::endl; + system.experiment_classification(TRAIN, TEST, result_file); + std::cout << "\tResults saved to file " << result_file << std::endl; + std::cout << std::endl; + + + } + } + } + + // THREE-WAY DECISION NEURO-FUZZY CLASSIFIER (separate noncommitment values) + { + + for (auto th : thresholds) // for all thresholds + { + // We compose several types of cascades built of neuro-fuzzy systems: + std::vector> systems + { + // ANNBFIS + std::shared_ptr (new ksi::annbfis (NUMBER_OF_RULES, NUMBER_OF_CLUSTERING_ITERATIONS, NUMBER_OF_TUNING_ITERATIONS, ETA, NORMALISATION, Tnorm, implication, POSITIVE_CLASS_LABEL, NEGATIVE_CLASS_LABEL, th)), + + // TSK + std::shared_ptr (new ksi::tsk (NUMBER_OF_RULES, NUMBER_OF_CLUSTERING_ITERATIONS, NUMBER_OF_TUNING_ITERATIONS, ETA, NORMALISATION, Tnorm, POSITIVE_CLASS_LABEL, NEGATIVE_CLASS_LABEL, th)), + + // subspace ANNBFIS + std::shared_ptr (new ksi::subspace_annbfis (NUMBER_OF_RULES, NUMBER_OF_CLUSTERING_ITERATIONS, NUMBER_OF_TUNING_ITERATIONS, ETA, NORMALISATION, Tnorm, implication, POSITIVE_CLASS_LABEL, NEGATIVE_CLASS_LABEL, th)), + + // FuBi ANNBFIS + std::shared_ptr (new ksi::fubi_annbfis (NUMBER_OF_RULES, NUMBER_OF_CLUSTERING_ITERATIONS, NUMBER_OF_TUNING_ITERATIONS, ETA, NORMALISATION, Tnorm, implication, POSITIVE_CLASS_LABEL, NEGATIVE_CLASS_LABEL, th)) + }; + + + std::vector noncommitment_values = {0.2, 0.1, 0.0}; // half of width of the noncommitment interval + // for all systems we compose cascades: + for (auto & nfs : systems) + { + std::vector> cascade_of_nfs + { + std::shared_ptr (nfs->clone()), + std::shared_ptr (nfs->clone()), + std::shared_ptr 
(nfs->clone()) + }; + + std::string threshold_name; + switch(th) + { + case ksi::roc_threshold::mean : threshold_name = "mean"; break; + case ksi::roc_threshold::minimal_distance : threshold_name = "minimal_distance"; break; + case ksi::roc_threshold::youden : threshold_name = "youden"; break; + default : threshold_name = "something-wrong-has-happened"; break; + } + + // And we run experiments: + std::string cascade_name; + for (const auto & p : cascade_of_nfs) + cascade_name += std::string{"-"} + p->get_nfs_name(); + cascade_name += std::string{"-"} + ksi::to_string(noncommitment_values); + + std::string result_file { RESULTS + "-3WDNFS-" + cascade_name + "-" + threshold_name + RESULT_EXTENSION }; + ksi::three_way_decision_nfs system (cascade_of_nfs, TRAIN, TEST, result_file, noncommitment_values); + std::cout << "\tmethod: " << system.get_nfs_name() << std::endl; + std::cout << "\tthreshold: " << threshold_name << std::endl; + system.experiment_classification(TRAIN, TEST, result_file); + std::cout << "\tResults saved to file " << result_file << std::endl; + std::cout << std::endl; + + + } + } + } } void ksi::exp_005::regression() { - std::cout << std::endl; - std::cout << "regression" << std::endl; - - ksi::imp_reichenbach implication; - ksi::t_norm_product Tnorm; - std::string RESULT_EXTENSION {".txt"}; - - const std::string EXPERIMENT ("exp-005"); - const std::string TYPE ("regression"); - const std::string DATA_DIRECTORY ("../data/" + EXPERIMENT + "/" + TYPE); - const std::string RESULTS_DIRECTORY ("../results/" + EXPERIMENT + "/" + TYPE); - - const int NUMBER_OF_RULES = 5; - const int NUMBER_OF_CLUSTERING_ITERATIONS = 100; - const int NUMBER_OF_TUNING_ITERATIONS = 100; - - const bool NORMALISATION = false; - - const double ETA = 0.001; - - std::string dataset_name { "leukocytes" }; - - std::cout << "data set: " << dataset_name << std::endl; - std::string dataset {DATA_DIRECTORY + "/" + dataset_name}; - - std::string results_dir {RESULTS_DIRECTORY + "/" + 
dataset_name}; - std::string TRAIN (dataset + "/" + dataset_name + ".train"); - std::string TEST (dataset + "/" + dataset_name + ".test"); - std::string RESULTS (results_dir + "/results-" + dataset_name); - - // MA - { - ksi::ma system (NUMBER_OF_RULES, NUMBER_OF_CLUSTERING_ITERATIONS, NUMBER_OF_TUNING_ITERATIONS, ETA, NORMALISATION, Tnorm); - std::cout << "\tmethod: " << system.get_nfs_name() << std::endl; - std::string result_file { RESULTS + "-" + system.get_nfs_name() + RESULT_EXTENSION }; - system.experiment_regression(TRAIN, TEST, result_file); - std::cout << "\tResults saved to file " << result_file << std::endl; - std::cout << std::endl; - } - - // TSK - { - ksi::tsk system (NUMBER_OF_RULES, NUMBER_OF_CLUSTERING_ITERATIONS, NUMBER_OF_TUNING_ITERATIONS, ETA, NORMALISATION, Tnorm); - std::cout << "\tmethod: " << system.get_nfs_name() << std::endl; - std::string result_file { RESULTS + "-" + system.get_nfs_name() + RESULT_EXTENSION }; - system.experiment_regression(TRAIN, TEST, result_file); - std::cout << "\tResults saved to file " << result_file << std::endl; - std::cout << std::endl; - } - - // ANNBFIS - { - ksi::annbfis system (NUMBER_OF_RULES, NUMBER_OF_CLUSTERING_ITERATIONS, NUMBER_OF_TUNING_ITERATIONS, ETA, NORMALISATION, Tnorm, implication); - std::cout << "\tmethod: " << system.get_nfs_name() << std::endl; - std::string result_file { RESULTS + "-" + system.get_nfs_name() + RESULT_EXTENSION }; - system.experiment_regression(TRAIN, TEST, result_file); - std::cout << "\tResults saved to file " << result_file << std::endl; - std::cout << std::endl; - } - - // SUBSPACE_ANNBFIS - { - ksi::subspace_annbfis system (NUMBER_OF_RULES, NUMBER_OF_CLUSTERING_ITERATIONS, NUMBER_OF_TUNING_ITERATIONS, ETA, NORMALISATION, Tnorm, implication); - std::cout << "\tmethod: " << system.get_nfs_name() << std::endl; - std::string result_file { RESULTS + "-" + system.get_nfs_name() + RESULT_EXTENSION }; - system.experiment_regression(TRAIN, TEST, result_file); - std::cout << 
"\tResults saved to file " << result_file << std::endl; - std::cout << std::endl; - } - - // FUBI_ANNBFIS - { - ksi::fubi_annbfis system (NUMBER_OF_RULES, NUMBER_OF_CLUSTERING_ITERATIONS, NUMBER_OF_TUNING_ITERATIONS, ETA, NORMALISATION, Tnorm, implication); - std::cout << "\tmethod: " << system.get_nfs_name() << std::endl; - std::string result_file { RESULTS + "-" + system.get_nfs_name() + RESULT_EXTENSION }; - system.experiment_regression(TRAIN, TEST, result_file); - std::cout << "\tResults saved to file " << result_file << std::endl; - std::cout << std::endl; - } - - // MINKOWSKI PROTOTYPE TSK - { - const double minkowski_coefficient { 2.0 }; - ksi::fac_prototype_minkowski_regression factory (minkowski_coefficient); - ksi::tsk_prototype system (NUMBER_OF_RULES, NUMBER_OF_CLUSTERING_ITERATIONS, NUMBER_OF_TUNING_ITERATIONS, ETA, NORMALISATION, factory); - std::cout << "\tmethod: " << system.get_nfs_name() << std::endl; - std::string result_file { RESULTS + "-" + system.get_nfs_name() + RESULT_EXTENSION }; - system.experiment_regression(TRAIN, TEST, result_file); - std::cout << "\tResults saved to file " << result_file << std::endl; - std::cout << std::endl; - } - - // MINKOWSKI PROTOTYPE ANNBFIS - { - const double minkowski_coefficient { 2.0 }; - ksi::fac_prototype_minkowski_regression factory (minkowski_coefficient); - ksi::annbfis_prototype system (NUMBER_OF_RULES, NUMBER_OF_CLUSTERING_ITERATIONS, NUMBER_OF_TUNING_ITERATIONS, ETA, NORMALISATION, ksi::imp_reichenbach(), factory); - std::cout << "\tmethod: " << system.get_nfs_name() << std::endl; - std::string result_file { RESULTS + "-" + system.get_nfs_name() + RESULT_EXTENSION }; - system.experiment_regression(TRAIN, TEST, result_file); - std::cout << "\tResults saved to file " << result_file << std::endl; - std::cout << std::endl; - } - // MAHALANOBIS PROTOTYPE TSK - { - //const int NUMBER_OF_RULES = 3; - ksi::gk algorithm; - algorithm.setNumberOfIterations(NUMBER_OF_CLUSTERING_ITERATIONS); - 
algorithm.setNumberOfClusters(NUMBER_OF_RULES); - - ksi::fac_prototype_mahalanobis_regression factory; - ksi::tsk_prototype system (algorithm, NUMBER_OF_TUNING_ITERATIONS, ETA, NORMALISATION, factory); - - std::cout << "\tmethod: " << system.get_nfs_name() << std::endl; - std::string result_file { RESULTS + "-" + system.get_nfs_name() + RESULT_EXTENSION }; - system.experiment_regression(TRAIN, TEST, result_file); - std::cout << "\tResults saved to file " << result_file << std::endl; - std::cout << std::endl; - } - // MAHALANOBIS PROTOTYPE ANNBFIS - { - //const int NUMBER_OF_RULES = 3; - ksi::gk algorithm; - algorithm.setNumberOfIterations(NUMBER_OF_CLUSTERING_ITERATIONS); - algorithm.setNumberOfClusters(NUMBER_OF_RULES); - - ksi::imp_reichenbach impl; - ksi::fac_prototype_mahalanobis_regression factory; - ksi::annbfis_prototype system (algorithm, NUMBER_OF_TUNING_ITERATIONS, ETA, NORMALISATION, impl, factory); - - std::cout << "\tmethod: " << system.get_nfs_name() << std::endl; - std::string result_file { RESULTS + "-" + system.get_nfs_name() + RESULT_EXTENSION }; - system.experiment_regression(TRAIN, TEST, result_file); - std::cout << "\tResults saved to file " << result_file << std::endl; - std::cout << std::endl; - } + std::cout << std::endl; + std::cout << "regression" << std::endl; + + ksi::imp_reichenbach implication; + ksi::t_norm_product Tnorm; + std::string RESULT_EXTENSION {".txt"}; + + const std::string EXPERIMENT ("exp-005"); + const std::string TYPE ("regression"); + const std::string DATA_DIRECTORY ("../data/" + EXPERIMENT + "/" + TYPE); + const std::string RESULTS_DIRECTORY ("../results/" + EXPERIMENT + "/" + TYPE); + + const int NUMBER_OF_RULES = 5; + const int NUMBER_OF_CLUSTERING_ITERATIONS = 100; + const int NUMBER_OF_TUNING_ITERATIONS = 100; + + const bool NORMALISATION = false; + + const double ETA = 0.001; + + std::string dataset_name { "leukocytes" }; + + std::cout << "data set: " << dataset_name << std::endl; + std::string dataset 
{DATA_DIRECTORY + "/" + dataset_name}; + + std::string results_dir {RESULTS_DIRECTORY + "/" + dataset_name}; + std::string TRAIN (dataset + "/" + dataset_name + ".train"); + std::string TEST (dataset + "/" + dataset_name + ".test"); + std::string RESULTS (results_dir + "/results-" + dataset_name); + + // MA + { + ksi::ma system (NUMBER_OF_RULES, NUMBER_OF_CLUSTERING_ITERATIONS, NUMBER_OF_TUNING_ITERATIONS, ETA, NORMALISATION, Tnorm); + std::cout << "\tmethod: " << system.get_nfs_name() << std::endl; + std::string result_file { RESULTS + "-" + system.get_nfs_name() + RESULT_EXTENSION }; + system.experiment_regression(TRAIN, TEST, result_file); + std::cout << "\tResults saved to file " << result_file << std::endl; + std::cout << std::endl; + } + + // TSK + { + ksi::tsk system (NUMBER_OF_RULES, NUMBER_OF_CLUSTERING_ITERATIONS, NUMBER_OF_TUNING_ITERATIONS, ETA, NORMALISATION, Tnorm); + std::cout << "\tmethod: " << system.get_nfs_name() << std::endl; + std::string result_file { RESULTS + "-" + system.get_nfs_name() + RESULT_EXTENSION }; + system.experiment_regression(TRAIN, TEST, result_file); + std::cout << "\tResults saved to file " << result_file << std::endl; + std::cout << std::endl; + } + + // ANNBFIS + { + ksi::annbfis system (NUMBER_OF_RULES, NUMBER_OF_CLUSTERING_ITERATIONS, NUMBER_OF_TUNING_ITERATIONS, ETA, NORMALISATION, Tnorm, implication); + std::cout << "\tmethod: " << system.get_nfs_name() << std::endl; + std::string result_file { RESULTS + "-" + system.get_nfs_name() + RESULT_EXTENSION }; + system.experiment_regression(TRAIN, TEST, result_file); + std::cout << "\tResults saved to file " << result_file << std::endl; + std::cout << std::endl; + } + + // SUBSPACE_ANNBFIS + { + ksi::subspace_annbfis system (NUMBER_OF_RULES, NUMBER_OF_CLUSTERING_ITERATIONS, NUMBER_OF_TUNING_ITERATIONS, ETA, NORMALISATION, Tnorm, implication); + std::cout << "\tmethod: " << system.get_nfs_name() << std::endl; + std::string result_file { RESULTS + "-" + system.get_nfs_name() + 
RESULT_EXTENSION }; + system.experiment_regression(TRAIN, TEST, result_file); + std::cout << "\tResults saved to file " << result_file << std::endl; + std::cout << std::endl; + } + + // FUBI_ANNBFIS + { + ksi::fubi_annbfis system (NUMBER_OF_RULES, NUMBER_OF_CLUSTERING_ITERATIONS, NUMBER_OF_TUNING_ITERATIONS, ETA, NORMALISATION, Tnorm, implication); + std::cout << "\tmethod: " << system.get_nfs_name() << std::endl; + std::string result_file { RESULTS + "-" + system.get_nfs_name() + RESULT_EXTENSION }; + system.experiment_regression(TRAIN, TEST, result_file); + std::cout << "\tResults saved to file " << result_file << std::endl; + std::cout << std::endl; + } + + // MINKOWSKI PROTOTYPE TSK + { + const double minkowski_coefficient { 2.0 }; + ksi::fac_prototype_minkowski_regression factory (minkowski_coefficient); + ksi::tsk_prototype system (NUMBER_OF_RULES, NUMBER_OF_CLUSTERING_ITERATIONS, NUMBER_OF_TUNING_ITERATIONS, ETA, NORMALISATION, factory); + std::cout << "\tmethod: " << system.get_nfs_name() << std::endl; + std::string result_file { RESULTS + "-" + system.get_nfs_name() + RESULT_EXTENSION }; + system.experiment_regression(TRAIN, TEST, result_file); + std::cout << "\tResults saved to file " << result_file << std::endl; + std::cout << std::endl; + } + + // MINKOWSKI PROTOTYPE ANNBFIS + { + const double minkowski_coefficient { 2.0 }; + ksi::fac_prototype_minkowski_regression factory (minkowski_coefficient); + ksi::annbfis_prototype system (NUMBER_OF_RULES, NUMBER_OF_CLUSTERING_ITERATIONS, NUMBER_OF_TUNING_ITERATIONS, ETA, NORMALISATION, ksi::imp_reichenbach(), factory); + std::cout << "\tmethod: " << system.get_nfs_name() << std::endl; + std::string result_file { RESULTS + "-" + system.get_nfs_name() + RESULT_EXTENSION }; + system.experiment_regression(TRAIN, TEST, result_file); + std::cout << "\tResults saved to file " << result_file << std::endl; + std::cout << std::endl; + } + // MAHALANOBIS PROTOTYPE TSK + { + //const int NUMBER_OF_RULES = 3; + ksi::gk 
algorithm; + algorithm.setNumberOfIterations(NUMBER_OF_CLUSTERING_ITERATIONS); + algorithm.setNumberOfClusters(NUMBER_OF_RULES); + + ksi::fac_prototype_mahalanobis_regression factory; + ksi::tsk_prototype system (algorithm, NUMBER_OF_TUNING_ITERATIONS, ETA, NORMALISATION, factory); + + std::cout << "\tmethod: " << system.get_nfs_name() << std::endl; + std::string result_file { RESULTS + "-" + system.get_nfs_name() + RESULT_EXTENSION }; + system.experiment_regression(TRAIN, TEST, result_file); + std::cout << "\tResults saved to file " << result_file << std::endl; + std::cout << std::endl; + } + // MAHALANOBIS PROTOTYPE ANNBFIS + { + //const int NUMBER_OF_RULES = 3; + ksi::gk algorithm; + algorithm.setNumberOfIterations(NUMBER_OF_CLUSTERING_ITERATIONS); + algorithm.setNumberOfClusters(NUMBER_OF_RULES); + + ksi::imp_reichenbach impl; + ksi::fac_prototype_mahalanobis_regression factory; + ksi::annbfis_prototype system (algorithm, NUMBER_OF_TUNING_ITERATIONS, ETA, NORMALISATION, impl, factory); + + std::cout << "\tmethod: " << system.get_nfs_name() << std::endl; + std::string result_file { RESULTS + "-" + system.get_nfs_name() + RESULT_EXTENSION }; + system.experiment_regression(TRAIN, TEST, result_file); + std::cout << "\tResults saved to file " << result_file << std::endl; + std::cout << std::endl; + } } void ksi::exp_005::execute() { - try - { - classification(); - regression(); - } CATCH; + try + { + classification(); + regression(); + } CATCH; - return; + return; } diff --git a/source/makefile b/source/makefile index 50dcfa3..7b29b0a 100644 --- a/source/makefile +++ b/source/makefile @@ -66,9 +66,6 @@ lab : $(release_folder) $(release_folder)/main #----------------------- -# [PL] opcje uruchomienia projektu: -# [EN] project run options: - release : $(release_folder) $(release_folder)/main ./$(release_folder)/main @@ -285,6 +282,10 @@ $(release_folder)/auxiliary-tempus.o : auxiliary/tempus.cpp $(compiler) $(standard) $(release) $(optRelease) $(parallel) $(errors) -c 
-o $@ $^ $(debug_folder)/auxiliary-tempus.o : auxiliary/tempus.cpp $(compiler) $(standard) $(debug) $(optyDebug) $(parallel) $(errors) $(sanitizer) -c -o $@ $^ +$(release_folder)/auxiliary-to_string.o : auxiliary/to_string.cpp + $(compiler) $(standard) $(release) $(optRelease) $(parallel) $(errors) -c -o $@ $^ +$(debug_folder)/auxiliary-to_string.o : auxiliary/to_string.cpp + $(compiler) $(standard) $(debug) $(optyDebug) $(parallel) $(errors) $(sanitizer) -c -o $@ $^ $(release_folder)/auxiliary-confusion-matrix.o : auxiliary/confusion-matrix.cpp $(compiler) $(standard) $(release) $(optRelease) $(parallel) $(errors) -c -o $@ $^ $(debug_folder)/auxiliary-confusion-matrix.o : auxiliary/confusion-matrix.cpp @@ -984,6 +985,7 @@ $(release_folder)/neuro-fuzzy-granular_ma_regression.o \ $(release_folder)/neuro-fuzzy-granular_tsk_regression.o \ $(release_folder)/neuro-fuzzy-weighted_annbfis.o \ $(release_folder)/neuro-fuzzy-abstract_fcom.o \ +$(release_folder)/auxiliary-to_string.o \ $(release_folder)/tnorms-t-norm-dubois-prade.o \ $(release_folder)/readers-reader-incomplete.o \ $(release_folder)/partitions-rsfcm.o \ @@ -1194,6 +1196,7 @@ $(debug_folder)/neuro-fuzzy-granular_ma_regression.o \ $(debug_folder)/neuro-fuzzy-granular_tsk_regression.o \ $(debug_folder)/neuro-fuzzy-weighted_annbfis.o \ $(debug_folder)/neuro-fuzzy-abstract_fcom.o \ +$(debug_folder)/auxiliary-to_string.o \ $(debug_folder)/tnorms-t-norm-dubois-prade.o \ $(debug_folder)/readers-reader-incomplete.o \ $(debug_folder)/partitions-rsfcm.o \ diff --git a/source/neuro-fuzzy/abstract-annbfis.cpp b/source/neuro-fuzzy/abstract-annbfis.cpp index 2e83bb4..27cad55 100644 --- a/source/neuro-fuzzy/abstract-annbfis.cpp +++ b/source/neuro-fuzzy/abstract-annbfis.cpp @@ -60,7 +60,9 @@ void ksi::abstract_annbfis::createFuzzyRulebase int nClusteringIterations, int nTuningIterations, double eta, - const ksi::dataset& train ) + const ksi::dataset& train, + const ksi::dataset& validation + ) { try { @@ -89,7 +91,6 @@ void 
ksi::abstract_annbfis::createFuzzyRulebase double dbTheBestRMSE = std::numeric_limits::max(); //////// - std::size_t nAttr = train.getNumberOfAttributes(); std::size_t nAttr_1 = nAttr - 1; @@ -97,9 +98,16 @@ void ksi::abstract_annbfis::createFuzzyRulebase auto trainX = XY.first; auto trainY = XY.second; -// fcm clusterer; -// clusterer.setNumberOfClusters(_nRules); -// clusterer.setNumberOfIterations(_nClusteringIterations); + auto XYval = validation.splitDataSetVertically(nAttr - 1); + auto validateX = XYval.first; + auto validateY = XYval.second; + + auto mvalidateY = validateY.getMatrix(); + auto nValY = validateY.getNumberOfData(); + std::vector wvalidateY (nValY); + for (std::size_t x = 0; x < nValY; x++) + wvalidateY[x] = mvalidateY[x][0]; + //////////////////////// auto podzial = doPartition(trainX); _nRules = podzial.getNumberOfClusters(); @@ -203,14 +211,16 @@ void ksi::abstract_annbfis::createFuzzyRulebase } } + ////////////////////////////////// // test: wyznaczam blad systemu - std::vector wYelaborated (nX); + + std::vector wYelaborated (nValY); - for (std::size_t x = 0; x < nX; x++) - wYelaborated[x] = answer( *(trainX.getDatum(x))); + for (std::size_t x = 0; x < nValY; x++) // iterate over the validation items that wYelaborated was sized for + wYelaborated[x] = answer( *(validateX.getDatum(x))); /////////////////////////// ksi::error_RMSE rmse; - double blad = rmse.getError(wY, wYelaborated); + double blad = rmse.getError(wvalidateY, wYelaborated); // std::cout << __FILE__ << " (" << __LINE__ << ") " << "coeff: " << eta << ", iter: " << i << ", RMSE(train): " << blad << std::endl; errors.push_front(blad); @@ -222,7 +232,6 @@ void ksi::abstract_annbfis::createFuzzyRulebase pTheBest = std::unique_ptr(_pRulebase->clone()); } /////////////////////////// - } // system nastrojony :-) // update the rulebase with the best one: diff --git a/source/neuro-fuzzy/abstract-annbfis.h b/source/neuro-fuzzy/abstract-annbfis.h index 23da8f6..e943894 100644 --- a/source/neuro-fuzzy/abstract-annbfis.h +++ b/source/neuro-fuzzy/abstract-annbfis.h @@ -118,6 +118,7 @@ namespace ksi *
@param nTuningIterations number of tuning iterations * @param dbLearningCoefficient learning coefficient for gradient method * @param train train dataset + * @param validation validation dataset * @date 2018-12-24 * @author Krzysztof Siminski * @@ -125,7 +126,7 @@ namespace ksi virtual void createFuzzyRulebase ( int nClusteringIterations, int nTuningIterations, double dbLearningCoefficient, - const dataset & train); + const dataset & train, const dataset & validation); public: diff --git a/source/neuro-fuzzy/abstract-ma.cpp b/source/neuro-fuzzy/abstract-ma.cpp index d2171e9..204e539 100644 --- a/source/neuro-fuzzy/abstract-ma.cpp +++ b/source/neuro-fuzzy/abstract-ma.cpp @@ -35,19 +35,26 @@ #include "../service/debug.h" #include "../gan/discriminative_model.h" #include "../gan/generative_model.h" - - - + +ksi::abstract_ma::abstract_ma(const ksi::partitioner& Partitioner) : ksi::abstract_ma::abstract_ma() +{ + if (_pPartitioner) + delete _pPartitioner; + _pPartitioner = Partitioner.clone(); +} + void ksi::abstract_ma::createFuzzyRulebase ( int nClusteringIterations, int nTuningIterations, double eta, - const ksi::dataset & train) + const ksi::dataset & train, + const ksi::dataset & validation + ) { - try + try { - std::deque errors; + std::deque errors; _nClusteringIterations = nClusteringIterations; _nTuningIterations = nTuningIterations; @@ -65,45 +72,47 @@ void ksi::abstract_ma::createFuzzyRulebase std::unique_ptr pTheBest (_pRulebase->clone()); double dbTheBestRMSE = std::numeric_limits::max(); //////// - - std::size_t nX = train.getNumberOfData(); std::size_t nAttr = train.getNumberOfAttributes(); std::size_t nAttr_1 = nAttr - 1; - + auto XY = train.splitDataSetVertically (nAttr - 1); auto trainX = XY.first; auto trainY = XY.second; - //auto podzial = clusterer.doPartition(trainX); -// auto podzialXY = clusterer.doPartition(train); - - // fcm clusterer; -// clusterer.setNumberOfClusters(_nRules); -// clusterer.setNumberOfIterations(_nClusteringIterations); + 
auto XYval = validation.splitDataSetVertically(nAttr - 1); + auto validateX = XYval.first; + auto validateY = XYval.second; + auto mvalidateY = validateY.getMatrix(); + auto nValY = validateY.getNumberOfData(); + std::vector wvalidateY (nValY); + for (std::size_t x = 0; x < nValY; x++) + wvalidateY[x] = mvalidateY[x][0]; + //////////////////////// + _original_size_of_training_dataset = trainX.getNumberOfData(); auto podzial = doPartition(trainX); - _nRules = podzial.getNumberOfClusters(); + auto typical_items = trainX.get_if_data_typical(_minimal_typicality); trainX.remove_untypical_data(typical_items); trainY.remove_untypical_data(typical_items); + std::size_t nX = trainX.getNumberOfData(); auto trainReduced = train; trainReduced.remove_untypical_data(typical_items); auto podzialXY = doPartition(trainReduced); + ///@todo Czy na pewno dobrze tutaj jest z nX? nX = trainX.getNumberOfData(); _reduced_size_of_training_dataset = nX; // pobranie danych w postaci macierzy: auto wTrainX = trainX.getMatrix(); - auto wTrainY = trainY.getMatrix(); + auto wTrainY = trainY.getMatrix(); - - std::vector wY(nX); for (std::size_t x = 0; x < nX; x++) wY[x] = wTrainY[x][0]; @@ -115,7 +124,9 @@ void ksi::abstract_ma::createFuzzyRulebase auto klaster = podzialXY.getCluster(c); for (std::size_t a = 0; a < nAttr_1; a++) - przeslanka.addDescriptor(klaster->getDescriptor(a)); + { + przeslanka.addDescriptor(klaster->getDescriptor(a)); + } auto needed_descriptor = klaster->getAddressOfDescriptor(nAttr - 1); auto scs = needed_descriptor->getMAconsequenceParameters(); @@ -123,6 +134,7 @@ void ksi::abstract_ma::createFuzzyRulebase regula.setPremise(przeslanka); consequence_MA konkluzja (scs[0], scs[1], scs[2]); regula.setConsequence(konkluzja); + #pragma omp critical _pRulebase->addRule(regula); } @@ -141,15 +153,16 @@ void ksi::abstract_ma::createFuzzyRulebase } _pRulebase->actualise_parameters(eta); + ////////////////////////////////// // test: wyznaczam blad systemu - std::vector 
wYelaborated (nX); + + std::vector wYelaborated (nValY); for (std::size_t x = 0; x < nX; x++) - wYelaborated[x] = answer( *(trainX.getDatum(x))); + wYelaborated[x] = answer( *(validateX.getDatum(x))); /////////////////////////// ksi::error_RMSE rmse; - double blad = rmse.getError(wY, wYelaborated); - // std::cout << __FILE__ << " (" << __LINE__ << ") " << "coeff: " << eta << ", iter: " << i << ", RMSE(train): " << blad << std::endl; + double blad = rmse.getError(wvalidateY, wYelaborated); errors.push_front(blad); eta = modify_learning_coefficient(eta, errors); // modify learning coefficient @@ -162,7 +175,6 @@ void ksi::abstract_ma::createFuzzyRulebase /////////////////////////// } - // system nastrojony :-) // update the rulebase with the best one: delete _pRulebase; @@ -189,15 +201,12 @@ ksi::number ksi::abstract_ma::elaborate_answer (const ksi::datum & item) const } CATCH; } - ksi::abstract_ma::abstract_ma() { _pPartitioner = nullptr; } - - double ksi::abstract_ma::discriminate(const ksi::datum& d) { return answer(d); @@ -206,15 +215,15 @@ double ksi::abstract_ma::discriminate(const ksi::datum& d) void ksi::abstract_ma::train_discriminative_model(const ksi::dataset & ds) { createFuzzyRulebase( _nClusteringIterations, - _nTuningIterations, _dbLearningCoefficient, ds - ); + _nTuningIterations, _dbLearningCoefficient, ds, ds + ); // validation == train } void ksi::abstract_ma::train_generative_model(const ksi::dataset& ds) { createFuzzyRulebase( _nClusteringIterations, - _nTuningIterations, _dbLearningCoefficient, ds - ); + _nTuningIterations, _dbLearningCoefficient, ds, ds + ); // validation == train } ksi::abstract_ma::abstract_ma(int nRules, @@ -323,7 +332,6 @@ ksi::abstract_ma::abstract_ma(int nRules, _minimal_typicality = dbMinimalTypicality; } - ksi::abstract_ma::~abstract_ma() { // if (_pPartitioner) @@ -363,3 +371,15 @@ ksi::abstract_ma & ksi::abstract_ma::operator= (ksi::abstract_ma && right) } +ksi::partition ksi::abstract_ma::doPartition(const 
ksi::dataset& X) +{ + try + { + if (_pPartitioner) + return _pPartitioner->doPartition(X); + else + throw ksi::exception ("no clustering method provided"); + } + CATCH; +} + diff --git a/source/neuro-fuzzy/abstract-ma.h b/source/neuro-fuzzy/abstract-ma.h index 047243b..0f46866 100644 --- a/source/neuro-fuzzy/abstract-ma.h +++ b/source/neuro-fuzzy/abstract-ma.h @@ -8,14 +8,15 @@ #include "../common/dataset.h" #include "../common/number.h" -#include "rulebase.h" -#include "neuro-fuzzy-system.h" +#include "../neuro-fuzzy/rulebase.h" +#include "../neuro-fuzzy/neuro-fuzzy-system.h" #include "../implications/implication.h" #include "../tnorms/t-norm.h" #include "../auxiliary/roc.h" +#include "../partitions/partition.h" +#include "../partitions/partitioner.h" #include "../gan/discriminative_model.h" #include "../gan/generative_model.h" -#include "../partitions/partitioner.h" namespace ksi { @@ -24,7 +25,6 @@ namespace ksi */ class abstract_ma : virtual public neuro_fuzzy_system { - public: abstract_ma(); @@ -97,24 +97,31 @@ namespace ksi - protected: + /** A constructor with partitioner + * @date 2024-02-27 */ + abstract_ma (const partitioner & Partitioner); + + protected: /** The method creates a fuzzy rulebase from the dataset. * @param nClusteringIterations number of clustering iterations * @param nTuningIterations number of tuning iterations * @param dbLearningCoefficient learning coefficient for gradient method * @param train train dataset + * @param validation validation dataset * @date 2018-02-16 * @author Krzysztof Siminski */ virtual void createFuzzyRulebase ( int nClusteringIterations, int nTuningIterations, double dbLearningCoefficient, - const dataset & train); + const dataset & train, + const dataset & validation + ); /** Function that partitions the data set. 
@param X dataset to partition @date 2019-12-24 */ - virtual partition doPartition (const dataset & X) = 0; + virtual partition doPartition (const dataset & X); public: diff --git a/source/neuro-fuzzy/abstract-tsk.cpp b/source/neuro-fuzzy/abstract-tsk.cpp index d055fb9..988fedc 100644 --- a/source/neuro-fuzzy/abstract-tsk.cpp +++ b/source/neuro-fuzzy/abstract-tsk.cpp @@ -27,21 +27,32 @@ #include "../service/debug.h" #include "../auxiliary/error-RMSE.h" #include "../auxiliary/error-MAE.h" -#include "../common/number.h" +#include "../common/number.h" #include "../readers/reader-complete.h" #include "../common/data-modifier-normaliser.h" #include "../partitions/partition.h" +#include "../service/debug.h" #include "../gan/discriminative_model.h" #include "../gan/generative_model.h" + +ksi::abstract_tsk::abstract_tsk(const ksi::partitioner& Partitioner) : ksi::abstract_tsk::abstract_tsk() +{ + if (_pPartitioner) + delete _pPartitioner; + _pPartitioner = Partitioner.clone(); +} + + void ksi::abstract_tsk::createFuzzyRulebase (int nClusteringIterations, int nTuningIterations, double eta, - const ksi::dataset& train) + const ksi::dataset& train, + const ksi::dataset& validation) { try { - std::deque errors; + std::deque errors; //_nRules = nRules; /// @todo Liczbe regul okresla system podzialu dziedziny! 
_nClusteringIterations = nClusteringIterations; _nTuningIterations = nTuningIterations; @@ -50,11 +61,11 @@ void ksi::abstract_tsk::createFuzzyRulebase (int nClusteringIterations, if (_pTnorm) delete _pTnorm; _pTnorm = new t_norm_product (); - + if (_pRulebase) delete _pRulebase; _pRulebase = new rulebase(); - + // remember the best rulebase: std::unique_ptr pTheBest (_pRulebase->clone()); double dbTheBestRMSE = std::numeric_limits::max(); @@ -62,16 +73,22 @@ void ksi::abstract_tsk::createFuzzyRulebase (int nClusteringIterations, std::size_t nAttr = train.getNumberOfAttributes(); std::size_t nAttr_1 = nAttr - 1; - + auto XY = train.splitDataSetVertically (nAttr - 1); auto trainX = XY.first; auto trainY = XY.second; -// fcm clusterer; -// clusterer.setNumberOfClusters(_nRules); -// clusterer.setNumberOfIterations(_nClusteringIterations); - // auto podzial = clusterer.doPartition(trainX); + auto XYval = validation.splitDataSetVertically(nAttr - 1); + auto validateX = XYval.first; + auto validateY = XYval.second; + auto mvalidateY = validateY.getMatrix(); + auto nValY = validateY.getNumberOfData(); + std::vector wvalidateY (nValY); + for (std::size_t x = 0; x < nValY; x++) + wvalidateY[x] = mvalidateY[x][0]; + //////////////////////// + _original_size_of_training_dataset = trainX.getNumberOfData(); auto podzial = doPartition(trainX); @@ -83,10 +100,11 @@ void ksi::abstract_tsk::createFuzzyRulebase (int nClusteringIterations, std::size_t nX = trainX.getNumberOfData(); _reduced_size_of_training_dataset = nX; + // pobranie danych w postaci macierzy: auto wTrainX = trainX.getMatrix(); - auto wTrainY = trainY.getMatrix(); - + auto wTrainY = trainY.getMatrix(); + std::vector wY(nX); for (std::size_t x = 0; x < nX; x++) wY[x] = wTrainY[x][0]; @@ -175,15 +193,16 @@ void ksi::abstract_tsk::createFuzzyRulebase (int nClusteringIterations, } } + ////////////////////////////////// // test: wyznaczam blad systemu - std::vector wYelaborated (nX); + + std::vector wYelaborated 
(nValY); for (std::size_t x = 0; x < nX; x++) - wYelaborated[x] = answer( *(trainX.getDatum(x))); + wYelaborated[x] = answer( *(validateX.getDatum(x))); /////////////////////////// ksi::error_RMSE rmse; - double blad = rmse.getError(wY, wYelaborated); - // std::cout << __FILE__ << " (" << __LINE__ << ") " << "coeff: " << eta << ", iter: " << i << ", RMSE(train): " << blad << std::endl; + double blad = rmse.getError(wvalidateY, wYelaborated); errors.push_front(blad); eta = modify_learning_coefficient(eta, errors); // modify learning coefficient @@ -433,14 +452,14 @@ double ksi::abstract_tsk::discriminate (const ksi::datum & d) void ksi::abstract_tsk::train_discriminative_model (const dataset & ds) { createFuzzyRulebase(_nClusteringIterations, - _nTuningIterations, _dbLearningCoefficient, ds); + _nTuningIterations, _dbLearningCoefficient, ds, ds); // validation == train } void ksi::abstract_tsk::train_generative_model(const ksi::dataset& ds) { createFuzzyRulebase(_nClusteringIterations, - _nTuningIterations, _dbLearningCoefficient, ds); + _nTuningIterations, _dbLearningCoefficient, ds, ds); // validation == train } ksi::datum ksi::abstract_tsk::get_random_datum(std::default_random_engine & engine) @@ -454,4 +473,15 @@ ksi::datum ksi::abstract_tsk::get_random_datum(std::default_random_engine & engi return krotka; } +ksi::partition ksi::abstract_tsk::doPartition(const ksi::dataset& X) +{ + try + { + if (_pPartitioner) + return _pPartitioner->doPartition(X); + else + throw ksi::exception ("no clustering method provided"); + } + CATCH; +} diff --git a/source/neuro-fuzzy/abstract-tsk.h b/source/neuro-fuzzy/abstract-tsk.h index b3fcf79..b8fee90 100644 --- a/source/neuro-fuzzy/abstract-tsk.h +++ b/source/neuro-fuzzy/abstract-tsk.h @@ -26,9 +26,6 @@ namespace ksi */ class abstract_tsk : virtual public neuro_fuzzy_system { - protected: - - public: abstract_tsk(); @@ -139,6 +136,8 @@ namespace ksi abstract_tsk & operator = (abstract_tsk && a); ~abstract_tsk(); + /** A 
constructor with partitioner * @date 2024-02-27 */ + abstract_tsk (const partitioner & Partitioner); protected: /** The method creates a fuzzy rulebase from the dataset. @@ -146,18 +145,20 @@ namespace ksi * @param nTuningIterations number of tuning iterations * @param dbLearningCoefficient learning coefficient for gradient method * @param train train dataset + * @param validation validation dataset * @date 2018-02-14 * @author Krzysztof Siminski */ virtual void createFuzzyRulebase (int nClusteringIterations, int nTuningIterations, double dbLearningCoefficient, - const dataset & train); + const dataset & train, + const dataset & validation); /** Function that partitions the data set. @param X dataset to partition @date 2019-12-19 */ - virtual partition doPartition (const dataset & X) = 0; + virtual partition doPartition (const dataset & X); public: /** @@ -175,8 +176,7 @@ namespace ksi virtual number elaborate_answer (const datum & d) const; virtual void train_discriminative_model (const dataset & ds); - virtual void train_generative_model(const ksi::dataset & ds); - + virtual void train_generative_model(const dataset & ds); /** The method elaborates the answer of the discriminative_model for a datum @param d a datum to elaborate answer for @@ -186,7 +186,6 @@ namespace ksi virtual ksi::datum get_random_datum(std::default_random_engine & engine); - }; } diff --git a/source/neuro-fuzzy/annbfis_prototype.cpp b/source/neuro-fuzzy/annbfis_prototype.cpp index 8f98353..541bec5 100644 --- a/source/neuro-fuzzy/annbfis_prototype.cpp +++ b/source/neuro-fuzzy/annbfis_prototype.cpp @@ -140,8 +140,9 @@ ksi::annbfis_prototype::annbfis_prototype(const ksi::partitioner & partitioner, void ksi::annbfis_prototype::createFuzzyRulebase(int nClusteringIterations, int nTuningIterations, - double dbLearningCoefficient, - const ksi::dataset& train) + double eta, + const ksi::dataset& train, + const ksi::dataset& validation) { try { @@ -151,28 +152,48 @@ void 
ksi::annbfis_prototype::createFuzzyRulebase(int nClusteringIterations, const double INITIAL_W = 2.0; _TrainDataset = train; - + if (_pRulebase) delete _pRulebase; _pRulebase = new rulebase(); + // remember the best rulebase: + std::deque errors; + std::unique_ptr pTheBest (_pRulebase->clone()); + double dbTheBestRMSE = std::numeric_limits::max(); + ////// + std::size_t nAttr = _TrainDataset.getNumberOfAttributes(); std::size_t nAttr_1 = nAttr - 1; - + auto XY = train.splitDataSetVertically (nAttr - 1); auto trainX = XY.first; auto trainY = XY.second; + + auto XYval = validation.splitDataSetVertically(nAttr - 1); + auto validateX = XYval.first; + auto validateY = XYval.second; + auto mvalidateY = validateY.getMatrix(); + auto nValY = validateY.getNumberOfData(); + std::vector wvalidateY (nValY); + for (std::size_t x = 0; x < nValY; x++) + wvalidateY[x] = mvalidateY[x][0]; + //////////////////////// + _original_size_of_training_dataset = trainX.getNumberOfData(); - - auto podzial = doPartition(trainX); - //debug(podzial); - + ksi::partition podzial; + + try + { + podzial = doPartition(trainX); + } CATCH; + std::size_t nX = trainX.getNumberOfData(); // pobranie danych w postaci macierzy: auto wTrainX = trainX.getMatrix(); auto wTrainY = trainY.getMatrix(); - + std::vector wY(nX); for (std::size_t x = 0; x < nX; x++) wY[x] = wTrainY[x][0]; @@ -193,88 +214,107 @@ void ksi::annbfis_prototype::createFuzzyRulebase(int nClusteringIterations, _pRulebase->addRule(regula); } //debug(*_pRulebase); - // elaboration of conclusions: - std::vector> G_przyklad_regula; - - // mam zgrupowane dane, teraz trzeba nastroic system - for (int i = 0; i < _nTuningIterations; i++) - { - if (i % 2 == 0) - { - G_przyklad_regula.clear(); // dla konkluzji - - // strojenie gradientowe - _pRulebase->reset_differentials(); - for (std::size_t x = 0; x < nX; x++) - { - // Uruchomienie strojenia gradiendowego. 
- double odpowiedz = _pRulebase->answer(wTrainX[x]); -// debug(odpowiedz); - // dla wyznaczania konkluzji: - auto localisation_weight = _pRulebase->get_last_rules_localisations_weights(); - std::vector Gs; - for (auto & p : localisation_weight) - Gs.push_back(p.second); - - G_przyklad_regula.push_back(Gs); - // no i juz zwykla metoda gradientowa - _pRulebase->cummulate_differentials(wTrainX[x], wY[x]); - } - _pRulebase->actualise_parameters(_dbLearningCoefficient); -// debug(G_przyklad_regula); - } - else + try + { + // elaboration of conclusions: + std::vector> G_przyklad_regula; + + // mam zgrupowane dane, teraz trzeba nastroic system + for (int i = 0; i < _nTuningIterations; i++) { - // wyznaczanie wspolczynnikow konkluzji. - least_square_error_regression lser ((nAttr_1 + 1) * _nRules); - - // przygotowanie wektora D - for (std::size_t x = 0; x < nX; x++) + if (i % 2 == 0) + { + G_przyklad_regula.clear(); // dla konkluzji + + // strojenie gradientowe + _pRulebase->reset_differentials(); + for (std::size_t x = 0; x < nX; x++) + { + // Uruchomienie strojenia gradiendowego. + double odpowiedz = _pRulebase->answer(wTrainX[x]); + // debug(odpowiedz); + // dla wyznaczania konkluzji: + auto localisation_weight = _pRulebase->get_last_rules_localisations_weights(); + std::vector Gs; + for (auto & p : localisation_weight) + Gs.push_back(p.second); + + G_przyklad_regula.push_back(Gs); + // no i juz zwykla metoda gradientowa + _pRulebase->cummulate_differentials(wTrainX[x], wY[x]); + } + _pRulebase->actualise_parameters(eta); + // debug(G_przyklad_regula); + } + + else { - auto G_suma = std::accumulate(G_przyklad_regula[x].begin(), - G_przyklad_regula[x].end(), 0.0); -// debug(G_suma); - - std::vector linia((nAttr_1 + 1) * _nRules); - int index = 0; + // wyznaczanie wspolczynnikow konkluzji. 
+ least_square_error_regression lser ((nAttr_1 + 1) * _nRules); + + // przygotowanie wektora D + for (std::size_t x = 0; x < nX; x++) + { + auto G_suma = std::accumulate(G_przyklad_regula[x].begin(), + G_przyklad_regula[x].end(), 0.0); + // debug(G_suma); + + std::vector linia((nAttr_1 + 1) * _nRules); + int index = 0; + for (int r = 0; r < _nRules; r++) + { + auto S = G_przyklad_regula[x][r] / G_suma; + for (std::size_t a = 0; a < nAttr_1; a++) + linia[index++] = S * wTrainX[x][a]; + linia[index++] = S; + } + lser.read_data_item(linia, wY[x]); + } + auto p = lser.get_regression_coefficients(); + + // teraz zapis do regul: +#pragma omp parallel for for (int r = 0; r < _nRules; r++) { - auto S = G_przyklad_regula[x][r] / G_suma; - for (std::size_t a = 0; a < nAttr_1; a++) - linia[index++] = S * wTrainX[x][a]; - linia[index++] = S; + std::vector coeff (nAttr_1 + 1); + + for (std::size_t a = 0; a < nAttr_1 + 1; a++) + coeff[a] = p[r * (nAttr_1 + 1) + a]; + if (ksi::is_valid(coeff)) + { + consequence_CL konkluzja (coeff, INITIAL_W); + (*_pRulebase)[r].setConsequence(konkluzja); + } } - lser.read_data_item(linia, wY[x]); } - auto p = lser.get_regression_coefficients(); - -// debug(p); - // teraz zapis do regul: - #pragma omp parallel for - for (int r = 0; r < _nRules; r++) + ////////////////////////////////// + // test: wyznaczam blad systemu + + std::vector wYelaborated (nValY); + for (std::size_t x = 0; x < nX; x++) + wYelaborated[x] = answer( *(validateX.getDatum(x))); + + /////////////////////////// + ksi::error_RMSE rmse; + double blad = rmse.getError(wvalidateY, wYelaborated); + errors.push_front(blad); + + eta = modify_learning_coefficient(eta, errors); // modify learning coefficient + // remember the best rulebase: + if (dbTheBestRMSE > blad) { - std::vector coeff (nAttr_1 + 1); - - for (std::size_t a = 0; a < nAttr_1 + 1; a++) - coeff[a] = p[r * (nAttr_1 + 1) + a]; - if (ksi::is_valid(coeff)) - { - consequence_CL konkluzja (coeff, INITIAL_W); - 
(*_pRulebase)[r].setConsequence(konkluzja); - } + dbTheBestRMSE = blad; + pTheBest = std::unique_ptr(_pRulebase->clone()); } + /////////////////////////// } - - // test: wyznaczam blad systemu - std::vector wYelaborated (nX); - #pragma omp parallel for - for (std::size_t x = 0; x < nX; x++) - wYelaborated[x] = answer( *(trainX.getDatum(x))); - - } + } CATCH; // system nastrojony :-) + // update the rulebase with the best one: + delete _pRulebase; + _pRulebase = pTheBest->clone(); } CATCH; } diff --git a/source/neuro-fuzzy/annbfis_prototype.h b/source/neuro-fuzzy/annbfis_prototype.h index 791c1ef..82cbc55 100644 --- a/source/neuro-fuzzy/annbfis_prototype.h +++ b/source/neuro-fuzzy/annbfis_prototype.h @@ -131,7 +131,7 @@ namespace ksi const implication & imp, const ksi::fac_prototype & factory); - virtual void createFuzzyRulebase(int nClusteringIterations, int nTuningIterations, double dbLearningCoefficient, const ksi::dataset & train) override; + virtual void createFuzzyRulebase(int nClusteringIterations, int nTuningIterations, double dbLearningCoefficient, const ksi::dataset & train, const ksi::dataset & validation) override; public: ksi::neuro_fuzzy_system * clone() const override; diff --git a/source/neuro-fuzzy/consequence-TSK.cpp b/source/neuro-fuzzy/consequence-TSK.cpp index 151ecbf..39f7cff 100644 --- a/source/neuro-fuzzy/consequence-TSK.cpp +++ b/source/neuro-fuzzy/consequence-TSK.cpp @@ -103,6 +103,6 @@ std::ostream& ksi::consequence_TSK::printLinguisticDescription(std::ostream& ss, int locationIndex = (_params.back() - descStat.average) / descStat.std_dev + TSKLocationDescription.size() / 2; locationIndex = std::min(std::max(locationIndex, 0), int(TSKLocationDescription.size() - 1)); - return ss << " intercept is " << TSKLocationDescription[locationIndex]; + return ss << " constant term is " << TSKLocationDescription[locationIndex]; } diff --git a/source/neuro-fuzzy/granular_annbfis_classification.cpp 
b/source/neuro-fuzzy/granular_annbfis_classification.cpp index c9f6456..d6753e7 100644 --- a/source/neuro-fuzzy/granular_annbfis_classification.cpp +++ b/source/neuro-fuzzy/granular_annbfis_classification.cpp @@ -91,11 +91,12 @@ void ksi::granular_annbfis_classification::createFuzzyNonGranularRulebase( const int nClusteringIterations, const int nTuningIterations, const double dbLearningCoefficient, - const ksi::dataset& train) + const ksi::dataset& train, + const ksi::dataset & validation) { try { - ksi::annbfis::createFuzzyRulebase( nClusteringIterations, nTuningIterations, dbLearningCoefficient, train); + ksi::annbfis::createFuzzyRulebase( nClusteringIterations, nTuningIterations, dbLearningCoefficient, train, validation); } CATCH; } diff --git a/source/neuro-fuzzy/granular_annbfis_classification.h b/source/neuro-fuzzy/granular_annbfis_classification.h index 9d57d6f..3d57f81 100644 --- a/source/neuro-fuzzy/granular_annbfis_classification.h +++ b/source/neuro-fuzzy/granular_annbfis_classification.h @@ -31,7 +31,8 @@ namespace ksi const int nClusteringIterations, const int nTuningIterations, const double dbLearningCoefficient, - const dataset & train); + const dataset & train, + const dataset & validation); public: virtual ksi::neuro_fuzzy_system * clone() const; diff --git a/source/neuro-fuzzy/granular_annbfis_regression.cpp b/source/neuro-fuzzy/granular_annbfis_regression.cpp index 8aa73a0..26b8899 100644 --- a/source/neuro-fuzzy/granular_annbfis_regression.cpp +++ b/source/neuro-fuzzy/granular_annbfis_regression.cpp @@ -86,11 +86,12 @@ void ksi::granular_annbfis_regression::createFuzzyNonGranularRulebase( const int nClusteringIterations, const int nTuningIterations, const double dbLearningCoefficient, - const ksi::dataset& train) + const ksi::dataset& train, + const ksi::dataset & validation) { try { - ksi::annbfis::createFuzzyRulebase( nClusteringIterations, nTuningIterations, dbLearningCoefficient, train); + ksi::annbfis::createFuzzyRulebase( 
nClusteringIterations, nTuningIterations, dbLearningCoefficient, train, validation); } CATCH; } diff --git a/source/neuro-fuzzy/granular_annbfis_regression.h b/source/neuro-fuzzy/granular_annbfis_regression.h index be9b187..ece5806 100644 --- a/source/neuro-fuzzy/granular_annbfis_regression.h +++ b/source/neuro-fuzzy/granular_annbfis_regression.h @@ -39,7 +39,8 @@ namespace ksi const int nClusteringIterations, const int nTuningIterations, const double dbLearningCoefficient, - const dataset & train); + const dataset & train, + const dataset & validation); public: virtual ksi::neuro_fuzzy_system * clone() const; @@ -48,13 +49,7 @@ namespace ksi @return rulebase's answer of a data item @param item data item to elaborate answer for */ - virtual double answer (const datum & item) const; - - - - - - + virtual double answer (const datum & item) const; }; } diff --git a/source/neuro-fuzzy/granular_ma_classification.cpp b/source/neuro-fuzzy/granular_ma_classification.cpp index 34d4784..c53906e 100644 --- a/source/neuro-fuzzy/granular_ma_classification.cpp +++ b/source/neuro-fuzzy/granular_ma_classification.cpp @@ -137,11 +137,12 @@ void ksi::granular_ma_classification::createFuzzyNonGranularRulebase( const int nClusteringIterations, const int nTuningIterations, const double dbLearningCoefficient, - const ksi::dataset& train) + const ksi::dataset& train, + const ksi::dataset& validation) { try { - ksi::ma::createFuzzyRulebase( nClusteringIterations, nTuningIterations, dbLearningCoefficient, train); + ksi::ma::createFuzzyRulebase( nClusteringIterations, nTuningIterations, dbLearningCoefficient, train, validation); } CATCH; } diff --git a/source/neuro-fuzzy/granular_ma_classification.h b/source/neuro-fuzzy/granular_ma_classification.h index 5136d17..e6210ff 100644 --- a/source/neuro-fuzzy/granular_ma_classification.h +++ b/source/neuro-fuzzy/granular_ma_classification.h @@ -49,7 +49,8 @@ namespace ksi const int nClusteringIterations, const int nTuningIterations, const double 
dbLearningCoefficient, - const dataset & train); + const dataset & train, + const dataset & validation); public: virtual ksi::neuro_fuzzy_system * clone() const; diff --git a/source/neuro-fuzzy/granular_ma_regression.cpp b/source/neuro-fuzzy/granular_ma_regression.cpp index cf2d73c..2f2e86e 100644 --- a/source/neuro-fuzzy/granular_ma_regression.cpp +++ b/source/neuro-fuzzy/granular_ma_regression.cpp @@ -76,11 +76,12 @@ void ksi::granular_ma_regression::createFuzzyNonGranularRulebase( const int nClusteringIterations, const int nTuningIterations, const double dbLearningCoefficient, - const ksi::dataset& train) + const ksi::dataset& train, + const ksi::dataset& validation) { try { - ksi::ma::createFuzzyRulebase(nClusteringIterations, nTuningIterations, dbLearningCoefficient, train); + ksi::ma::createFuzzyRulebase(nClusteringIterations, nTuningIterations, dbLearningCoefficient, train, validation); } CATCH; } diff --git a/source/neuro-fuzzy/granular_ma_regression.h b/source/neuro-fuzzy/granular_ma_regression.h index 89b00c2..d989fa0 100644 --- a/source/neuro-fuzzy/granular_ma_regression.h +++ b/source/neuro-fuzzy/granular_ma_regression.h @@ -29,7 +29,8 @@ namespace ksi const int nClusteringIterations, const int nTuningIterations, const double dbLearningCoefficient, - const dataset & train); + const dataset & train, + const dataset & validation); public: virtual ksi::neuro_fuzzy_system * clone() const; @@ -39,12 +40,6 @@ namespace ksi @param item data item to elaborate answer for */ virtual double answer (const datum & item) const; - - - - - - }; } diff --git a/source/neuro-fuzzy/granular_nfs.cpp b/source/neuro-fuzzy/granular_nfs.cpp index 2dc14e1..dea07a9 100644 --- a/source/neuro-fuzzy/granular_nfs.cpp +++ b/source/neuro-fuzzy/granular_nfs.cpp @@ -285,7 +285,7 @@ void ksi::granular_nfs::createFuzzyRulebaseByParts( } createFuzzyRulebase(nNumberOfClusteringIterations, - nNumberofTuningIterations, dbLearningCoefficient, *granules); + nNumberofTuningIterations, 
dbLearningCoefficient, *granules, *granules); // validate == train delete granules; for (int i = 0; i < _nRules; i++) @@ -296,7 +296,7 @@ void ksi::granular_nfs::createFuzzyRulebaseByParts( { // debug("1 grade reduction"); createFuzzyRulebase(nNumberOfClusteringIterations, - nNumberofTuningIterations, dbLearningCoefficient, *first_grade_granules); + nNumberofTuningIterations, dbLearningCoefficient, *first_grade_granules, *first_grade_granules); // validate == train ksi::set_of_granules * next_grade_granules = new ksi::rulebase; for (int i = 0; i < _nRules; i++) @@ -319,7 +319,8 @@ void ksi::granular_nfs::createFuzzyRulebaseByParts( { createFuzzyRulebase(nNumberOfClusteringIterations, nNumberofTuningIterations, dbLearningCoefficient, - * first_grade_granules); + * first_grade_granules, + * first_grade_granules); // validate == train // auto reduced = reduce_granules_set (_1_grade_granules, nNumberOfRules); // _pRulebase->clear(); @@ -388,7 +389,7 @@ void ksi::granular_nfs::createFuzzyRulebaseHierarchical( ///@todo Trzeba wyznaczyć próg podziału klasy (threshold_value). 
createFuzzyRulebase(nNumberOfClusteringIterations, - nNumberofTuningIterations, dbLearningCoefficient, *granules); + nNumberofTuningIterations, dbLearningCoefficient, *granules, *granules); // validate == train delete granules; granules = nullptr; @@ -416,7 +417,8 @@ void ksi::granular_nfs::createFuzzyRulebaseHierarchical( createFuzzyRulebase(nNumberOfClusteringIterations, nNumberofTuningIterations, dbLearningCoefficient, - * granule_sets[it]); + * granule_sets[it], + * granule_sets[it]); // validate == train ksi::set_of_granules * next_grade_granules = _pRulebase->clone_set_of_granules(); @@ -448,7 +450,7 @@ void ksi::granular_nfs::createFuzzyRulebaseHierarchical( if (granule_sets[it]->size() > MINIMAL_NUMBER_OF_GRANULES_IN_SET) { createFuzzyRulebase(nNumberOfClusteringIterations, - nNumberofTuningIterations, dbLearningCoefficient, * granule_sets[it]); + nNumberofTuningIterations, dbLearningCoefficient, * granule_sets[it], * granule_sets[it]); // validate == train ksi::set_of_granules * next_grade_granules = _pRulebase->clone_set_of_granules(); @@ -479,7 +481,7 @@ void ksi::granular_nfs::createFuzzyRulebaseHierarchical( if (granule_sets.back()->size() > nNumberOfRules) { createFuzzyRulebase(nNumberOfClusteringIterations, - nNumberofTuningIterations, dbLearningCoefficient, *granule_sets.back()); + nNumberofTuningIterations, dbLearningCoefficient, *granule_sets.back(), *granule_sets.back()); // validate == train ksi::set_of_granules * next_grade_granules = new ksi::rulebase; for (int i = 0; i < _nRules; i++) next_grade_granules->addGranule(_pRulebase->operator[](i)); diff --git a/source/neuro-fuzzy/granular_nfs.h b/source/neuro-fuzzy/granular_nfs.h index 3e403aa..8c128bb 100644 --- a/source/neuro-fuzzy/granular_nfs.h +++ b/source/neuro-fuzzy/granular_nfs.h @@ -45,7 +45,8 @@ namespace ksi int nClusteringIterations, int nTuningIterations, double dbLearningCoefficient, - const ksi::set_of_granules & train) = 0; + const ksi::set_of_granules & train, + const 
ksi::set_of_granules & validation_granules) = 0; /** The method reads data by parts and creates a fuzzy rule base hierarchically with rules treated as granules. @@ -130,7 +131,8 @@ namespace ksi const int nClusteringIterations, const int nTuningIterations, const double dbLearningCoefficient, - const dataset & train) = 0; + const dataset & train, + const dataset & validation) = 0; /** The method reduces the set of granules. It leaves numberOfGranules rules in the set, the others are removed. */ // const ksi::granule_set reduce_granules_set ( diff --git a/source/neuro-fuzzy/granular_nfs_classification.cpp b/source/neuro-fuzzy/granular_nfs_classification.cpp index 00bf54c..d873a2c 100644 --- a/source/neuro-fuzzy/granular_nfs_classification.cpp +++ b/source/neuro-fuzzy/granular_nfs_classification.cpp @@ -264,7 +264,8 @@ void ksi::granular_nfs_classification::createFuzzyRulebase( int nClusteringIterations, int nTuningIterations, double dbLearningCoefficient, - const ksi::set_of_granules & train_granules) + const ksi::set_of_granules & train_granules, + const ksi::set_of_granules & validation_granules) { try { @@ -272,10 +273,11 @@ void ksi::granular_nfs_classification::createFuzzyRulebase( int number_of_items = NUMBER_OF_DATA_TO_GENERATE > train_granules.size() ? 
NUMBER_OF_DATA_TO_GENERATE : train_granules.size(); auto train = generate_dataset_from_granules(train_granules, number_of_items); + auto validate = generate_dataset_from_granules(validation_granules, number_of_items); // then elaborate fuzzy rule base for the data: createFuzzyNonGranularRulebase(nClusteringIterations, - nTuningIterations, dbLearningCoefficient, train); + nTuningIterations, dbLearningCoefficient, train, validate); auto nRules = _pRulebase->getNumberOfRules(); diff --git a/source/neuro-fuzzy/granular_nfs_classification.h b/source/neuro-fuzzy/granular_nfs_classification.h index dfc10ab..ced19f3 100644 --- a/source/neuro-fuzzy/granular_nfs_classification.h +++ b/source/neuro-fuzzy/granular_nfs_classification.h @@ -44,7 +44,8 @@ namespace ksi int nClusteringIterations, int nTuningIterations, double dbLearningCoefficient, - const ksi::set_of_granules & train_granules) override; + const ksi::set_of_granules & train_granules, + const ksi::set_of_granules & validation_granules) override; /** This function is not implemented in this class and will not be implemented, because the class is responsible for classification and not for regression. diff --git a/source/neuro-fuzzy/granular_nfs_regression.cpp b/source/neuro-fuzzy/granular_nfs_regression.cpp index 90b2aed..f5a03ea 100644 --- a/source/neuro-fuzzy/granular_nfs_regression.cpp +++ b/source/neuro-fuzzy/granular_nfs_regression.cpp @@ -385,18 +385,21 @@ void ksi::granular_nfs_regression::createFuzzyRulebase( int nClusteringIterations, int nTuningIterations, double dbLearningCoefficient, - const ksi::set_of_granules & train_granules) + const ksi::set_of_granules & train_granules, + const ksi::set_of_granules & validation_granules +) { try { // first prepare date: int number_of_items = NUMBER_OF_DATA_TO_GENERATE > train_granules.size() ? 
NUMBER_OF_DATA_TO_GENERATE : train_granules.size(); - auto train = generate_dataset_from_granules(train_granules, number_of_items); + auto train = generate_dataset_from_granules(train_granules, number_of_items); + auto validate = generate_dataset_from_granules(validation_granules, number_of_items); // then elaborate fuzzy rule base for the data: createFuzzyNonGranularRulebase(nClusteringIterations, - nTuningIterations, dbLearningCoefficient, train); + nTuningIterations, dbLearningCoefficient, train, validate); int nDataItem = train.getNumberOfData(); diff --git a/source/neuro-fuzzy/granular_nfs_regression.h b/source/neuro-fuzzy/granular_nfs_regression.h index 5684d3b..03b30f1 100644 --- a/source/neuro-fuzzy/granular_nfs_regression.h +++ b/source/neuro-fuzzy/granular_nfs_regression.h @@ -39,7 +39,8 @@ namespace ksi int nClusteringIterations, int nTuningIterations, double dbLearningCoefficient, - const ksi::set_of_granules & train_granules); + const ksi::set_of_granules & train_granules, + const ksi::set_of_granules & validation_granules); diff --git a/source/neuro-fuzzy/granular_subspace_annbfis_classification.cpp b/source/neuro-fuzzy/granular_subspace_annbfis_classification.cpp index 0fbd8b2..a6079a1 100644 --- a/source/neuro-fuzzy/granular_subspace_annbfis_classification.cpp +++ b/source/neuro-fuzzy/granular_subspace_annbfis_classification.cpp @@ -93,11 +93,12 @@ void ksi::granular_subspace_annbfis_classification::createFuzzyNonGranularRuleba const int nClusteringIterations, const int nTuningIterations, const double dbLearningCoefficient, - const ksi::dataset& train) + const ksi::dataset& train, + const ksi::dataset & validation) { try { - ksi::subspace_annbfis::createFuzzyRulebase( nClusteringIterations, nTuningIterations, dbLearningCoefficient, train); + ksi::subspace_annbfis::createFuzzyRulebase( nClusteringIterations, nTuningIterations, dbLearningCoefficient, train, validation); } CATCH; } diff --git 
a/source/neuro-fuzzy/granular_subspace_annbfis_classification.h b/source/neuro-fuzzy/granular_subspace_annbfis_classification.h index cb43960..a313b80 100644 --- a/source/neuro-fuzzy/granular_subspace_annbfis_classification.h +++ b/source/neuro-fuzzy/granular_subspace_annbfis_classification.h @@ -31,7 +31,8 @@ namespace ksi const int nClusteringIterations, const int nTuningIterations, const double dbLearningCoefficient, - const dataset & train); + const dataset & train, + const dataset & validation); public: virtual ksi::neuro_fuzzy_system * clone() const; diff --git a/source/neuro-fuzzy/granular_subspace_annbfis_regression.cpp b/source/neuro-fuzzy/granular_subspace_annbfis_regression.cpp index f551493..5c3b13d 100644 --- a/source/neuro-fuzzy/granular_subspace_annbfis_regression.cpp +++ b/source/neuro-fuzzy/granular_subspace_annbfis_regression.cpp @@ -75,11 +75,12 @@ void ksi::granular_subspace_annbfis_regression::createFuzzyNonGranularRulebase( const int nClusteringIterations, const int nTuningIterations, const double dbLearningCoefficient, - const ksi::dataset& train) + const ksi::dataset& train, + const ksi::dataset & validation) { try { - ksi::subspace_annbfis::createFuzzyRulebase( nClusteringIterations, nTuningIterations, dbLearningCoefficient, train); + ksi::subspace_annbfis::createFuzzyRulebase( nClusteringIterations, nTuningIterations, dbLearningCoefficient, train, validation); } CATCH; } diff --git a/source/neuro-fuzzy/granular_subspace_annbfis_regression.h b/source/neuro-fuzzy/granular_subspace_annbfis_regression.h index ecc4f03..e877c61 100644 --- a/source/neuro-fuzzy/granular_subspace_annbfis_regression.h +++ b/source/neuro-fuzzy/granular_subspace_annbfis_regression.h @@ -29,7 +29,8 @@ namespace ksi const int nClusteringIterations, const int nTuningIterations, const double dbLearningCoefficient, - const dataset & train); + const dataset & train, + const dataset & validation); public: virtual ksi::neuro_fuzzy_system * clone() const; @@ -39,12 +40,6 @@ 
namespace ksi @param item data item to elaborate answer for */ virtual double answer (const datum & item) const; - - - - - - }; } diff --git a/source/neuro-fuzzy/granular_tsk_classification.cpp b/source/neuro-fuzzy/granular_tsk_classification.cpp index 499863c..eacf1ff 100644 --- a/source/neuro-fuzzy/granular_tsk_classification.cpp +++ b/source/neuro-fuzzy/granular_tsk_classification.cpp @@ -89,11 +89,12 @@ void ksi::granular_tsk_classification::createFuzzyNonGranularRulebase( const int nClusteringIterations, const int nTuningIterations, const double dbLearningCoefficient, - const ksi::dataset& train) + const ksi::dataset& train, + const ksi::dataset& validation) { try { - ksi::tsk::createFuzzyRulebase(nClusteringIterations, nTuningIterations, dbLearningCoefficient, train); + ksi::tsk::createFuzzyRulebase(nClusteringIterations, nTuningIterations, dbLearningCoefficient, train, validation); } CATCH; } diff --git a/source/neuro-fuzzy/granular_tsk_classification.h b/source/neuro-fuzzy/granular_tsk_classification.h index 2ec43d6..9761836 100644 --- a/source/neuro-fuzzy/granular_tsk_classification.h +++ b/source/neuro-fuzzy/granular_tsk_classification.h @@ -30,7 +30,8 @@ namespace ksi const int nClusteringIterations, const int nTuningIterations, const double dbLearningCoefficient, - const dataset & train); + const dataset & train, + const dataset & validation); public: virtual ksi::neuro_fuzzy_system * clone() const; diff --git a/source/neuro-fuzzy/granular_tsk_regression.cpp b/source/neuro-fuzzy/granular_tsk_regression.cpp index aac9a33..4c1315e 100644 --- a/source/neuro-fuzzy/granular_tsk_regression.cpp +++ b/source/neuro-fuzzy/granular_tsk_regression.cpp @@ -154,11 +154,12 @@ void ksi::granular_tsk_regression::createFuzzyNonGranularRulebase( const int nClusteringIterations, const int nTuningIterations, const double dbLearningCoefficient, - const ksi::dataset& train) + const ksi::dataset& train, + const dataset & validation) { try { - 
ksi::tsk::createFuzzyRulebase(nClusteringIterations, nTuningIterations, dbLearningCoefficient, train); + ksi::tsk::createFuzzyRulebase(nClusteringIterations, nTuningIterations, dbLearningCoefficient, train, validation); } CATCH; } diff --git a/source/neuro-fuzzy/granular_tsk_regression.h b/source/neuro-fuzzy/granular_tsk_regression.h index 5abd77f..e7692c6 100644 --- a/source/neuro-fuzzy/granular_tsk_regression.h +++ b/source/neuro-fuzzy/granular_tsk_regression.h @@ -52,7 +52,8 @@ namespace ksi const int nClusteringIterations, const int nTuningIterations, const double dbLearningCoefficient, - const dataset & train); + const dataset & train, + const dataset & validation); public: virtual ksi::neuro_fuzzy_system * clone() const; diff --git a/source/neuro-fuzzy/ma.cpp b/source/neuro-fuzzy/ma.cpp index 2483078..21e63b4 100644 --- a/source/neuro-fuzzy/ma.cpp +++ b/source/neuro-fuzzy/ma.cpp @@ -19,6 +19,7 @@ #include "../tnorms/t-norm-product.h" #include "../partitions/cluster.h" #include "../partitions/fcm.h" +#include "../partitions/partition.h" #include "../tnorms/t-norm-product.h" #include "../implications/imp-reichenbach.h" #include "../descriptors/descriptor-gaussian.h" @@ -32,9 +33,9 @@ #include "../auxiliary/error-MAE.h" #include "../common/number.h" #include "../readers/reader-complete.h" +#include "../common/data-modifier.h" #include "../common/data-modifier-normaliser.h" #include "../partitions/partition.h" -#include "../service/debug.h" #include "../gan/discriminative_model.h" #include "../gan/generative_model.h" @@ -53,41 +54,27 @@ ksi::partition ksi::ma::doPartition(const ksi::dataset& X) void ksi::ma::set_name () { _name_of_neuro_fuzzy_system = std::string("MA"); - _description_of_neuro_fuzzy_system = std::string("Mamdami-Assilan neuro-fuzzy system with gaussian fuzzy sets in premises and product t-norm"); + _description_of_neuro_fuzzy_system = std::string("Mamdani-Assilan neuro-fuzzy system with gaussian fuzzy sets in premises and product t-norm"); } 
-ksi::neuro_fuzzy_system * ksi::ma::clone() const -{ - try - { - return new ksi::ma (*this); - } - CATCH; -} ksi::ma::ma() { -// debug(__LINE__); - set_name(); } ksi::ma::~ma() { - } - ksi::ma::ma(const ksi::ma & right) : neuro_fuzzy_system(right), abstract_ma(right) { - } ksi::ma::ma(ksi::ma && right) : neuro_fuzzy_system(right), abstract_ma(right) { - } ksi::ma & ksi::ma::operator=(const ksi::ma & right) @@ -111,19 +98,6 @@ ksi::ma & ksi::ma::operator= (ksi::ma && right) return *this; } - -ksi::discriminative_model * ksi::ma::clone_discriminant() const -{ - return new ksi::ma (*this); -} - -ksi::generative_model * ksi::ma::clone_generator() const -{ - return new ksi::ma (*this); -} - - - ksi::ma::ma(int nRules, int nClusteringIterations, @@ -163,3 +137,23 @@ ksi::ma::ma(int nRules, set_name(); } + +ksi::neuro_fuzzy_system * ksi::ma::clone() const +{ + try + { + return new ksi::ma (*this); + } + CATCH; +} + +ksi::discriminative_model * ksi::ma::clone_discriminant() const +{ + return new ksi::ma (*this); +} + +ksi::generative_model * ksi::ma::clone_generator() const +{ + return new ksi::ma (*this); +} + diff --git a/source/neuro-fuzzy/neuro-fuzzy-system.cpp b/source/neuro-fuzzy/neuro-fuzzy-system.cpp index 4f4f830..72e45a5 100644 --- a/source/neuro-fuzzy/neuro-fuzzy-system.cpp +++ b/source/neuro-fuzzy/neuro-fuzzy-system.cpp @@ -1,31 +1,30 @@ /** @file */ -#include -#include #include -#include +#include +#include +#include #include +#include +#include #include -#include - - -#include "neuro-fuzzy-system.h" -#include "rulebase.h" -#include "../service/debug.h" -#include "../readers/reader-complete.h" -#include "../common/data-modifier-normaliser.h" -#include "../auxiliary/error-RMSE.h" -#include "../auxiliary/error-MAE.h" -#include "../common/number.h" -#include "../common/result.h" -#include "../auxiliary/tempus.h" #include "../auxiliary/clock.h" -#include "../auxiliary/roc.h" #include "../auxiliary/confusion-matrix.h" #include "../auxiliary/directory.h" +#include 
"../auxiliary/error-MAE.h" +#include "../auxiliary/error-RMSE.h" +#include "../auxiliary/roc.h" +#include "../auxiliary/tempus.h" +#include "../common/data-modifier-normaliser.h" +#include "../common/number.h" +#include "../common/result.h" #include "../gan/discriminative_model.h" #include "../gan/generative_model.h" +#include "../neuro-fuzzy/neuro-fuzzy-system.h" +#include "../neuro-fuzzy/rulebase.h" +#include "../readers/reader-complete.h" +#include "../service/debug.h" #include "../service/debug.h" @@ -211,6 +210,20 @@ ksi::neuro_fuzzy_system::neuro_fuzzy_system(const std::string & trainDataFile, _pModyfikator = nullptr; } + +ksi::neuro_fuzzy_system::neuro_fuzzy_system(const std::string & trainDataFile, + const std::string & validationFile, + const std::string & testDataFile, + const std::string & resultsFile) +{ + _train_data_file = trainDataFile; + _validation_data_file = validationFile; + _test_data_file = testDataFile; + _output_file = resultsFile; + _pPartitioner = nullptr; + _pModyfikator = nullptr; +} + ksi::neuro_fuzzy_system::neuro_fuzzy_system(const ksi::dataset & trainDataSet, const ksi::dataset & testDataSet, const std::string & resultsFile) @@ -223,6 +236,23 @@ ksi::neuro_fuzzy_system::neuro_fuzzy_system(const ksi::dataset & trainDataSet, } +ksi::neuro_fuzzy_system::neuro_fuzzy_system(const ksi::dataset& trainData, + const ksi::dataset& validationData, + const ksi::dataset& testData, + const std::string& resultsFile) +{ + _TrainDataset = trainData; + _ValidationDataset = validationData; + _TestDataset = testData; + _output_file = resultsFile; + _pPartitioner = nullptr; + _pModyfikator = nullptr; +} + + + + + ksi::neuro_fuzzy_system::neuro_fuzzy_system(const std::string & trainDataFile, const std::string & testDataFile, const std::string & resultsFile, @@ -239,8 +269,6 @@ ksi::neuro_fuzzy_system::neuro_fuzzy_system(const ksi::data_modifier& modifier) _pModyfikator = std::shared_ptr (modifier.clone()); } - - 
ksi::neuro_fuzzy_system::neuro_fuzzy_system( const std::string& trainDataFile, const std::string& testDataFile, @@ -261,13 +289,19 @@ ksi::neuro_fuzzy_system::neuro_fuzzy_system( _pModyfikator = nullptr; } - - std::string ksi::neuro_fuzzy_system::get_nfs_description() const { return _description_of_neuro_fuzzy_system; } +std::string ksi::neuro_fuzzy_system::get_brief_nfs_name() const +{ + if (_brief_name_of_neuro_fuzzy_system.empty()) + return _name_of_neuro_fuzzy_system; + else + return _brief_name_of_neuro_fuzzy_system; +} + std::string ksi::neuro_fuzzy_system::get_nfs_name() const { return _name_of_neuro_fuzzy_system; @@ -387,11 +421,14 @@ void ksi::neuro_fuzzy_system::copy_fields(const ksi::neuro_fuzzy_system & wzor) _dbLearningCoefficient = wzor._dbLearningCoefficient; _bNormalisation = wzor._bNormalisation; _TrainDataset = wzor._TrainDataset; + _ValidationDataset = wzor._ValidationDataset; _TestDataset = wzor._TestDataset; _name_of_neuro_fuzzy_system = wzor._name_of_neuro_fuzzy_system; + _brief_name_of_neuro_fuzzy_system = wzor._brief_name_of_neuro_fuzzy_system; _description_of_neuro_fuzzy_system = wzor._description_of_neuro_fuzzy_system; _train_data_file = wzor._train_data_file; + _validation_data_file = wzor._validation_data_file; _test_data_file = wzor._test_data_file; _output_file = wzor._output_file; _positive_class = wzor._positive_class; @@ -405,9 +442,16 @@ void ksi::neuro_fuzzy_system::copy_fields(const ksi::neuro_fuzzy_system & wzor) ksi::result ksi::neuro_fuzzy_system::experiment_classification_core() { + if (_ValidationDataset.size() == 0) // no validation dataset, use train set instead + { + _ValidationDataset = _TrainDataset; + _validation_data_file = _train_data_file; + } return experiment_classification_core(_TrainDataset, + _ValidationDataset, _TestDataset, _train_data_file, + _validation_data_file, _test_data_file, _output_file, _nRules, @@ -436,7 +480,11 @@ ksi::result ksi::neuro_fuzzy_system::experiment_classification() _threshold_type); } 
-ksi::result ksi::neuro_fuzzy_system::experiment_classification(const std::string& trainDataFile, const std::string& testDataFile, const std::string& outputFile) +ksi::result ksi::neuro_fuzzy_system::experiment_classification( + const std::string& trainDataFile, + const std::string& testDataFile, + const std::string& outputFile +) { return experiment_classification(trainDataFile, testDataFile, @@ -451,6 +499,27 @@ ksi::result ksi::neuro_fuzzy_system::experiment_classification(const std::string _threshold_type); } +ksi::result ksi::neuro_fuzzy_system::experiment_classification( + const std::string& trainDataFile, + const std::string& valiadationDataFile, + const std::string& testDataFile, + const std::string& outputFile +) +{ + return experiment_classification(trainDataFile, + valiadationDataFile, + testDataFile, + outputFile, + _nRules, + _nClusteringIterations, + _nTuningIterations, + _dbLearningCoefficient, + _bNormalisation, + _positive_class, + _negative_class, + _threshold_type); +} + ksi::result ksi::neuro_fuzzy_system::experiment_regression( const ksi::dataset& trainDataSet, @@ -462,7 +531,6 @@ ksi::result ksi::neuro_fuzzy_system::experiment_regression( return experiment_regression(trainDataSet, testDataSet, empty, empty, outputFile, this->_nRules, this->_nClusteringIterations, this->_nTuningIterations, this->_dbLearningCoefficient, this->_bNormalisation); } - ksi::result ksi::neuro_fuzzy_system::experiment_classification( const ksi::dataset& trainDataSet, const ksi::dataset& testDataSet, @@ -543,6 +611,14 @@ double ksi::neuro_fuzzy_system::elaborate_threshold_value( return _threshold_value; } +double ksi::neuro_fuzzy_system::get_number_of_rules() const +{ + if (_pRulebase) + return (double) _pRulebase->size(); + else + return 0; +} + std::pair ksi::neuro_fuzzy_system::answer_classification(const ksi::datum& item) const { @@ -566,15 +642,56 @@ ksi::result ksi::neuro_fuzzy_system::experiment_classification_core( const double dbNegativeClass, 
ksi::roc_threshold threshold_type) { + + return ksi::neuro_fuzzy_system::experiment_classification_core + ( + trainDataset, + trainDataset, // train == validation + testDataset, + trainDataFile, + trainDataFile, + testDataFile, + outputFile, + nNumberOfRules, + nNumberOfClusteringIterations, + nNumberofTuningIterations, + dbLearningCoefficient, + bNormalisation, + dbPositiveClass, + dbNegativeClass, + threshold_type + ); +} + +ksi::result ksi::neuro_fuzzy_system::experiment_classification_core( + const ksi::dataset& trainDataset, + const ksi::dataset& validationDataset, + const ksi::dataset& testDataset, + const std::string & trainDataFile, + const std::string & validationDataFile, + const std::string & testDataFile, + const std::string & outputFile, + const int nNumberOfRules, + const int nNumberOfClusteringIterations, + const int nNumberofTuningIterations, + const double dbLearningCoefficient, + const bool bNormalisation, + const double dbPositiveClass, + const double dbNegativeClass, + ksi::roc_threshold threshold_type) +{ try { _TrainDataset = trainDataset; + _ValidationDataset = validationDataset; _TestDataset = testDataset; ksi::result wynik; _train_data_file = trainDataFile; + _validation_data_file = validationDataFile; _test_data_file = testDataFile; + _output_file = outputFile; _nRules = nNumberOfRules; _dbLearningCoefficient = dbLearningCoefficient; _bNormalisation = bNormalisation; @@ -583,26 +700,32 @@ ksi::result ksi::neuro_fuzzy_system::experiment_classification_core( { ksi::data_modifier_normaliser normaliser; normaliser.modify(_TrainDataset); + normaliser.modify(_ValidationDataset); normaliser.modify(_TestDataset); } if (_pModyfikator) _pModyfikator->modify(_TrainDataset); - + ksi::clock zegar; zegar.start(); + createFuzzyRulebase(nNumberOfClusteringIterations, nNumberofTuningIterations, dbLearningCoefficient, - _TrainDataset); + _TrainDataset, _ValidationDataset); zegar.stop(); if (_pRulebase) { - if (not _pRulebase->validate()) - throw std::string 
("rule base not valid"); + if (not _pRulebase->validate()) + throw std::string ("rule base not valid"); } - ksi::directory::create_directory_for_file(outputFile); + try + { + ksi::directory::create_directory_for_file(outputFile); + } + CATCH; std::ofstream model (outputFile); if (not model) { @@ -612,10 +735,13 @@ ksi::result ksi::neuro_fuzzy_system::experiment_classification_core( throw ss.str(); } + // thdebug(__LINE__); std::vector wYtestExpected, wYtestElaboratedClass, wYtestElaboratedNumeric, wYtrainExpected, wYtrainElaboratedClass, wYtrainElaboratedNumeric; + // thdebug(__LINE__); get_answers_for_train_classification(); + // thdebug(__LINE__); for (const auto & answer : _answers_for_train) { double expected, el_numeric; @@ -627,13 +753,17 @@ ksi::result ksi::neuro_fuzzy_system::experiment_classification_core( model << classification_intro() << std::endl; if (threshold_type != ksi::roc_threshold::none) model << "classification threshold type: " << ksi::to_string(threshold_type) << std::endl; + // thdebug(__LINE__); _threshold_value = elaborate_threshold_value (wYtrainExpected, wYtrainElaboratedNumeric, dbPositiveClass, dbNegativeClass, threshold_type); + // thdebug(__LINE__); wYtrainElaboratedClass.clear(); wYtrainElaboratedNumeric.clear(); wYtrainExpected.clear(); + // thdebug(__LINE__); get_answers_for_train_classification(); + // thdebug(__LINE__); for (const auto & answer : _answers_for_train) { double expected, el_numeric, el_class; @@ -647,7 +777,9 @@ ksi::result ksi::neuro_fuzzy_system::experiment_classification_core( wYtestElaboratedNumeric.clear(); wYtestExpected.clear(); + // thdebug(__LINE__); get_answers_for_test_classification(); + // thdebug(__LINE__); for (const auto & answer : _answers_for_test) { double expected, el_numeric, el_class; @@ -657,7 +789,9 @@ ksi::result ksi::neuro_fuzzy_system::experiment_classification_core( wYtestElaboratedClass.push_back(el_class); } + // thdebug(__LINE__); model << get_classification_threshold_value(); + // 
thdebug(__LINE__); model << "fuzzy rule base creation time: "; if (zegar.elapsed_seconds() > 10) @@ -665,6 +799,10 @@ ksi::result ksi::neuro_fuzzy_system::experiment_classification_core( else model << zegar.elapsed_milliseconds() << " [ms]"; model << std::endl; + + model << report_average_number_of_rules_for_test() << std::endl; + model << report_average_number_of_rules_for_train() << std::endl; + ///////////////// confusion matrices confusion_matrix con_test; @@ -672,24 +810,30 @@ ksi::result ksi::neuro_fuzzy_system::experiment_classification_core( model << std::endl; model << "confusion matrix for test data" << std::endl; + // thdebug(__LINE__); con_test.calculate_statistics(wYtestExpected, wYtestElaboratedClass, dbPositiveClass, dbNegativeClass, TP, TN, FP, FN); + // thdebug(__LINE__); wynik.TestPositive2Positive = TP; wynik.TestPositive2Negative = FN; wynik.TestNegative2Negative = TN; wynik.TestNegative2Positive = FP; + // thdebug(__LINE__); model << con_test.print(TP, TN, FP, FN); + // thdebug(__LINE__); model << std::endl; //---------------- model << std::endl; model << "confusion matrix for train data" << std::endl; + // thdebug(__LINE__); con_test.calculate_statistics(wYtrainExpected, wYtrainElaboratedClass, dbPositiveClass, dbNegativeClass, TP, TN, FP, FN); + // thdebug(__LINE__); wynik.TrainPositive2Positive = TP; wynik.TrainPositive2Negative = FN; wynik.TrainNegative2Negative = TN; @@ -712,26 +856,28 @@ ksi::result ksi::neuro_fuzzy_system::experiment_classification_core( model << "answers for the train set" << std::endl; model << "expected\telaborated_numeric\telaborated_class" << std::endl; - for (const auto answer : _answers_for_train) + // thdebug(__LINE__); + for (const auto & answer : _answers_for_train) { double expected, el_numeric, el_class; std::tie(expected, el_numeric, el_class) = answer; model << expected << " " << el_numeric << " " << el_class << std::endl; } + // thdebug(__LINE__); model << std::endl << std::endl; model << "answers for the 
test set" << std::endl; model << "expected\telaborated_numeric\telaborated_class" << std::endl; - for (const auto answer : _answers_for_test) + for (const auto & answer : _answers_for_test) { double expected, el_numeric, el_class; std::tie(expected, el_numeric, el_class) = answer; model << expected << " " << el_numeric << " " << el_class << std::endl; } - model.close(); - + model.close(); + // thdebug(__LINE__); return wynik; } @@ -757,7 +903,33 @@ ksi::result ksi::neuro_fuzzy_system::experiment_classification( auto zbiorTrain = czytacz.read(trainDataFile); auto zbiorTest = czytacz.read(testDataFile); - return experiment_classification_core(zbiorTrain, zbiorTest, trainDataFile, testDataFile, outputFile, nNumberOfRules, nNumberOfClusteringIterations, nNumberofTuningIterations, dbLearningCoefficient, bNormalisation, dbPositiveClass, dbNegativeClass, threshold_type); + return experiment_classification_core(zbiorTrain, zbiorTrain, zbiorTest, trainDataFile, trainDataFile, testDataFile, outputFile, nNumberOfRules, nNumberOfClusteringIterations, nNumberofTuningIterations, dbLearningCoefficient, bNormalisation, dbPositiveClass, dbNegativeClass, threshold_type); + } + CATCH; +} + +ksi::result ksi::neuro_fuzzy_system::experiment_classification( + const std::string &trainDataFile, + const std::string &validationDataFile, + const std::string &testDataFile, + const std::string &outputFile, + const int nNumberOfRules, + const int nNumberOfClusteringIterations, + const int nNumberofTuningIterations, + const double dbLearningCoefficient, + const bool bNormalisation, + const double dbPositiveClass, + const double dbNegativeClass, + ksi::roc_threshold threshold_type) +{ + try + { + ksi::reader_complete czytacz; + auto zbiorTrain = czytacz.read(trainDataFile); + auto zbiorValidation = czytacz.read(validationDataFile); + auto zbiorTest = czytacz.read(testDataFile); + + return experiment_classification_core(zbiorTrain, zbiorValidation, zbiorTest, trainDataFile, validationDataFile, 
testDataFile, outputFile, nNumberOfRules, nNumberOfClusteringIterations, nNumberofTuningIterations, dbLearningCoefficient, bNormalisation, dbPositiveClass, dbNegativeClass, threshold_type); } CATCH; } @@ -780,7 +952,7 @@ ksi::result ksi::neuro_fuzzy_system::experiment_classification ( { std::string empty {""}; - return experiment_classification_core(trainData, testData, empty, empty, outputFile, nNumberOfRules, nNumberOfClusteringIterations, nNumberofTuningIterations, dbLearningCoefficient, bNormalisation, dbPositiveClass, dbNegativeClass, threshold_type); + return experiment_classification_core(trainData, trainData, testData, empty, empty, empty, outputFile, nNumberOfRules, nNumberOfClusteringIterations, nNumberofTuningIterations, dbLearningCoefficient, bNormalisation, dbPositiveClass, dbNegativeClass, threshold_type); } CATCH; } @@ -799,6 +971,203 @@ ksi::result ksi::neuro_fuzzy_system::experiment_regression(const std::string & t return experiment_regression(zbiorTrain, zbiorTest, trainDataFile, testDataFile, outputFile, nNumberOfRules, nNumberOfClusteringIterations, nNumberofTuningIterations, dbLearningCoefficient, bNormalisation); } +ksi::result ksi::neuro_fuzzy_system::experiment_regression_core( + const ksi::dataset & trainDataset, + const ksi::dataset & validationDataset, + const ksi::dataset & testDataset, + const std::string & trainDataFile, + const std::string & validationDataFile, + const std::string & testDataFile, + const std::string & outputFile, + const int nNumberOfRules, + const int nNumberOfClusteringIterations, + const int nNumberofTuningIterations, + const double dbLearningCoefficient, + const bool bNormalisation +) +{ + try + { + _TrainDataset = trainDataset; + _ValidationDataset = validationDataset; + _TestDataset = testDataset; + + ksi::result experiment_results; + + _train_data_file = trainDataFile; + _validation_data_file = validationDataFile; + _test_data_file = testDataFile; + _output_file = outputFile; + _nRules = nNumberOfRules; + 
_dbLearningCoefficient = dbLearningCoefficient; + _bNormalisation = bNormalisation; + + if (bNormalisation) + { + ksi::data_modifier_normaliser normaliser; + normaliser.modify(_TrainDataset); + normaliser.modify(_ValidationDataset); + normaliser.modify(_TestDataset); + } + + if (_pModyfikator) + _pModyfikator->modify(_TrainDataset); + + ksi::clock zegar; + zegar.start(); + + createFuzzyRulebase(nNumberOfClusteringIterations, + nNumberofTuningIterations, dbLearningCoefficient, + _TrainDataset, _ValidationDataset); + zegar.stop(); + + if (not _pRulebase->validate()) + throw std::string ("rule base not valid"); + + auto XYtest = _TestDataset.splitDataSetVertically(_TestDataset.getNumberOfAttributes() - 1); + auto XYtrain = _TrainDataset.splitDataSetVertically(_TrainDataset.getNumberOfAttributes() - 1); + std::size_t nXtest = _TestDataset.getNumberOfData(); + std::size_t nXtrain = _TrainDataset.getNumberOfData(); + + /////////////////////////// + std::vector wYtestExpected (nXtest), wYtestElaborated (nXtest); + std::vector wYtrainExpected(nXtrain), wYtrainElaborated(nXtrain); + + #pragma omg parallel for + for (std::size_t i = 0; i < nXtest; i++) + { + wYtestExpected[i] = XYtest.second.get(i, 0); + wYtestElaborated[i] = answer(*(XYtest.first.getDatum(i))); + } + + #pragma omg parallel for + for (std::size_t i = 0; i < nXtrain; i++) + { + wYtrainExpected[i] = XYtrain.second.get(i, 0); + wYtrainElaborated[i] = answer(*(XYtrain.first.getDatum(i))); + } + /////////////////////////// + + ksi::error_RMSE rmse; + double blad_rmse_test = rmse.getError(wYtestElaborated, wYtestExpected); + double blad_rmse_train = rmse.getError(wYtrainElaborated, wYtrainExpected); + ksi::error_MAE mae; + double blad_mae_test = mae.getError(wYtestElaborated, wYtestExpected); + double blad_mae_train = mae.getError(wYtrainElaborated, wYtrainExpected); + + experiment_results.mae_test = blad_mae_test; + experiment_results.mae_train = blad_mae_train; + experiment_results.rmse_test = blad_rmse_test; + 
experiment_results.rmse_train = blad_rmse_train; + + ksi::directory::create_directory_for_file(outputFile); + std::ofstream model (outputFile); + if (not model) + { + std::stringstream ss; + ss << "I cannot open \"" << outputFile << "\" file!"; + + throw ss.str(); + } + model << "EXPERIMENT" << std::endl; + model << "==========" << std::endl; + model << getDateTimeNow() << std::endl; + model << std::endl; + model << get_nfs_description() << std::endl; + model << get_nfs_name() << std::endl; + + if (nNumberOfRules > 0) + model << "number of rules: " << nNumberOfRules << std::endl; + if (nNumberOfClusteringIterations > 0) + model << "number of clustering iterations: " << nNumberOfClusteringIterations << std::endl; + + if (nNumberofTuningIterations > 0) + model << "number of tuning interations: " << nNumberofTuningIterations << std::endl; + + if (dbLearningCoefficient > -1) + model << "learning coefficient: " << dbLearningCoefficient << std::endl; + + if (_pPartitioner) + model << "partitioner: " << _pPartitioner->getAbbreviation() << std::endl; + auto report = extra_report (); + if (not report.empty()) + model << report << std::endl; + + if (not trainDataFile.empty()) + model << "train data file: " << trainDataFile << std::endl; + if (not _validation_data_file.empty()) + model << "validation data file: " << _validation_data_file << std::endl; + if (not testDataFile.empty()) + model << "test data file: " << testDataFile << std::endl; + model << "normalisation: " << std::boolalpha << bNormalisation << std::endl; + if (_pModyfikator) + model << "train data set modifier(s): " << _pModyfikator->print() << std::endl; + + model << "RMSE for train data: " << blad_rmse_train << std::endl; + model << "RMSE for test data: " << blad_rmse_test << std::endl; + + model << "MAE for train data: " << blad_mae_train << std::endl; + model << "MAE for test data: " << blad_mae_test << std::endl; + model << "fuzzy rule base creation time: "; + if (zegar.elapsed_seconds() > 10) + model << 
zegar.elapsed_seconds() << " [s]"; + else + model << zegar.elapsed_milliseconds() << " [ms]"; + model << std::endl; + + /////////////////////// + // print model parameters and its linguistic decription + model << std::endl << std::endl; + model << "fuzzy rule base" << std::endl; + printRulebase (model); + model << std::endl << std::endl; + model << "linguistic description of fuzzy rule base" << std::endl; + printLinguisticDescriptionRulebase(model); + model << std::endl << std::endl << std::endl; + /////////////////////// + _answers_for_test.clear(); + _answers_for_train.clear(); + + model << std::endl << std::endl << std::endl; + model << "train data" << std::endl; + model << "expected\telaborated" << std::endl; + model << "===========================" << std::endl; + //////////// + _answers_for_train.resize(nXtrain); + #pragma omp parallel for + for (std::size_t i = 0; i < nXtrain; i++) + { + double expected = wYtrainExpected[i]; + double elaborated = wYtrainElaborated[i]; + model << expected << '\t' << elaborated << std::endl; + _answers_for_train[i] = {expected, elaborated, elaborated}; + } + ////////////// + model << std::endl << std::endl; + model << "test data" << std::endl; + model << "expected\telaborated" << std::endl; + model << "===========================" << std::endl; + + //////////////////// + _answers_for_test.resize(nXtest); + #pragma omp parallel for + for (std::size_t i = 0; i < nXtest; i++) + { +// model << wYtestExpected[i] << '\t' << wYtestElaborated[i] << std::endl; + + double expected = wYtestExpected[i]; + double elaborated = wYtestElaborated[i]; + model << expected << '\t' << elaborated << std::endl; + _answers_for_test[i] = {expected, elaborated, elaborated}; + } + ///////////////// + + model.close(); + return experiment_results; + } + CATCH; +} + ksi::result ksi::neuro_fuzzy_system::experiment_regression( const ksi::dataset & train, const ksi::dataset & test, @@ -814,7 +1183,12 @@ ksi::result 
ksi::neuro_fuzzy_system::experiment_regression( { try { + // validation == train + return ksi::neuro_fuzzy_system::experiment_regression_core(train, train, test, trainDataFile, trainDataFile, testDataFile, outputFile, nNumberOfRules, nNumberOfClusteringIterations, nNumberofTuningIterations, dbLearningCoefficient, bNormalisation); + ///// dead code: + + ksi::result experiment_results; // ksi::reader_complete czytacz; // auto zbiorTrain = czytacz.read(trainDataFile); @@ -838,7 +1212,7 @@ ksi::result ksi::neuro_fuzzy_system::experiment_regression( createFuzzyRulebase(nNumberOfClusteringIterations, nNumberofTuningIterations, dbLearningCoefficient, - trainDataset); + trainDataset, ksi::dataset()); /// @todo regression zegar.stop(); if (not _pRulebase->validate()) @@ -914,9 +1288,11 @@ ksi::result ksi::neuro_fuzzy_system::experiment_regression( model << report << std::endl; if (not trainDataFile.empty()) - model << "train data file: " << trainDataFile << std::endl; + model << "train data file: " << trainDataFile << std::endl; + if (not _validation_data_file.empty()) + model << "validation data file: " << _validation_data_file << std::endl; if (not testDataFile.empty()) - model << "test data file: " << testDataFile << std::endl; + model << "test data file: " << testDataFile << std::endl; model << "normalisation: " << std::boolalpha << bNormalisation << std::endl; if (_pModyfikator) model << "train data set modifier(s): " << _pModyfikator->print() << std::endl; @@ -1041,14 +1417,19 @@ void ksi::neuro_fuzzy_system::set_test_dataset(const ksi::dataset& ds) _TestDataset = ds; } +void ksi::neuro_fuzzy_system::set_validation_dataset (const ksi::dataset& ds) +{ + _ValidationDataset = ds; +} + void ksi::neuro_fuzzy_system::set_train_dataset(const ksi::dataset& ds) { _TrainDataset = ds; } -void ksi::neuro_fuzzy_system::createFuzzyRulebase(const ksi::dataset& train, const ksi::dataset& test) +void ksi::neuro_fuzzy_system::createFuzzyRulebase(const ksi::dataset& train, const 
ksi::dataset& test, const ksi::dataset & validat) { - createFuzzyRulebase(_nClusteringIterations, _nTuningIterations, _dbLearningCoefficient, train); + createFuzzyRulebase(_nClusteringIterations, _nTuningIterations, _dbLearningCoefficient, train, validat); // forward the validation set (not the test set) for model selection } std::vector > ksi::neuro_fuzzy_system::get_answers_for_train_classification() @@ -1109,6 +1490,11 @@ void ksi::neuro_fuzzy_system::set_train_data_file(const std::string& file) _train_data_file = file; } +void ksi::neuro_fuzzy_system::set_validation_data_file(const std::string& file) +{ + _validation_data_file = file; +} + void ksi::neuro_fuzzy_system::set_test_data_file(const std::string& file) { _test_data_file = file; @@ -1145,9 +1531,11 @@ std::string ksi::neuro_fuzzy_system::classification_intro() const model << "number of tuning interations: " << _nTuningIterations << std::endl; model << "learning coefficient: " << _dbLearningCoefficient << std::endl; if (not _train_data_file.empty()) - model << "train data file: " << _train_data_file << std::endl; + model << "train data file: " << _train_data_file << std::endl; + if (not _validation_data_file.empty()) + model << "validation data file: " << _validation_data_file << std::endl; if (not _test_data_file.empty()) - model << "test data file: " << _test_data_file << std::endl; + model << "test data file: " << _test_data_file << std::endl; model << "normalisation: " << std::boolalpha << _bNormalisation << std::endl; if (_pModyfikator) model << "train data set modifier(s): " << _pModyfikator->print() << std::endl; @@ -1212,3 +1600,26 @@ double ksi::neuro_fuzzy_system::modify_learning_coefficient(const double learnin return learning_coefficient; } +double ksi::neuro_fuzzy_system::get_train_dataset_cardinality() const +{ + return _TrainDataset.get_cardinality(); +} + +std::size_t ksi::neuro_fuzzy_system::get_train_dataset_size() const +{ + return _TrainDataset.size(); +} + +std::string ksi::neuro_fuzzy_system::report_average_number_of_rules_for_test() const +{ + return
{}; +} + +std::string ksi::neuro_fuzzy_system::report_average_number_of_rules_for_train() const +{ + return {}; +} + + + + diff --git a/source/neuro-fuzzy/neuro-fuzzy-system.h b/source/neuro-fuzzy/neuro-fuzzy-system.h index ac3184b..8afb8b6 100644 --- a/source/neuro-fuzzy/neuro-fuzzy-system.h +++ b/source/neuro-fuzzy/neuro-fuzzy-system.h @@ -49,6 +49,9 @@ namespace ksi /** train dataset */ dataset _TrainDataset; + /** validation dataset */ + dataset _ValidationDataset; + /** test dataset */ dataset _TestDataset; @@ -61,6 +64,8 @@ namespace ksi /** partitioner for identification of a fuzzy model */ partitioner * _pPartitioner = nullptr; + /** very short (brief) name of a neuro-fuzzy system */ + std::string _brief_name_of_neuro_fuzzy_system; /** short name of a neuro-fuzzy system */ std::string _name_of_neuro_fuzzy_system; /** short description of a neuro-fuzzy system showing its main features */ @@ -68,6 +73,7 @@ namespace ksi std::string _train_data_file; ///< name of train data file std::string _test_data_file; ///< name of test data file + std::string _validation_data_file ; ///< name of validation data file std::string _output_file; ///< name of output file double _positive_class; ///< label for positive class in classification @@ -83,6 +89,16 @@ namespace ksi std::vector> _answers_for_train; ///< answers for the train set: expected elaborated_numeric elaborated_class std::vector> _answers_for_test; ///< answers for the test set: expected elaborated_numeric elaborated_class + + public: + /** @return number of item in the train dataset + @date 2024-03-08 */ + std::size_t get_train_dataset_size() const; + + public: + /** @return sum of weights of all items in the train dataset + @date 2024-03-08 */ + double get_train_dataset_cardinality() const; public: /** @return The method return the value of the positive class. 
@date 2021-12-27 */ @@ -100,6 +116,11 @@ namespace ksi @date 2021-12-27 */ void set_negative_class (const double n); + public: + /** @return number of rules in the fuzzy system + * @date 2024-03-24 */ + virtual double get_number_of_rules () const; + public: /** @return threshold value elaborated for classification @date 2021-09-16 @@ -120,12 +141,12 @@ namespace ksi /** @return expected class, elaborated_numeric answer, elaborated_class for the train dataset @date 2021-09-16 */ - std::vector> get_answers_for_train_classification (); + virtual std::vector> get_answers_for_train_classification (); /** @return expected class, elaborated_numeric answer, elaborated_class for the test dataset @date 2021-09-16 */ - std::vector> get_answers_for_test_classification () ; + virtual std::vector> get_answers_for_test_classification () ; protected: @@ -200,6 +221,20 @@ namespace ksi const std::string & testDataFile, const std::string & resultsFile ); + + /** + @param trainDataFile + @param validationDataFile + @param testDataFile + @param resultsFile + @date 2024-03-11 + */ + neuro_fuzzy_system (const std::string & trainDataFile, + const std::string & validationDataFile, + const std::string & testDataFile, + const std::string & resultsFile + ); + /** @param trainData train dataset @@ -212,6 +247,18 @@ namespace ksi const std::string & resultsFile ); + /** + @param trainData train dataset + @param validationData validation dataset + @param testData test dataset + @param resultsFile name of the result file + @date 2024-03-11 + */ + neuro_fuzzy_system (const ksi::dataset & trainData, + const ksi::dataset & validationData, + const ksi::dataset & testData, + const std::string & resultsFile + ); /** @@ -303,10 +350,11 @@ namespace ksi * @param nTuningIterations number of tuning iterations * @param dbLearningCoefficient learning coefficient for gradient method * @param train train dataset + * @param validation validation dataset * @date 2018-03-29 * @author Krzysztof Siminski */ - 
virtual void createFuzzyRulebase (int nClusteringIterations, int nTuningIterations, double dbLearningCoefficient, const dataset & train) = 0; + virtual void createFuzzyRulebase (int nClusteringIterations, int nTuningIterations, double dbLearningCoefficient, const ksi::dataset & train, const dataset & validation) = 0; /** The method creates a fuzzy rulebase from the dataset. @@ -315,7 +363,7 @@ namespace ksi * @date 2021-09-14 * @author Krzysztof Siminski */ - virtual void createFuzzyRulebase (const dataset & train, const dataset & test); + virtual void createFuzzyRulebase (const dataset & train, const dataset & test, const dataset & validate); /** The method executes an experiment for regression. * @param trainDataFile name of file with train data @@ -352,6 +400,22 @@ namespace ksi const bool bNormalisation ); + /** @date 2024-04-22 */ +virtual result experiment_regression_core( + const ksi::dataset & trainDataset, + const ksi::dataset & validationDataset, + const ksi::dataset & testDataset, + const std::string & trainDataFile, + const std::string & validationDataFile, + const std::string & testDataFile, + const std::string & outputFile, + const int nNumberOfRules, + const int nNumberOfClusteringIterations, + const int nNumberofTuningIterations, + const double dbLearningCoefficient, + const bool bNormalisation +); + /** The method executes answers for the regression task. * @param trainDataFile name of file with train data * @param outputfile name of file to print results to @@ -391,6 +455,37 @@ namespace ksi const double dbNegativeClass, ksi::roc_threshold threshold_type ); + + + /** The method executes an experiment for classification. 
+ * @param trainDataFile name of file with train data + * @param validationDataFile name of file with validation data + * @param testDataFile name of file with test data + * @param outputfile name of file to print results to + * @param nNumberOfRules number of rules + * @param nNumberOfClusteringIterations number of clustering iterations + * @param nNumberofTuningIterations number of tuning iterations + * @param dbLearingCoefficient learning coefficient for gradient method + * @param bNormalisation true, if normalisation of data, false -- otherwise + * @param dbPositiveClass label of a positive class + * @param dbNegativeClass label of a negative class + * @param threshold_type classification threshold type + * @date 2018-02-04 + * @author Krzysztof Siminski + */ + virtual result experiment_classification (const std::string & trainDataFile, + const std::string & validationDataFile, + const std::string & testDataFile, + const std::string & outputFile, + const int nNumberOfRules, + const int nNumberOfClusteringIterations, + const int nNumberofTuningIterations, + const double dbLearningCoefficient, + const bool bNormalisation, + const double dbPositiveClass, + const double dbNegativeClass, + ksi::roc_threshold threshold_type + ); /** The method executes an experiment for classification. 
* @param trainData train data set @@ -438,6 +533,25 @@ namespace ksi ksi::roc_threshold threshold_type ); + + virtual result experiment_classification_core( + const ksi::dataset& trainDataset, + const ksi::dataset& validationDataset, + const ksi::dataset& testDataset, + const std::string & trainDataFile, + const std::string & validationDataFile, + const std::string & testDataFile, + const std::string& outputFile, + const int nNumberOfRules, + const int nNumberOfClusteringIterations, + const int nNumberofTuningIterations, + const double dbLearningCoefficient, + const bool bNormalisation, + const double dbPositiveClass, + const double dbNegativeClass, + ksi::roc_threshold threshold_type + ); + virtual result experiment_classification_core (); public: @@ -448,6 +562,12 @@ namespace ksi virtual result experiment_classification (const std::string & trainDataFile, const std::string & testDataFile, const std::string & outputFile); + + /** Just run an experiment for classification. All parameters should be already set. */ + virtual result experiment_classification (const std::string & trainDataFile, + const std::string & valiadtionDataFile, + const std::string & testDataFile, + const std::string & outputFile); /** Just run an experiment for classification. All parameters should be already set. @date 2022-02-02*/ @@ -481,6 +601,9 @@ namespace ksi virtual double elaborate_rmse_for_dataset (const dataset & ds) override; + /** @return a very short (symbolic) name of the neuro-fuzzy system + * @date 2024-04-21 * */ + virtual std::string get_brief_nfs_name () const; /** @return a short name of the neuro-fuzzy system */ virtual std::string get_nfs_name () const; /** @return a description of the neuro-fuzzy system */ @@ -495,6 +618,11 @@ namespace ksi @date 2021-09-21*/ void set_train_data_file(const std::string & file); + /** The method sets the validation data file.
+ @param file file name for the validation data + @date 2024-03-16 */ + void set_validation_data_file(const std::string & file); + /** The method sets test data file. @param file file name for test data @date 2021-09-21*/ @@ -557,11 +685,31 @@ namespace ksi @date 2021-09-14*/ void set_train_dataset (const ksi::dataset & ds); + /** The method sets a validation dataset. + @param ds a dataset to set + @date 2024-03-16 */ + void set_validation_dataset (const ksi::dataset & ds); + /** The method sets a test dataset. @param ds a dataset to set @date 2021-09-14*/ void set_test_dataset (const ksi::dataset & ds); + + public: + /** A report entry on the average number of rules for the train dataset. + * It is used in three way decision NFS and fuzzy three way decision NFS. + * @return empty string + * @date 2024-03-24 */ + virtual std::string report_average_number_of_rules_for_train () const; + + public: + /** A report entry on the average number of rules for the test dataset. + * It is used in three way decision NFS and fuzzy three way decision NFS.
+ @return empty string + @date 2024-03-24 */ + virtual std::string report_average_number_of_rules_for_test () const; + }; } diff --git a/source/neuro-fuzzy/nfs_prototype.cpp b/source/neuro-fuzzy/nfs_prototype.cpp index 259df99..ec4c05e 100644 --- a/source/neuro-fuzzy/nfs_prototype.cpp +++ b/source/neuro-fuzzy/nfs_prototype.cpp @@ -203,12 +203,12 @@ double ksi::nfs_prototype::discriminate(const ksi::datum& d) void ksi::nfs_prototype::train_discriminative_model(const ksi::dataset& ds) { - createFuzzyRulebase(-1, -1, -1, ds); + createFuzzyRulebase(-1, -1, -1, ds, ds); // validation = train } void ksi::nfs_prototype::train_generative_model(const ksi::dataset& ds) { - createFuzzyRulebase(-1, -1, -1, ds); + createFuzzyRulebase(-1, -1, -1, ds, ds); // validation = train } double ksi::nfs_prototype::answer(const ksi::datum& item) const diff --git a/source/neuro-fuzzy/subspace-annbfis.cpp b/source/neuro-fuzzy/subspace-annbfis.cpp index 8d9896b..225af6d 100644 --- a/source/neuro-fuzzy/subspace-annbfis.cpp +++ b/source/neuro-fuzzy/subspace-annbfis.cpp @@ -145,155 +145,188 @@ ksi::subspace_annbfis::subspace_annbfis(ksi::subspace_annbfis && a): neuro_fuzzy void ksi::subspace_annbfis::createFuzzyRulebase ( - int nClusteringIterations, int nTuningIterations, double dbLearningCoefficient, const ksi::dataset& train) + int nClusteringIterations, int nTuningIterations, double eta, const ksi::dataset& train, const ksi::dataset& validation) { try { - const double INITIAL_W = 2.0; - - _nClusteringIterations = nClusteringIterations; - _nTuningIterations = nTuningIterations; - _TrainDataset = train; - - if (_pTnorm) - delete _pTnorm; - _pTnorm = new t_norm_product (); - - if (_pRulebase) - delete _pRulebase; - _pRulebase = new rulebase(); - - if (not _pImplication) - throw std::string("no implication"); - - // ksi::sfcm clusterer; - // clusterer.setNumberOfClusters(_nRules); - // clusterer.setNumberOfIterations(_nClusteringIterations); - - std::size_t nX = train.getNumberOfData(); - 
std::size_t nAttr = train.getNumberOfAttributes(); - std::size_t nAttr_1 = nAttr - 1; - - auto XY = train.splitDataSetVertically (nAttr - 1); - auto trainX = XY.first; - auto trainY = XY.second; - - auto podzial = doPartition(trainX); - auto nRules = podzial.getNumberOfClusters(); - - // pobranie danych w postaci macierzy: - auto wTrainX = trainX.getMatrix(); - auto wTrainY = trainY.getMatrix(); -// debug(wTrainY); - - std::vector wY(nX); - for (std::size_t x = 0; x < nX; x++) - wY[x] = wTrainY[x][0]; - - for (int c = 0; c < nRules; c++) - { - ksi::subspace_premise przeslanka; - auto klaster = podzial.getCluster(c); - - auto maxi_weight = 0.0; - for (std::size_t a = 0; a < nAttr_1; a++) - { - // trzeba sprawdzić, czy w przesłance nie są zerowe wartości rozmycia: - - przeslanka.addDescriptor(klaster->getDescriptor(a)); - // for augmentation of rule's desciptors' weights; - - auto w = klaster->getAddressOfDescriptor(a)->getWeight(); - if (maxi_weight < w) - maxi_weight = w; - } - - for (std::size_t a = 0; a < nAttr_1; a++) - przeslanka.augment_attribute(a, maxi_weight); - - // attribute augmentation ends here :-) - logicalrule regula (*_pTnorm, *_pImplication); - regula.setPremise(przeslanka); - consequence_CL konkluzja (std::vector(nAttr_1 + 1, 0.0), INITIAL_W); - regula.setConsequence(konkluzja); - _pRulebase->addRule(regula); - - } - - // dla wyznaczenia wartosci konkuzji: - std::vector> G_przyklad_regula; - - // mam zgrupowane dane, teraz trzeba nastroic system - for (int i = 0; i < nTuningIterations; i++) - { - if (i % 2 == 0) - { - G_przyklad_regula.clear(); // dla konkluzji - - // strojenie gradientowe - _pRulebase->reset_differentials(); - for (std::size_t x = 0; x < nX; x++) - { - // Uruchomienie strojenia gradiendowego. 
- double odpowiedz = _pRulebase->answer(wTrainX[x]); - // dla wyznaczania konkluzji: - auto localisation_weight = _pRulebase->get_last_rules_localisations_weights(); - std::vector Gs; - for (auto & p : localisation_weight) - Gs.push_back(p.second); - - G_przyklad_regula.push_back(Gs); - // no i juz zwykla metoda gradientowa - _pRulebase->cummulate_differentials(wTrainX[x], wY[x]); - } - _pRulebase->actualise_parameters(dbLearningCoefficient); - } - else - { - // wyznaczanie wspolczynnikow konkluzji. - least_square_error_regression lser ((nAttr_1 + 1) * nRules); - - // przygotowanie wektora D - for (std::size_t x = 0; x < nX; x++) - { - auto G_suma = std::accumulate(G_przyklad_regula[x].begin(), - G_przyklad_regula[x].end(), 0.0); - - std::vector linia((nAttr_1 + 1) * nRules); - int index = 0; - for (int r = 0; r < nRules; r++) - { - auto S = G_przyklad_regula[x][r] / G_suma; - for (std::size_t a = 0; a < nAttr_1; a++) - linia[index++] = S * wTrainX[x][a]; - linia[index++] = S; - } - lser.read_data_item(linia, wY[x]); - } - auto p = lser.get_regression_coefficients(); - - // teraz zapis do regul: + const double INITIAL_W = 2.0; + + _nClusteringIterations = nClusteringIterations; + _nTuningIterations = nTuningIterations; + _TrainDataset = train; + + if (_pTnorm) + delete _pTnorm; + _pTnorm = new t_norm_product (); + + if (_pRulebase) + delete _pRulebase; + _pRulebase = new rulebase(); + + if (not _pImplication) + throw std::string("no implication"); + + // remember the best rulebase: + std::deque errors; + std::unique_ptr pTheBest (_pRulebase->clone()); + double dbTheBestRMSE = std::numeric_limits::max(); + //////// + + // ksi::sfcm clusterer; + // clusterer.setNumberOfClusters(_nRules); + // clusterer.setNumberOfIterations(_nClusteringIterations); + + std::size_t nX = train.getNumberOfData(); + std::size_t nAttr = train.getNumberOfAttributes(); + std::size_t nAttr_1 = nAttr - 1; + + auto XY = train.splitDataSetVertically (nAttr - 1); + auto trainX = XY.first; + auto 
trainY = XY.second; + + auto XYval = validation.splitDataSetVertically(nAttr - 1); + auto validateX = XYval.first; + auto validateY = XYval.second; + + auto mvalidateY = validateY.getMatrix(); + auto nValY = validateY.getNumberOfData(); + std::vector wvalidateY (nValY); + for (std::size_t x = 0; x < nValY; x++) + wvalidateY[x] = mvalidateY[x][0]; + //////////////////////// + + auto podzial = doPartition(trainX); + auto nRules = podzial.getNumberOfClusters(); + + // pobranie danych w postaci macierzy: + auto wTrainX = trainX.getMatrix(); + auto wTrainY = trainY.getMatrix(); + // debug(wTrainY); + + std::vector wY(nX); + for (std::size_t x = 0; x < nX; x++) + wY[x] = wTrainY[x][0]; + + for (int c = 0; c < nRules; c++) + { + ksi::subspace_premise przeslanka; + auto klaster = podzial.getCluster(c); + + auto maxi_weight = 0.0; + for (std::size_t a = 0; a < nAttr_1; a++) + { + // trzeba sprawdzić, czy w przesłance nie są zerowe wartości rozmycia: + + przeslanka.addDescriptor(klaster->getDescriptor(a)); + // for augmentation of rule's desciptors' weights; + + auto w = klaster->getAddressOfDescriptor(a)->getWeight(); + if (maxi_weight < w) + maxi_weight = w; + } + + for (std::size_t a = 0; a < nAttr_1; a++) + przeslanka.augment_attribute(a, maxi_weight); + + // attribute augmentation ends here :-) + logicalrule regula (*_pTnorm, *_pImplication); + regula.setPremise(przeslanka); + consequence_CL konkluzja (std::vector(nAttr_1 + 1, 0.0), INITIAL_W); + regula.setConsequence(konkluzja); + _pRulebase->addRule(regula); + + } + + // dla wyznaczenia wartosci konkuzji: + std::vector> G_przyklad_regula; + + // mam zgrupowane dane, teraz trzeba nastroic system + for (int i = 0; i < nTuningIterations; i++) + { + if (i % 2 == 0) + { + G_przyklad_regula.clear(); // dla konkluzji + + // strojenie gradientowe + _pRulebase->reset_differentials(); + for (std::size_t x = 0; x < nX; x++) + { + // Uruchomienie strojenia gradiendowego. 
+ double odpowiedz = _pRulebase->answer(wTrainX[x]); + // dla wyznaczania konkluzji: + auto localisation_weight = _pRulebase->get_last_rules_localisations_weights(); + std::vector Gs; + for (auto & p : localisation_weight) + Gs.push_back(p.second); + + G_przyklad_regula.push_back(Gs); + // no i juz zwykla metoda gradientowa + _pRulebase->cummulate_differentials(wTrainX[x], wY[x]); + } + _pRulebase->actualise_parameters(eta); + } + else + { + // wyznaczanie wspolczynnikow konkluzji. + least_square_error_regression lser ((nAttr_1 + 1) * nRules); + + // przygotowanie wektora D + for (std::size_t x = 0; x < nX; x++) + { + auto G_suma = std::accumulate(G_przyklad_regula[x].begin(), + G_przyklad_regula[x].end(), 0.0); + + std::vector linia((nAttr_1 + 1) * nRules); + int index = 0; for (int r = 0; r < nRules; r++) { - std::vector coeff (nAttr_1 + 1); - - for (std::size_t a = 0; a < nAttr_1 + 1; a++) - coeff[a] = p[r * (nAttr_1 + 1) + a]; - consequence_CL konkluzja (coeff, INITIAL_W); - (*_pRulebase)[r].setConsequence(konkluzja); + auto S = G_przyklad_regula[x][r] / G_suma; + for (std::size_t a = 0; a < nAttr_1; a++) + linia[index++] = S * wTrainX[x][a]; + linia[index++] = S; } - } - - // test: wyznaczam blad systemu - std::vector wYelaborated (nX); - for (std::size_t x = 0; x < nX; x++) - wYelaborated[x] = answer( *(trainX.getDatum(x))); - - //ksi::error_RMSE rmse; - //double blad = rmse.getError(wY, wYelaborated); - //debug(blad); - } - // system nastrojony :-) + lser.read_data_item(linia, wY[x]); + } + auto p = lser.get_regression_coefficients(); + + // teraz zapis do regul: + for (int r = 0; r < nRules; r++) + { + std::vector coeff (nAttr_1 + 1); + + for (std::size_t a = 0; a < nAttr_1 + 1; a++) + coeff[a] = p[r * (nAttr_1 + 1) + a]; + consequence_CL konkluzja (coeff, INITIAL_W); + (*_pRulebase)[r].setConsequence(konkluzja); + } + } + + ////////////////////////////////// + // test: wyznaczam blad systemu + + std::vector wYelaborated (nValY); + for (std::size_t x = 0; x 
< nValY; x++) // one answer per validation item: wYelaborated and validateX hold nValY entries + wYelaborated[x] = answer( *(validateX.getDatum(x))); + + /////////////////////////// + ksi::error_RMSE rmse; + double blad = rmse.getError(wvalidateY, wYelaborated); + // std::cout << __FILE__ << " (" << __LINE__ << ") " << "coeff: " << eta << ", iter: " << i << ", RMSE(train): " << blad << std::endl; + errors.push_front(blad); + + eta = modify_learning_coefficient(eta, errors); // modify learning coefficient + // remember the best rulebase: + if (dbTheBestRMSE > blad) + { + dbTheBestRMSE = blad; + pTheBest = std::unique_ptr(_pRulebase->clone()); + } + /////////////////////////// + } + // system tuned :-) + // update the rulebase with the best one: + delete _pRulebase; + _pRulebase = pTheBest->clone(); } CATCH; } diff --git a/source/neuro-fuzzy/subspace-annbfis.h b/source/neuro-fuzzy/subspace-annbfis.h index cc669c7..2d98e3c 100644 --- a/source/neuro-fuzzy/subspace-annbfis.h +++ b/source/neuro-fuzzy/subspace-annbfis.h @@ -109,14 +109,16 @@ namespace ksi * @param nClusteringIterations number of clustering iterations * @param nTuningIterations number of tuning iterations * @param dbLearningCoefficient learning coefficient for gradient method - * @param train train dataset + * @param train train dataset + * @param validation validation dataset * @date 2018-02-24 * @author Krzysztof Siminski */ virtual void createFuzzyRulebase ( int nClusteringIterations, int nTuningIterations, double dbLearningCoefficient, - const dataset & train); + const dataset & train, + const dataset & validation); protected: void set_name (); // virtual partition doPartition (const dataset & X); diff --git a/source/neuro-fuzzy/three_way_decision_nfs.cpp b/source/neuro-fuzzy/three_way_decision_nfs.cpp index 638b881..38f7ad1 100644 --- a/source/neuro-fuzzy/three_way_decision_nfs.cpp +++ b/source/neuro-fuzzy/three_way_decision_nfs.cpp @@ -1,11 +1,11 @@ - #include #include #include #include #include #include +#include #include "../neuro-fuzzy/three_way_decision_nfs.h" #include
"../neuro-fuzzy/neuro-fuzzy-system.h" @@ -19,39 +19,143 @@ ksi::three_way_decision_nfs::three_way_decision_nfs () _threshold_type = ksi::roc_threshold::none; } -ksi::three_way_decision_nfs::three_way_decision_nfs(const std::vector >& cascade) : _cascade(cascade) +ksi::three_way_decision_nfs::three_way_decision_nfs(const std::vector >& cascade) : _cascade(cascade) +{ + set_positive_class(cascade[0]->get_positive_class()); + set_negative_class(cascade[0]->get_negative_class()); + _threshold_type = ksi::roc_threshold::none; +} + +ksi::three_way_decision_nfs::three_way_decision_nfs( + const std::vector > & cascade, + const std::string & train, + const std::string & test, + const std::string & result, + const double maximal_deviation + ) : ksi::neuro_fuzzy_system(train, test, result), _cascade(cascade) +{ + set_positive_class(cascade[0]->get_positive_class()); + set_negative_class(cascade[0]->get_negative_class()); + _threshold_type = ksi::roc_threshold::none; + _noncommitment_widths = std::vector (cascade.size(), maximal_deviation); +} + +ksi::three_way_decision_nfs::three_way_decision_nfs( + const std::vector > & cascade, + const std::string & train, + const std::string & test, + const std::string & result, + const std::vector & maximal_deviations + ) : ksi::neuro_fuzzy_system(train, test, result), _cascade(cascade), _noncommitment_widths(maximal_deviations) { set_positive_class(cascade[0]->get_positive_class()); set_negative_class(cascade[0]->get_negative_class()); _threshold_type = ksi::roc_threshold::none; + + try + { + if (cascade.size() != maximal_deviations.size()) + { + std::stringstream sos; + sos << "The lenght of the cascade (" << _cascade.size() << ") and the number of maximal_deviations (" << _noncommitment_widths.size() << ") do not match."; + throw sos.str(); + } + } + CATCH; } + ksi::three_way_decision_nfs::three_way_decision_nfs( const std::vector > & cascade, const std::string & train, + const std::string & validation, const std::string & test, const 
std::string & result, const double maximal_deviation - ) : ksi::neuro_fuzzy_system(train, test, result), _noncommintment_value(maximal_deviation), _cascade(cascade) + ) : ksi::neuro_fuzzy_system(train, validation, test, result), _cascade(cascade) { set_positive_class(cascade[0]->get_positive_class()); set_negative_class(cascade[0]->get_negative_class()); _threshold_type = ksi::roc_threshold::none; + _noncommitment_widths = std::vector (cascade.size(), maximal_deviation); +} + +ksi::three_way_decision_nfs::three_way_decision_nfs(const std::vector>& cascade, const double maximal_deviation) +: _cascade(cascade) +{ + _threshold_type = ksi::roc_threshold::none; + _noncommitment_widths = std::vector (cascade.size(), maximal_deviation); +} + +ksi::three_way_decision_nfs::three_way_decision_nfs(const std::vector>& cascade, const std::vector & maximal_deviations) +: _cascade(cascade), _noncommitment_widths(maximal_deviations) +{ + try + { + if (cascade.size() != maximal_deviations.size()) + { + std::stringstream sos; + sos << "The lenght of the cascade (" << _cascade.size() << ") and the number of maximal_deviations (" << _noncommitment_widths.size() << ") do not match."; + throw sos.str(); + } + } + CATCH; } + ksi::three_way_decision_nfs::three_way_decision_nfs( const std::vector >& cascade, const ksi::dataset& train, const ksi::dataset& test, const std::string& result, const double maximal_deviation -) : ksi::neuro_fuzzy_system(train, test, result), _noncommintment_value(maximal_deviation), _cascade(cascade) +) : ksi::neuro_fuzzy_system(train, test, result), _cascade(cascade) { set_positive_class(cascade[0]->get_positive_class()); set_negative_class(cascade[0]->get_negative_class()); _threshold_type = ksi::roc_threshold::none; + _noncommitment_widths = std::vector (cascade.size(), maximal_deviation); +} + + +ksi::three_way_decision_nfs::three_way_decision_nfs( + const std::vector >& cascade, + const ksi::dataset& train, + const ksi::dataset& test, + const std::string& 
result, + const std::vector & maximal_deviations +) : ksi::neuro_fuzzy_system(train, test, result), _cascade(cascade), _noncommitment_widths(maximal_deviations) +{ + set_positive_class(cascade[0]->get_positive_class()); + set_negative_class(cascade[0]->get_negative_class()); + _threshold_type = ksi::roc_threshold::none; + + try + { + if (cascade.size() != maximal_deviations.size()) + { + std::stringstream sos; + sos << "The lenght of the cascade (" << _cascade.size() << ") and the number of maximal_deviations (" << _noncommitment_widths.size() << ") do not match."; + throw sos.str(); + } + } + CATCH; } +ksi::three_way_decision_nfs::three_way_decision_nfs( + const std::vector >& cascade, + const ksi::dataset & train, + const ksi::dataset & validation, + const ksi::dataset & test, + const std::string & result, + const double maximal_deviation +) : ksi::neuro_fuzzy_system(train, validation, test, result), _cascade(cascade) +{ + set_positive_class(cascade[0]->get_positive_class()); + set_negative_class(cascade[0]->get_negative_class()); + _threshold_type = ksi::roc_threshold::none; + _noncommitment_widths = std::vector (cascade.size(), maximal_deviation); +} void ksi::three_way_decision_nfs::copy_fields(const ksi::three_way_decision_nfs& _3wnfs) @@ -59,13 +163,17 @@ void ksi::three_way_decision_nfs::copy_fields(const ksi::three_way_decision_nfs& _positive_class = _3wnfs._positive_class; _negative_class = _3wnfs._negative_class; _threshold_type = _3wnfs._threshold_type; + _noncommitment_widths = _3wnfs._noncommitment_widths; + + _dbTestAverageNumerOfRulesUsed = _3wnfs._dbTestAverageNumerOfRulesUsed; + _dbTrainAverageNumerOfRulesUsed = _3wnfs._dbTrainAverageNumerOfRulesUsed; + _number_of_rules_used = _3wnfs._number_of_rules_used; + _number_of_data_items = _3wnfs._number_of_data_items; } - ksi::three_way_decision_nfs::~three_way_decision_nfs () { // delete what is to delete - } ksi::three_way_decision_nfs::three_way_decision_nfs (const ksi::three_way_decision_nfs & wzor) 
: ksi::neuro_fuzzy_system(wzor) @@ -116,13 +224,11 @@ ksi::three_way_decision_nfs & ksi::three_way_decision_nfs::operator= (ksi::three return *this; } - ksi::neuro_fuzzy_system * ksi::three_way_decision_nfs::clone() const { return new ksi::three_way_decision_nfs(*this); } - ksi::discriminative_model * ksi::three_way_decision_nfs::clone_discriminant() const { return new ksi::three_way_decision_nfs(*this); @@ -145,17 +251,19 @@ void ksi::three_way_decision_nfs::train_discriminative_model(const ksi::dataset& throw (std::string ("not implemented")); } -void ksi::three_way_decision_nfs::createFuzzyRulebase(int, int, double, const ksi::dataset& train) +void ksi::three_way_decision_nfs::createFuzzyRulebase(int, int, double, const ksi::dataset& train, const ksi::dataset& validate) { - createFuzzyRulebase(train, this->_TestDataset); + createFuzzyRulebase(train, this->_TestDataset, validate); } -void ksi::three_way_decision_nfs::createFuzzyRulebase(const ksi::dataset& train, const ksi::dataset & test) + +/// @todo Po co test tutaj? 
+void ksi::three_way_decision_nfs::createFuzzyRulebase(const ksi::dataset& train, const ksi::dataset & test, const ksi::dataset & validation) { try { auto zbior_treningowy = train; - + auto nAttributes = zbior_treningowy.getNumberOfAttributes(); bool remove_system = false; for (std::size_t i = 0; i < _cascade.size(); i++) { @@ -167,9 +275,11 @@ void ksi::three_way_decision_nfs::createFuzzyRulebase(const ksi::dataset& train, { auto & pSystem = _cascade[i]; pSystem->set_train_data_file(this->_train_data_file); + pSystem->set_validation_data_file (this->_validation_data_file); pSystem->set_test_data_file (this->_test_data_file); pSystem->set_output_file(this->_output_file + std::to_string(i)); pSystem->set_train_dataset(zbior_treningowy); + pSystem->set_validation_dataset(validation); pSystem->set_test_dataset(test); pSystem->experiment_classification_core(); @@ -178,8 +288,9 @@ void ksi::three_way_decision_nfs::createFuzzyRulebase(const ksi::dataset& train, auto threshold_value = pSystem->get_threshold_value(); // zapisanie do zbioru_treningowego danych bliskich progowi - zbior_treningowy = extract_poor_results(zbior_treningowy, results_train, threshold_value, _noncommintment_value); - if (zbior_treningowy.size() < 1 or zbior_treningowy.size() < zbior_treningowy.getNumberOfAttributes()) + zbior_treningowy = extract_poor_results(zbior_treningowy, results_train, threshold_value, _noncommitment_widths[i]); + + if (zbior_treningowy.size() < nAttributes) { // wszystkie nastepne systemy trzeba skasowac remove_system = true; @@ -197,40 +308,76 @@ double ksi::three_way_decision_nfs::answer(const ksi::datum& item) const try { double result = std::numeric_limits::signaling_NaN(); - for (auto & pSystem : _cascade) + + std::size_t number_of_rules = 0; + for (std::size_t i = 0; i < _cascade.size(); i++) { + auto & pSystem = _cascade[i]; + number_of_rules += pSystem->get_number_of_rules(); result = pSystem->answer(item); auto threshold_value = pSystem->get_threshold_value(); - if 
(std::fabs(result - threshold_value) > _noncommintment_value) + if (std::fabs(result - threshold_value) > _noncommitment_widths[i]) + { + _number_of_rules_used += number_of_rules; return result; + } } return result; } CATCH; } +std::vector> ksi::three_way_decision_nfs::get_answers_for_train_classification() +{ + _number_of_rules_used = 0; + _number_of_data_items = 0; + auto result = ksi::neuro_fuzzy_system::get_answers_for_train_classification(); + auto data_size = _TrainDataset.size(); + _dbTrainAverageNumerOfRulesUsed = 1.0 * _number_of_rules_used / _number_of_data_items; + return result; +} + + +std::vector> ksi::three_way_decision_nfs::get_answers_for_test_classification() +{ + _number_of_rules_used = 0; + _number_of_data_items = 0; + auto result = ksi::neuro_fuzzy_system::get_answers_for_test_classification(); + auto data_size = _TestDataset.size(); + _dbTestAverageNumerOfRulesUsed = 1.0 * _number_of_rules_used / _number_of_data_items; + return result; +} std::pair ksi::three_way_decision_nfs::answer_classification(const ksi::datum& item) const { try { - auto nan = std::numeric_limits::signaling_NaN(); - auto result = std::make_pair (nan, nan); - for (auto & pSystem : _cascade) - { - result = pSystem->answer_classification(item); - auto threshold_value = pSystem->get_threshold_value(); - auto numeric = result.first; - if (std::fabs(numeric - threshold_value) > _noncommintment_value) - return result; - } - return result; + auto nan = std::numeric_limits::signaling_NaN(); + auto result = std::make_pair (nan, nan); + std::size_t number_of_rules = 0; + + for (std::size_t i = 0; i < _cascade.size(); i++) + { + auto & pSystem = _cascade[i]; + result = pSystem->answer_classification(item); + number_of_rules += pSystem->get_number_of_rules(); + auto threshold_value = pSystem->get_threshold_value(); + auto numeric = result.first; + if (std::fabs(numeric - threshold_value) > _noncommitment_widths[i]) + { + ++_number_of_data_items; + _number_of_rules_used += 
number_of_rules; + return result; + } + } + ++_number_of_data_items; + _number_of_rules_used += number_of_rules; + return result; } CATCH; } - ksi::dataset ksi::three_way_decision_nfs::extract_poor_results( const ksi::dataset & data, const std::vector> & results_train, @@ -258,8 +405,6 @@ ksi::dataset ksi::three_way_decision_nfs::extract_poor_results( } CATCH; } - - ksi::number ksi::three_way_decision_nfs::elaborate_answer(const ksi::datum& d) const { @@ -267,8 +412,6 @@ ksi::number ksi::three_way_decision_nfs::elaborate_answer(const ksi::datum& d) c throw (std::string ("not implemented")); } - - std::string ksi::three_way_decision_nfs::get_nfs_description() const { std::stringstream ss; @@ -283,16 +426,42 @@ std::string ksi::three_way_decision_nfs::get_nfs_description() const std::string ksi::three_way_decision_nfs::get_nfs_name() const { - std::stringstream ss; std::string description {"3wdnfs-classifier"}; - ss << description; - for (const auto & s : _cascade) - { - ss << "-" << s->get_nfs_name(); - } + + std::stringstream ss; + ss << description << get_cascade_names(); + return ss.str(); +} + +std::string ksi::three_way_decision_nfs::get_brief_nfs_name() const +{ + std::string description {"3wd"}; + + std::stringstream ss; + ss << description << get_brief_cascade_names(); return ss.str(); } +std::string ksi::three_way_decision_nfs::get_cascade_names() const +{ + std::stringstream ss; + for (const auto & s : _cascade) + { + ss << "-" << s->get_nfs_name(); + } + return ss.str(); +} + +std::string ksi::three_way_decision_nfs::get_brief_cascade_names() const +{ + std::stringstream ss; + for (const auto & s : _cascade) + { + ss << "-" << s->get_brief_nfs_name(); + } + return ss.str(); +} + std::string ksi::three_way_decision_nfs::extra_report() const { try @@ -322,8 +491,9 @@ std::string ksi::three_way_decision_nfs::classification_intro() const // model << "number of clustering iterations: " << _nClusteringIterations << std::endl; // model << "number of tuning 
interations: " << _nTuningIterations << std::endl; // model << "learning coefficient: " << _dbLearningCoefficient << std::endl; - model << "train data file: " << _train_data_file << std::endl; - model << "test data file: " << _test_data_file << std::endl; + model << "train data file: " << _train_data_file << std::endl; + model << "validation data file: " << _validation_data_file << std::endl; + model << "test data file: " << _test_data_file << std::endl; // model << "normalisation: " << std::boolalpha << _bNormalisation << std::endl; // if (_pModyfikator) // model << "train data set modifier(s): " << _pModyfikator->print() << std::endl; @@ -342,21 +512,27 @@ void ksi::three_way_decision_nfs::printRulebase(std::ostream& ss) { try { - ss << "Rulebases of three way decision system" << std::endl; - ss << "--------------------------------------" << std::endl; + ss << std::endl; + ss << "Rulebases of neuro-fuzzy systems in the cascade" << std::endl; + ss << "-----------------------------------------------" << std::endl; ss << std::endl; ss << "number of classifiers: " << _cascade.size() << std::endl; std::size_t i = 1; - for (const auto & p : _cascade) + for (std::size_t i = 0; i < _cascade.size(); i++) { + auto & p = _cascade[i]; ss << std::endl; ss << "classifier: " << i << std::endl; ss << p->get_nfs_name() << std::endl; - ss << "threshold type: " << ksi::to_string(p->get_threshold_type()) << std::endl; - ss << "threshold value: " << p->get_threshold_value() << i << std::endl; + ss << "number of rules: " << p->get_number_of_rules() << std::endl; + ss << "threshold type: " << ksi::to_string(p->get_threshold_type()) << std::endl; + ss << "threshold value: " << p->get_threshold_value() << i << std::endl; + ss << "noncommitment width: " << _noncommitment_widths[i] << std::endl; + ss << "size of train dataset: " << p->get_train_dataset_size() << std::endl; + ss << "cardinality of train dataset: " << p->get_train_dataset_cardinality() << std::endl; ss << 
"--------------------------------------" << std::endl; ss << std::endl; ss << "fuzzy rule base" << std::endl; @@ -364,9 +540,8 @@ void ksi::three_way_decision_nfs::printRulebase(std::ostream& ss) ss << std::endl; ss << "linguistic description of fuzzy rule base" << std::endl; p->printLinguisticDescriptionRulebase(ss); - i++; } - ss << "--------------------------------------" << std::endl; + ss << "-----------------------------------------------" << std::endl; } CATCH; } @@ -378,6 +553,11 @@ double ksi::three_way_decision_nfs::elaborate_threshold_value( double negativeClassvalue, const ksi::roc_threshold& type) { +// try +// { +// throw std::string ("Do not use this method! This NFS has a cascade of NFSs. Each of them has its own threshold value."); +// } +// CATCH; return 0.0; } @@ -386,11 +566,40 @@ std::string ksi::three_way_decision_nfs::get_classification_threshold_value() co return std::string {}; // empty string } +double ksi::three_way_decision_nfs::get_number_of_rules () const +{ + return _dbTestAverageNumerOfRulesUsed; +} + +std::string ksi::three_way_decision_nfs::report_average_number_of_rules_for_test() const +{ + std::string stringified; + if (std::isnan(_dbTestAverageNumerOfRulesUsed)) + stringified = "NaN"; + else + stringified = std::to_string(_dbTestAverageNumerOfRulesUsed); + + return std::string {"average number of rules used for one test data item: "} + stringified; +} + +std::string ksi::three_way_decision_nfs::report_average_number_of_rules_for_train() const +{ + std::string stringified; + if (std::isnan(_dbTrainAverageNumerOfRulesUsed)) + stringified = "NaN"; + else + stringified = std::to_string(_dbTrainAverageNumerOfRulesUsed); + + return std::string {"average number of rules used for one train data item: "} + stringified; +} + + namespace ksi { std::ostream & operator<< (std::ostream & ss, const ksi::three_way_decision_nfs & system) { + ss << std::endl; ss << "Three way decision system" << std::endl; ss << "-------------------------" << 
std::endl; @@ -398,17 +607,20 @@ namespace ksi ss << "number of classifiers: " << system._cascade.size() << std::endl; - std::size_t i = 1; - for (const auto & p : system._cascade) + + for (std::size_t i = 0; i < system._cascade.size(); i++) { + auto & p = system._cascade[i]; ss << "classifier: " << i << std::endl; // ss << p->printRulebase() << std::endl; ss << p->get_nfs_name() << std::endl; - ss << "threshold type: " << to_string(p->get_threshold_type()) << std::endl; - ss << "threshold value: " << p->get_threshold_value() << std::endl; - i++; + ss << "number of rules: " << p->get_number_of_rules() << std::endl; + ss << "threshold type: " << to_string(p->get_threshold_type()) << std::endl; + ss << "threshold value: " << p->get_threshold_value() << std::endl; + ss << "noncommitment width: " << system._noncommitment_widths[i] << std::endl; + ss << std::endl; } - + ss << "-------------------------" << std::endl; return ss; } } diff --git a/source/neuro-fuzzy/three_way_decision_nfs.h b/source/neuro-fuzzy/three_way_decision_nfs.h index 10e98b3..43652ae 100644 --- a/source/neuro-fuzzy/three_way_decision_nfs.h +++ b/source/neuro-fuzzy/three_way_decision_nfs.h @@ -13,7 +13,7 @@ namespace ksi { /** Three way decision neuro-fuzzy classifier. - The main feature of the system is the three way decision. It returns three answers: positive class, negative class, or noncommintment decision (“I do not know”, “I cannot decide” answer). + The main feature of the system is the three way decision. It returns three answers: positive class, negative class, or noncommitment decision (“I do not know”, “I cannot decide” answer). 
@author Krzysztof Siminski @date 2021-09-12 @@ -21,12 +21,25 @@ namespace ksi class three_way_decision_nfs : public neuro_fuzzy_system { protected: - double _noncommintment_value = 0.2; ///< maximal deviation from the threshold in classification + mutable std::size_t _number_of_rules_used {0}; + mutable std::size_t _number_of_data_items {0}; + mutable double _dbTrainAverageNumerOfRulesUsed {0.0}; + mutable double _dbTestAverageNumerOfRulesUsed {0.0}; + + public: + std::vector> get_answers_for_train_classification() override; + + public: + std::vector> get_answers_for_test_classification() override; + + protected: /** This is the crucial part of this three-way-decision system – a cascade of classifiers. */ std::vector> _cascade; - - + protected: + /*** halfs of width of concommitment intervals for each NFS in the cascade */ + std::vector _noncommitment_widths; + public: three_way_decision_nfs (); three_way_decision_nfs (const std::vector> & cascade); @@ -35,7 +48,8 @@ namespace ksi @param train name of train dataset file @param test name of test dataset file @param result name of output result file - @param maximal_deviation maximal deviation from a classification threshold + @param maximal_deviation maximal deviation from a classification threshold, + the same of all systems in a cascade @date 2021-09-14 */ three_way_decision_nfs (const std::vector> & cascade, @@ -44,6 +58,54 @@ namespace ksi const std::string & result, const double maximal_deviation ); + + /** @param cascade cascade of neuro-fuzzy systems + @param train name o*f train dataset file + @param test name of test dataset file + @param result name of output result file + @param maximal_deviations maximal deviations from a classification thresholds for each system separatedly + @date 2024-04-23 + */ + three_way_decision_nfs (const std::vector> & cascade, + const std::string & train, + const std::string & test, + const std::string & result, + const std::vector & maximal_deviations + ); + + + /** @param 
cascade cascade of neuro-fuzzy systems + @param maximal_deviation maximal deviation from a classification threshold, + the same of all systems in a cascade + @date 2024-03-16 + */ + three_way_decision_nfs (const std::vector> & cascade, + const double maximal_deviation + ); + + /** @param cascade cascade of neuro-fuzzy systems + * @param maximal_deviations maximal deviations from a classification thresholds for each system separatedly + * @date 2024-03-17 + */ + three_way_decision_nfs (const std::vector> & cascade, + const std::vector & maximal_deviations); + + + /** @param cascade cascade of neuro-fuzzy systems + @param train name of train dataset file + @param validation name of validation file + @param test name of test dataset file + @param result name of output result file + @param maximal_deviation maximal deviation from a classification threshold + @date 2024-03-11 + */ + three_way_decision_nfs (const std::vector> & cascade, + const std::string & train, + const std::string & validation, + const std::string & test, + const std::string & result, + const double maximal_deviation + ); /** @param cascade cascade of neuro-fuzzy systems @param train train dataset @@ -59,13 +121,42 @@ namespace ksi const double maximal_deviation ); + /** @param cascade cascade of neuro-fuzzy systems + @param train train dataset + @param test test dataset + @param result name of output result file + @param maximal_deviations maximal deviations from a classification threshold for each system separatedly + @date 2024-04-23 + */ + three_way_decision_nfs (const std::vector> & cascade, + const ksi::dataset & train, + const ksi::dataset & test, + const std::string & result, + const std::vector & maximal_deviations + ); + + /** @param cascade cascade of neuro-fuzzy systems + @param train train dataset + @param validation validation dataset + @param test test dataset + @param result name of output result file + @param maximal_deviation maximal deviation from a classification threshold + @date 
2024-03-11 + */ + three_way_decision_nfs (const std::vector> & cascade, + const ksi::dataset & train, + const ksi::dataset & validation, + const ksi::dataset & test, + const std::string & result, + const double maximal_deviation + ); + three_way_decision_nfs (const three_way_decision_nfs & wzor); three_way_decision_nfs (three_way_decision_nfs && wzor); three_way_decision_nfs & operator= (const three_way_decision_nfs & wzor); three_way_decision_nfs & operator= (three_way_decision_nfs && wzor); virtual ~three_way_decision_nfs (); - public: ksi::neuro_fuzzy_system * clone() const override; @@ -85,15 +176,16 @@ namespace ksi @param train trainset to train the system @date 2021-09-14 @author Krzysztof Siminski */ - virtual void createFuzzyRulebase (int nClusteringIterations, int nTuningIterations, double dbLearningCoefficient, const dataset & train) override; + virtual void createFuzzyRulebase (int nClusteringIterations, int nTuningIterations, double dbLearningCoefficient, const dataset & train, const dataset & validation) override; /** The method trains a cascade of neuro-fuzzy systems. - @param train train dataset - @param test test dataset + @param train train dataset + @param test test dataset + @param validation validation dataset @date 2021-09-14 @author Krzysztof Siminski */ - virtual void createFuzzyRulebase(const ksi::dataset & train, const ksi::dataset & test) override; + virtual void createFuzzyRulebase(const ksi::dataset & train, const ksi::dataset & test, const ksi::dataset & validation) override; virtual number elaborate_answer (const datum & d) const override; @@ -105,16 +197,25 @@ namespace ksi */ virtual std::pair answer_classification (const datum & item) const override; - /** The method prints rule base. + /** The method prints rule base. 
* @param ss ostream to print to */ virtual void printRulebase(std::ostream & ss) override; - - + /** @return a short name of the neuro-fuzzy system */ virtual std::string get_nfs_name () const override; + /** @return a brief name of the neuro-fuzzy system + * @date 2024-04-21 */ + virtual std::string get_brief_nfs_name () const override; /** @return a description of the neuro-fuzzy system */ virtual std::string get_nfs_description () const override; + /** @return a description of the cascade + * @date 2024-03-05 */ + virtual std::string get_cascade_names () const; + /** @return a brief description of the cascade + * @date 2024-04-21 */ + virtual std::string get_brief_cascade_names () const; + protected: /** @return The method returns always an empty string. @date 2021-09-28 @@ -122,41 +223,60 @@ namespace ksi virtual std::string get_classification_threshold_value () const override; protected: - /** The method elaborates a classification threshold. + /** This system is composed of a cascade of NFSs. + * Each of them has is own threshold. + * There is no general threshold. This is why zero is returned. + * The method is to satisfy the concordance with other NFS. @param Expected not used @param Elaborated not used @param positiveClassvalue not used @param negativeClassvalue not used @param type not used - @return always zero - + @return always zero */ virtual double elaborate_threshold_value (std::vector & Expected, std::vector & Elaborated, double positiveClassvalue, double negativeClassvalue, const ksi::roc_threshold & type) override; - - + protected: /** @todo Ta metoda jest do poprawy. Nie działa dobrze. 
*/ ksi::dataset extract_poor_results(const dataset & data, - const std::vector> & results_train, + const std::vector> & results_train, const double threshold_value, const double maximal_deviation); protected: std::string extra_report() const override; - virtual std::string classification_intro() const override; - virtual void copy_fields(const three_way_decision_nfs & _3wnfs); + + public: + /** A report entry on the average number of rules for the train dataset. + * It is used in three way decision NFS and fuzzy three way decision NFS. + * @return empty string + * @date 2024-03-24 */ + virtual std::string report_average_number_of_rules_for_train () const override; + + public: + /** @return average number of rules fired for the test dateset + * @date 2024-03-28 */ + virtual double get_number_of_rules () const override; + + public: + /** A report entry on the average number of rules for the test dataset. + * It is used in three way decision NFS and fuzzy three way decision NFS. + @ return empty string * + @date 2024-03-24 */ + virtual std::string report_average_number_of_rules_for_test () const override; + + public: friend std::ostream & operator<< (std::ostream & ss, const ksi::three_way_decision_nfs & system); }; } - #endif diff --git a/source/neuro-fuzzy/tsk.cpp b/source/neuro-fuzzy/tsk.cpp index 9f7d0ff..4111a0c 100644 --- a/source/neuro-fuzzy/tsk.cpp +++ b/source/neuro-fuzzy/tsk.cpp @@ -26,30 +26,14 @@ #include "../service/debug.h" #include "../auxiliary/error-RMSE.h" #include "../auxiliary/error-MAE.h" -#include "../common/number.h" +#include "../common/number.h" #include "../readers/reader-complete.h" #include "../common/data-modifier.h" #include "../common/data-modifier-normaliser.h" #include "../partitions/partition.h" #include "../gan/discriminative_model.h" #include "../gan/generative_model.h" - -ksi::partition ksi::tsk::doPartition(const ksi::dataset& X) -{ - //ksi::fcm clusterer; - - //clusterer.setNumberOfClusters(_nRules); - 
//clusterer.setNumberOfIterations(_nClusteringIterations); - //return clusterer.doPartition(X); - try - { - if (_pPartitioner) - return _pPartitioner->doPartition(X); - else - throw ksi::exception ("no clustering method provided"); - } - CATCH; -} + void ksi::tsk::set_name() { @@ -72,7 +56,6 @@ abstract_tsk(nRules, nClusteringIterations) set_name(); } - ksi::tsk::tsk(int nRules, int nClusteringIterations, int nTuningIterations, @@ -154,9 +137,6 @@ ksi::tsk::tsk(const std::string& trainDataFile, set_name(); } - - - ksi::tsk::tsk(int nRules, int nClusteringIterations, int nTuningIterations, @@ -232,6 +212,7 @@ ksi::tsk & ksi::tsk::operator= (ksi::tsk && right) if (this == & right) return *this; + ksi::neuro_fuzzy_system::operator=(right); ksi::abstract_tsk::operator=(right); return *this; diff --git a/source/neuro-fuzzy/tsk.h b/source/neuro-fuzzy/tsk.h index 2cff75b..ef3923f 100644 --- a/source/neuro-fuzzy/tsk.h +++ b/source/neuro-fuzzy/tsk.h @@ -150,16 +150,8 @@ namespace ksi virtual discriminative_model * clone_discriminant () const; virtual generative_model * clone_generator () const; - - protected: void set_name (); - - virtual partition doPartition (const dataset & X); - - public: - - }; } diff --git a/source/neuro-fuzzy/tsk_prototype.cpp b/source/neuro-fuzzy/tsk_prototype.cpp index dc0ceb8..69412a8 100644 --- a/source/neuro-fuzzy/tsk_prototype.cpp +++ b/source/neuro-fuzzy/tsk_prototype.cpp @@ -111,8 +111,8 @@ ksi::tsk_prototype::tsk_prototype(const int number_of_rules, void ksi::tsk_prototype::createFuzzyRulebase(int nClusteringIterations, int nTuningIterations, - double dbLearningCoefficient, - const ksi::dataset& train) + double eta, + const ksi::dataset& train, const ksi::dataset& validation) { try { @@ -121,6 +121,12 @@ void ksi::tsk_prototype::createFuzzyRulebase(int nClusteringIterations, if (_pRulebase) delete _pRulebase; _pRulebase = new rulebase(); + + // remember the best rulebase: + std::deque errors; + std::unique_ptr pTheBest (_pRulebase->clone()); 
+ double dbTheBestRMSE = std::numeric_limits::max(); + ////// std::size_t nAttr = _TrainDataset.getNumberOfAttributes(); std::size_t nAttr_1 = nAttr - 1; @@ -129,6 +135,17 @@ void ksi::tsk_prototype::createFuzzyRulebase(int nClusteringIterations, auto trainX = XY.first; auto trainY = XY.second; + auto XYval = validation.splitDataSetVertically(nAttr - 1); + auto validateX = XYval.first; + auto validateY = XYval.second; + + auto mvalidateY = validateY.getMatrix(); + auto nValY = validateY.getNumberOfData(); + std::vector wvalidateY (nValY); + for (std::size_t x = 0; x < nValY; x++) + wvalidateY[x] = mvalidateY[x][0]; + //////////////////////// + _original_size_of_training_dataset = trainX.getNumberOfData(); ksi::partition podzial; @@ -165,7 +182,7 @@ void ksi::tsk_prototype::createFuzzyRulebase(int nClusteringIterations, } } CATCH; - try + try { // dla wyznaczenia wartosci konkuzji: std::vector> F_przyklad_regula; @@ -217,6 +234,7 @@ void ksi::tsk_prototype::createFuzzyRulebase(int nClusteringIterations, lser.read_data_item(linia, wY[x]); } auto p = lser.get_regression_coefficients(); + // teraz zapis do regul: #pragma omp parallel for for (int r = 0; r < _nRules; r++) @@ -233,15 +251,32 @@ void ksi::tsk_prototype::createFuzzyRulebase(int nClusteringIterations, } } - // test: wyznaczam blad systemu - std::vector wYelaborated (nX); -#pragma omp parallel for - for (std::size_t x = 0; x < nX; x++) - wYelaborated[x] = answer( *(trainX.getDatum(x))); + ////////////////////////////////// + // test: wyznaczam blad systemu + + std::vector wYelaborated (nValY); + for (std::size_t x = 0; x < nX; x++) + wYelaborated[x] = answer( *(validateX.getDatum(x))); + + /////////////////////////// + ksi::error_RMSE rmse; + double blad = rmse.getError(wvalidateY, wYelaborated); + errors.push_front(blad); + + eta = modify_learning_coefficient(eta, errors); // modify learning coefficient + // remember the best rulebase: + if (dbTheBestRMSE > blad) + { + dbTheBestRMSE = blad; + pTheBest = 
std::unique_ptr(_pRulebase->clone()); + } + /////////////////////////// } - } CATCH; // system nastrojony :-) + // update the rulebase with the best one: + delete _pRulebase; + _pRulebase = pTheBest->clone(); } CATCH; } diff --git a/source/neuro-fuzzy/tsk_prototype.h b/source/neuro-fuzzy/tsk_prototype.h index 26b3375..f77df82 100644 --- a/source/neuro-fuzzy/tsk_prototype.h +++ b/source/neuro-fuzzy/tsk_prototype.h @@ -121,7 +121,7 @@ namespace ksi const ksi::roc_threshold threshold_type); - virtual void createFuzzyRulebase(int nClusteringIterations, int nTuningIterations, double dbLearningCoefficient, const ksi::dataset & train) override; + virtual void createFuzzyRulebase(int nClusteringIterations, int nTuningIterations, double dbLearningCoefficient, const ksi::dataset & train, const dataset & validation) override; public: ksi::neuro_fuzzy_system * clone() const override; diff --git a/source/neuro-fuzzy/weighted_annbfis.cpp b/source/neuro-fuzzy/weighted_annbfis.cpp index b01b6b5..1532b63 100644 --- a/source/neuro-fuzzy/weighted_annbfis.cpp +++ b/source/neuro-fuzzy/weighted_annbfis.cpp @@ -1,14 +1,16 @@ +#include + +#include "../neuro-fuzzy/neuro-fuzzy-system.h" #include "../neuro-fuzzy/abstract-annbfis.h" #include "../neuro-fuzzy/weighted_annbfis.h" #include "../neuro-fuzzy/logicalrule.h" #include "../neuro-fuzzy/consequence-CL.h" #include "../tnorms/t-norm-product.h" +#include "../auxiliary/error-RMSE.h" #include "../auxiliary/least-error-squares-regression.h" #include "../partitions/fcm-conditional.h" - - ksi::weighted_annbfis::weighted_annbfis () { set_name(); @@ -17,13 +19,28 @@ ksi::weighted_annbfis::weighted_annbfis () ksi::weighted_annbfis::~weighted_annbfis () { // delete what is to delete +} +ksi::weighted_annbfis::weighted_annbfis(int nRules, + int nClusteringIterations, + int nTuningIterations, + double dbLearningCoefficient, + bool bNormalisation, + const t_norm & tnorm, + const implication & imp, + double positive_class, + double negative_class, + 
const ksi::roc_threshold threshold_type +) :// neuro_fuzzy_system(ksi::fcm(nRules, nClusteringIterations)) +//, +abstract_annbfis (nRules, nClusteringIterations, nTuningIterations, dbLearningCoefficient, bNormalisation, tnorm, imp, ksi::fcm(nRules, nClusteringIterations), positive_class, negative_class, threshold_type ) +{ + set_name(); } -ksi::weighted_annbfis::weighted_annbfis (const ksi::weighted_annbfis & wzor) : ksi::abstract_annbfis(wzor) +ksi::weighted_annbfis::weighted_annbfis (const ksi::weighted_annbfis & wzor) : ksi::neuro_fuzzy_system(wzor), ksi::abstract_annbfis(wzor) { // copy what is to copy - } ksi::weighted_annbfis & ksi::weighted_annbfis::operator= (const ksi::weighted_annbfis & wzor) @@ -31,6 +48,7 @@ ksi::weighted_annbfis & ksi::weighted_annbfis::operator= (const ksi::weighted_an if (this == & wzor) return *this; + ksi::neuro_fuzzy_system::operator=(wzor); ksi::abstract_annbfis::operator=(wzor); // remove what is to remove @@ -40,7 +58,7 @@ ksi::weighted_annbfis & ksi::weighted_annbfis::operator= (const ksi::weighted_an return *this; } -ksi::weighted_annbfis::weighted_annbfis (ksi::weighted_annbfis && wzor) : ksi::abstract_annbfis(wzor) +ksi::weighted_annbfis::weighted_annbfis (ksi::weighted_annbfis && wzor) : ksi::neuro_fuzzy_system(wzor), ksi::abstract_annbfis(wzor) { // swap what is to swap @@ -51,6 +69,7 @@ ksi::weighted_annbfis & ksi::weighted_annbfis::operator= (ksi::weighted_annbfis if (this == & wzor) return *this; + ksi::neuro_fuzzy_system::operator=(wzor); ksi::abstract_annbfis::operator=(wzor); // swap what is to swap @@ -75,7 +94,7 @@ void ksi::weighted_annbfis::createFuzzyRulebase int nClusteringIterations, int nTuningIterations, double eta, - const ksi::dataset& train ) + const ksi::dataset& train, const ksi::dataset& validation) { try { @@ -84,6 +103,7 @@ void ksi::weighted_annbfis::createFuzzyRulebase // if (not _pPartitioner) // throw ksi::exception("no partition object provided"); + std::deque errors; const double INITIAL_W = 
2.0; _nClusteringIterations = nClusteringIterations; @@ -97,6 +117,12 @@ void ksi::weighted_annbfis::createFuzzyRulebase if (_pRulebase) delete _pRulebase; _pRulebase = new rulebase(); + + // remember the best rulebase: + std::unique_ptr pTheBest (_pRulebase->clone()); + double dbTheBestRMSE = std::numeric_limits::max(); + //////// + std::size_t nAttr = train.getNumberOfAttributes(); std::size_t nAttr_1 = nAttr - 1; @@ -105,6 +131,17 @@ void ksi::weighted_annbfis::createFuzzyRulebase auto trainX = XY.first; auto trainY = XY.second; + auto XYval = validation.splitDataSetVertically(nAttr - 1); + auto validateX = XYval.first; + auto validateY = XYval.second; + + auto mvalidateY = validateY.getMatrix(); + auto nValY = validateY.getNumberOfData(); + std::vector wvalidateY (nValY); + for (std::size_t x = 0; x < nValY; x++) + wvalidateY[x] = mvalidateY[x][0]; + //////////////////////// + auto podzial = doPartition(trainX); _nRules = podzial.getNumberOfClusters(); _original_size_of_training_dataset = trainX.getNumberOfData(); @@ -210,19 +247,33 @@ void ksi::weighted_annbfis::createFuzzyRulebase (*_pRulebase)[r].setConsequence(konkluzja); } } - } + ////////////////////////////////// // test: wyznaczam blad systemu -// std::vector wYelaborated (nX); -// for (std::size_t x = 0; x < nX; x++) -// wYelaborated[x] = answer( *(trainX.getDatum(x))); -// - //ksi::error_RMSE rmse; - //double blad = rmse.getError(wY, wYelaborated); - //debug(blad); + std::vector wYelaborated (nValY); + for (std::size_t x = 0; x < nX; x++) + wYelaborated[x] = answer( *(validateX.getDatum(x))); - + /////////////////////////// + ksi::error_RMSE rmse; + double blad = rmse.getError(wvalidateY, wYelaborated); + // std::cout << __FILE__ << " (" << __LINE__ << ") " << "coeff: " << eta << ", iter: " << i << ", RMSE(train): " << blad << std::endl; + errors.push_front(blad); + + eta = modify_learning_coefficient(eta, errors); // modify learning coefficient + // remember the best rulebase: + if (dbTheBestRMSE > 
blad) + { + dbTheBestRMSE = blad; + pTheBest = std::unique_ptr(_pRulebase->clone()); + } + /////////////////////////// + + } // system nastrojony :-) + // update the rulebase with the best one: + delete _pRulebase; + _pRulebase = pTheBest->clone(); } CATCH; } @@ -231,7 +282,8 @@ void ksi::weighted_annbfis::createFuzzyRulebase void ksi::weighted_annbfis::set_name() { _name_of_neuro_fuzzy_system = std::string ("weighted-ANNBFIS"); - _description_of_neuro_fuzzy_system = std::string("weighted ANNBFIS, neuro-fuzzy system with logical interpretation of fuzzy rules, with weighted data items"); + _brief_name_of_neuro_fuzzy_system = std::string ("wANN"); + _description_of_neuro_fuzzy_system = std::string("ANNBFIS, neuro-fuzzy system with logical interpretation of fuzzy rules for weighted data items"); } ksi::neuro_fuzzy_system * ksi::weighted_annbfis::clone() const diff --git a/source/neuro-fuzzy/weighted_annbfis.h b/source/neuro-fuzzy/weighted_annbfis.h index ef60c8a..79ea357 100644 --- a/source/neuro-fuzzy/weighted_annbfis.h +++ b/source/neuro-fuzzy/weighted_annbfis.h @@ -44,6 +44,24 @@ namespace ksi @author Krzysztof Siminski */ weighted_annbfis (const implication & imp); + + + public: + /** constructor + * @param nRules number of rules + * @param nClusteringIterations number of clustering iterations + * @param nTuningIterations number of tuning iterations + * @param dbLearningCoefficient learning coefficient for gradient method + * @param tnorm a t-norm + * @param imp implication + * @param dbPositiveClass label of a positive class + * @param dbNegativeClass label of a negative class + * @param threshold_type classification threshold type + * @date 2024-03-03 + */ + weighted_annbfis (int nRules, int nClusteringIterations, int nTuningIterations, double dbLearningCoefficient, bool bNormalisation, const t_norm & tnorm, const implication & imp, double positive_class, double negative_class, const ksi::roc_threshold threshold_type); + + /** The method creates a fuzzy rulebase 
from the dataset. * @param nClusteringIterations number of clustering iterations * @param nTuningIterations number of tuning iterations @@ -55,7 +73,7 @@ namespace ksi virtual void createFuzzyRulebase ( int nClusteringIterations, int nTuningIterations, double dbLearningCoefficient, - const dataset & train); + const dataset & train, const dataset & validation); public: /** The method sets name and abbreviation of the system */ @@ -74,5 +92,4 @@ namespace ksi }; } - #endif diff --git a/source/service/debug.h b/source/service/debug.h index 303f210..135c253 100644 --- a/source/service/debug.h +++ b/source/service/debug.h @@ -4,12 +4,14 @@ #define DEBUG_H // #include -// #include +#include // #include // #include // #include // #include +#include + #include "../auxiliary/definitions.h" #include "../service/exception.h" @@ -19,6 +21,8 @@ #define drukuj(x) std::cerr << __FILE__ << " (" << __LINE__ << ") " << #x << " == " << (x) << std::endl; #define debug(x) drukuj(x) +#define thdebug(x) std::osyncstream(std::cerr) << __FILE__ << " (" << __LINE__ << ") " << #x << " == " << (x) << std::endl; + #define NAZWA(x) #x