From 95ef18b46cb255e2df73c080d4616ecebfe586a6 Mon Sep 17 00:00:00 2001 From: vnuczek Date: Tue, 25 Jun 2024 14:47:26 +0200 Subject: [PATCH] =?UTF-8?q?implementacja=20komponentu=20do=20obs=C5=82ugi?= =?UTF-8?q?=20danych=20dla=20walidacji=20krzy=C5=BCowej?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 6 + source/common/datum.cpp | 30 +- source/common/datum.h | 19 +- source/makefile | 25 +- source/readers/cross_validation_model.cpp | 42 ++ source/readers/cross_validation_model.h | 141 ++++++ source/readers/train_test_model.cpp | 340 +++++++++++++ source/readers/train_test_model.h | 445 ++++++++++++++++ .../readers/train_validation_test_model.cpp | 392 +++++++++++++++ source/readers/train_validation_test_model.h | 474 ++++++++++++++++++ 10 files changed, 1909 insertions(+), 5 deletions(-) create mode 100644 source/readers/cross_validation_model.cpp create mode 100644 source/readers/cross_validation_model.h create mode 100644 source/readers/train_test_model.cpp create mode 100644 source/readers/train_test_model.h create mode 100644 source/readers/train_validation_test_model.cpp create mode 100644 source/readers/train_validation_test_model.h diff --git a/.gitignore b/.gitignore index cf6c3d2..745326e 100644 --- a/.gitignore +++ b/.gitignore @@ -36,3 +36,9 @@ source/results/ *.exe *.out *.app +/neuro-fu.993AC922/x64 +/results/exp-005 +/doxygen/html +/.vs +# Temporary VS file +*.pdb diff --git a/source/common/datum.cpp b/source/common/datum.cpp index a6df3c0..06c2ab5 100644 --- a/source/common/datum.cpp +++ b/source/common/datum.cpp @@ -294,7 +294,7 @@ namespace ksi //ss << " id == " << d.getID() << ", id_incomplete == " << d.getIDincomplete(); if (not d._labels.empty()) { - ss << " labels: "; + ss << " labels: "; // " | " for (const auto & l : d._labels) ss << l << " "; } @@ -385,3 +385,31 @@ std::size_t ksi::datum::getNumberOfLabels() const { return _labels.size(); } + +void 
ksi::datum::save_print(std::ostream& os) const +{ + for (std::size_t i = 0; i < attributes.size(); ++i) + { + os << attributes[i]->getValue(); + if (i < attributes.size() - 1) + { + os << " "; + } + } + + if (!_labels.empty()) + { + os << " | "; + for (std::size_t i = 0; i < _labels.size(); ++i) + { + os << _labels[i]; + if (i < _labels.size() - 1) + { + os << " "; + } + } + } + + os << std::endl; +} + diff --git a/source/common/datum.h b/source/common/datum.h index 768ccbf..fd252df 100644 --- a/source/common/datum.h +++ b/source/common/datum.h @@ -202,10 +202,25 @@ namespace ksi @author Krzysztof Siminski */ std::size_t getNumberOfLabels () const; - /** Output stream operator - @date 2018-05-13 + /** Output stream operator for datum. + * + * @param ss The output stream. + * @param d The datum to write to the stream. + * @return The output stream with the datum written to it. + * @date 2018-05-13 */ friend std::ostream & operator << (std::ostream & ss, const datum & d); + + /** + * Prints the datum to the provided output stream in the required format. + * Numerical attributes are separated from symbolical labels with a bar '|'. + * Example format: 1.2 5.6 7.8 | A B + * + * @param os The output stream to print the datum. 
+ * @date 2024-06-10 + * @author Konrad Wnuk + */ + void save_print(std::ostream& os) const; }; } diff --git a/source/makefile b/source/makefile index 7b29b0a..0313b3d 100644 --- a/source/makefile +++ b/source/makefile @@ -46,6 +46,9 @@ libasan= __ : $(release_folder) $(release_folder)/main ./$(release_folder)/main +9 : $(release_folder) $(release_folder)/main + ./$(release_folder)/main 9 + 1 : $(release_folder) $(release_folder)/main ./$(release_folder)/main 1 @@ -186,6 +189,14 @@ $(release_folder)/readers-reader-complete.o : readers/reader-complete.cpp $(compiler) $(standard) $(release) $(optRelease) $(parallel) $(errors) -c -o $@ $^ $(debug_folder)/readers-reader-complete.o : readers/reader-complete.cpp $(compiler) $(standard) $(debug) $(optyDebug) $(parallel) $(errors) $(sanitizer) -c -o $@ $^ +$(release_folder)/readers-train_test_model.o : readers/train_test_model.cpp + $(compiler) $(standard) $(release) $(optRelease) $(parallel) $(errors) -c -o $@ $^ +$(debug_folder)/readers-train_test_model.o : readers/train_test_model.cpp + $(compiler) $(standard) $(debug) $(optyDebug) $(parallel) $(errors) $(sanitizer) -c -o $@ $^ +$(release_folder)/readers-cross_validation_model.o : readers/cross_validation_model.cpp + $(compiler) $(standard) $(release) $(optRelease) $(parallel) $(errors) -c -o $@ $^ +$(debug_folder)/readers-cross_validation_model.o : readers/cross_validation_model.cpp + $(compiler) $(standard) $(debug) $(optyDebug) $(parallel) $(errors) $(sanitizer) -c -o $@ $^ $(release_folder)/readers-reader.o : readers/reader.cpp $(compiler) $(standard) $(release) $(optRelease) $(parallel) $(errors) -c -o $@ $^ $(debug_folder)/readers-reader.o : readers/reader.cpp @@ -198,6 +209,10 @@ $(release_folder)/readers-reader-incomplete.o : readers/reader-incomplete.cpp $(compiler) $(standard) $(release) $(optRelease) $(parallel) $(errors) -c -o $@ $^ $(debug_folder)/readers-reader-incomplete.o : readers/reader-incomplete.cpp $(compiler) $(standard) $(debug) $(optyDebug) 
$(parallel) $(errors) $(sanitizer) -c -o $@ $^ +$(release_folder)/readers-train_validation_test_model.o : readers/train_validation_test_model.cpp + $(compiler) $(standard) $(release) $(optRelease) $(parallel) $(errors) -c -o $@ $^ +$(debug_folder)/readers-train_validation_test_model.o : readers/train_validation_test_model.cpp + $(compiler) $(standard) $(debug) $(optyDebug) $(parallel) $(errors) $(sanitizer) -c -o $@ $^ $(release_folder)/implications-imp-kleene-dienes.o : implications/imp-kleene-dienes.cpp $(compiler) $(standard) $(release) $(optRelease) $(parallel) $(errors) -c -o $@ $^ $(debug_folder)/implications-imp-kleene-dienes.o : implications/imp-kleene-dienes.cpp @@ -955,6 +970,7 @@ $(release_folder)/metrics-metric-chebyshev.o \ $(release_folder)/common-data-modifier-imputer-knn-average.o \ $(release_folder)/tnorms-t-norm-frank.o \ $(release_folder)/partitions-cluster.o \ +$(release_folder)/readers-cross_validation_model.o \ $(release_folder)/partitions-fcm-possibilistic.o \ $(release_folder)/common-data-modifier-filter.o \ $(release_folder)/auxiliary-tempus.o \ @@ -1023,8 +1039,8 @@ $(release_folder)/partitions-fcm.o \ $(release_folder)/neuro-fuzzy-ma.o \ $(release_folder)/descriptors-descriptor-interval-gaussian-subspace.o \ $(release_folder)/descriptors-descriptor-triangular.o \ -$(release_folder)/readers-weighted_reader_complete.o \ $(release_folder)/implications-imp-fodor.o \ +$(release_folder)/readers-weighted_reader_complete.o \ $(release_folder)/snorms-s-norm-lukasiewicz.o \ $(release_folder)/tnorms-t-norm-lukasiewicz.o \ $(release_folder)/common-data-modifier-imputer-knn-median.o \ @@ -1079,6 +1095,7 @@ $(release_folder)/tnorms-t-norm-product.o \ $(release_folder)/partitions-fcm-T.o \ $(release_folder)/neuro-fuzzy-granular_subspace_annbfis_classification.o \ $(release_folder)/neuro-fuzzy-granular_nfs.o \ +$(release_folder)/readers-train_test_model.o \ $(release_folder)/auxiliary-mathematics.o \ $(release_folder)/tnorms-t-norm-fodor.o \ 
$(release_folder)/neuro-fuzzy-consequence-MA.o \ @@ -1087,6 +1104,7 @@ $(release_folder)/descriptors-descriptor-gaussian-subspace.o \ $(release_folder)/descriptors-descriptor-singleton.o \ $(release_folder)/common-datum.o \ $(release_folder)/neuro-fuzzy-fac_prototype_minkowski_regression.o \ +$(release_folder)/readers-train_validation_test_model.o \ $(release_folder)/metrics-metric-minkowski.o \ $(release_folder)/tnorms-t-norm-sugeno-weber.o \ $(release_folder)/common-data-modifier-normaliser.o \ @@ -1166,6 +1184,7 @@ $(debug_folder)/metrics-metric-chebyshev.o \ $(debug_folder)/common-data-modifier-imputer-knn-average.o \ $(debug_folder)/tnorms-t-norm-frank.o \ $(debug_folder)/partitions-cluster.o \ +$(debug_folder)/readers-cross_validation_model.o \ $(debug_folder)/partitions-fcm-possibilistic.o \ $(debug_folder)/common-data-modifier-filter.o \ $(debug_folder)/auxiliary-tempus.o \ @@ -1234,8 +1253,8 @@ $(debug_folder)/partitions-fcm.o \ $(debug_folder)/neuro-fuzzy-ma.o \ $(debug_folder)/descriptors-descriptor-interval-gaussian-subspace.o \ $(debug_folder)/descriptors-descriptor-triangular.o \ -$(debug_folder)/readers-weighted_reader_complete.o \ $(debug_folder)/implications-imp-fodor.o \ +$(debug_folder)/readers-weighted_reader_complete.o \ $(debug_folder)/snorms-s-norm-lukasiewicz.o \ $(debug_folder)/tnorms-t-norm-lukasiewicz.o \ $(debug_folder)/common-data-modifier-imputer-knn-median.o \ @@ -1290,6 +1309,7 @@ $(debug_folder)/tnorms-t-norm-product.o \ $(debug_folder)/partitions-fcm-T.o \ $(debug_folder)/neuro-fuzzy-granular_subspace_annbfis_classification.o \ $(debug_folder)/neuro-fuzzy-granular_nfs.o \ +$(debug_folder)/readers-train_test_model.o \ $(debug_folder)/auxiliary-mathematics.o \ $(debug_folder)/tnorms-t-norm-fodor.o \ $(debug_folder)/neuro-fuzzy-consequence-MA.o \ @@ -1298,6 +1318,7 @@ $(debug_folder)/descriptors-descriptor-gaussian-subspace.o \ $(debug_folder)/descriptors-descriptor-singleton.o \ $(debug_folder)/common-datum.o \ 
$(debug_folder)/neuro-fuzzy-fac_prototype_minkowski_regression.o \ +$(debug_folder)/readers-train_validation_test_model.o \ $(debug_folder)/metrics-metric-minkowski.o \ $(debug_folder)/tnorms-t-norm-sugeno-weber.o \ $(debug_folder)/common-data-modifier-normaliser.o \ diff --git a/source/readers/cross_validation_model.cpp b/source/readers/cross_validation_model.cpp new file mode 100644 index 0000000..c85b4e2 --- /dev/null +++ b/source/readers/cross_validation_model.cpp @@ -0,0 +1,42 @@ +/** @file */ + +#include "cross_validation_model.h" + +ksi::cross_validation_model::cross_validation_model(ksi::reader& source_reader) + : pReader(source_reader.clone()) {} + +ksi::cross_validation_model::cross_validation_model(const cross_validation_model& other) + : pReader(other.pReader) +{ + std::lock_guard lock(other.datasets_mutex); + datasets = other.datasets; +} + +ksi::cross_validation_model::cross_validation_model(cross_validation_model&& other) noexcept + : pReader(std::move(other.pReader)) +{ + std::lock_guard lock(other.datasets_mutex); + datasets = std::move(other.datasets); +} + +ksi::cross_validation_model& ksi::cross_validation_model::operator=(const cross_validation_model& other) +{ + if (this != &other) + { + pReader = other.pReader; + std::lock_guard lock(other.datasets_mutex); + datasets = other.datasets; + } + return *this; +} + +ksi::cross_validation_model& ksi::cross_validation_model::operator=(cross_validation_model&& other) noexcept +{ + if (this != &other) + { + pReader = std::move(other.pReader); + std::lock_guard lock(other.datasets_mutex); + datasets = std::move(other.datasets); + } + return *this; +} diff --git a/source/readers/cross_validation_model.h b/source/readers/cross_validation_model.h new file mode 100644 index 0000000..b07debd --- /dev/null +++ b/source/readers/cross_validation_model.h @@ -0,0 +1,141 @@ +/** @file */ + +#ifndef CROSS_VALIDATION_MODEL_H +#define CROSS_VALIDATION_MODEL_H + +#include "reader.h" + +#include +#include +#include 
+#include + +namespace ksi +{ + /** + * @class cross_validation_model + * An abstract class representing the base class for the cross-validation models. + * + * @date 2024-05-30 + * @author Konrad Wnuk + */ + class cross_validation_model : public reader + { + protected: + /** Pointer to a reader object which is used to read datasets */ + std::shared_ptr pReader = nullptr; + + /** Vector of datasets holds the subsets of the dataset */ + std::vector datasets; + + /** Mutex for synchronizing access to datasets */ + mutable std::mutex datasets_mutex; + + public: + /** + * Constructor for cross_validation_model. + * + * @param source_reader The reader object to initialize the pReader with. + * @date 2024-06-04 + * @author Konrad Wnuk + */ + cross_validation_model(reader& source_reader); + + /** + * Copy constructor. + * + * @param other The cross_validation object to copy. + * @date 2024-06-08 + * @author Konrad Wnuk + */ + cross_validation_model(const cross_validation_model& other); + + /** + * Move constructor. + * + * @param other The cross_validation object to move. + * @date 2024-06-08 + * @author Konrad Wnuk + */ + cross_validation_model(cross_validation_model&& other) noexcept; + + /** + * Copy assignment operator. + * + * @param other The cross_validation object to copy. + * @return Reference to the copied cross_validation object. + * @date 2024-06-08 + * @author Konrad Wnuk + */ + cross_validation_model& operator=(const cross_validation_model& other); + + /** + * Move assignment operator. + * + * @param other The cross_validation object to move. + * @return Reference to the moved cross_validation object. + * @date 2024-06-08 + * @author Konrad Wnuk + */ + cross_validation_model& operator=(cross_validation_model&& other) noexcept; + + /** + * Virtual destructor for cross_validation_model. + */ + virtual ~cross_validation_model() = default; + + /** + * Splits the data into a specified number of subsets. + * + * @param base_dataset The dataset to be split. 
+ * @param n The number of subsets to split the data into. Default is 10. + * @date 2024-05-30 + * @author Konrad Wnuk + */ + virtual void split(const dataset & base_dataset, const unsigned int n = 10) = 0; + + /** + * Saves the data to a specified directory. + * + * @param directory The directory where the data will be saved. + * @param filename The base name for the files to be saved. Default is "dataset". + * @param extension The file extension for the files to be saved. Default is ".data". + * @param overwrite Flag to control whether to overwrite the existing files. Default is false. + * @date 2024-06-30 + * @author Konrad Wnuk + */ + virtual void save(const std::filesystem::path& directory, const std::filesystem::path& filename = "dataset", const std::filesystem::path& extension = ".data", const bool overwrite = false) const = 0; + + /** + * Reads data from a specified file. + * + * @param file_directory The path of the file to read the data from. + * @return A dataset containing the data read from the file. + * @date 2024-05-30 + * @author Konrad Wnuk + */ + virtual dataset read_file(const std::filesystem::path& file_directory) = 0; + + /** + * Reads data from a specified directory. + * + * @param directory The directory to read the data from. + * @param file_regex_pattern The pattern to match the files. Defaults to ".*\\.data$". + * @date 2024-05-30 + * @author Konrad Wnuk + */ + virtual void read_directory(const std::filesystem::path& directory, const std::string& file_regex_pattern = ".*\\.data$") = 0; + + /** + * Reads data from a file and splits it into subsets. + * + * @param file_path The path of the file to read the data from. + * @param n The number of subsets to split the data into. 
+ * @date 2024-06-14 + * @author Konrad Wnuk + */ + virtual void read_and_split_file(const std::filesystem::path& file_path, const unsigned int n = 10) = 0; + }; +} + +#endif diff --git a/source/readers/train_test_model.cpp b/source/readers/train_test_model.cpp new file mode 100644 index 0000000..ada9fe3 --- /dev/null +++ b/source/readers/train_test_model.cpp @@ -0,0 +1,340 @@ +/** @file */ + +#include "../readers/train_test_model.h" +#include "../service/exception.h" +#include "../auxiliary/to_string.h" + +#include +#include +#include +#include +#include +#include + +ksi::train_test_model::train_test_model(ksi::reader& source_reader) + : cross_validation_model(source_reader) {} + +ksi::train_test_model::train_test_model(const train_test_model& other) + : cross_validation_model(other) {} + +ksi::train_test_model::train_test_model(train_test_model&& other) noexcept + : cross_validation_model(std::move(other)) {} + +ksi::train_test_model& ksi::train_test_model::operator=(const train_test_model& other) +{ + if (this != &other) + { + cross_validation_model::operator=(other); + } + return *this; +} + +ksi::train_test_model& ksi::train_test_model::operator=(train_test_model&& other) noexcept +{ + if (this != &other) + { + cross_validation_model::operator=(std::move(other)); + } + return *this; +} + +void ksi::train_test_model::split(const ksi::dataset & base_dataset, const unsigned int n) +{ + try + { + if (n < 2) + { + throw ksi::exception("Number of subsets must be greater than (1)."); + } + + const auto total_size = base_dataset.size(); + if (n > total_size) + { + throw ksi::exception("Number of subsets (" + std::to_string(n) + ") cannot be greater than the number of data points (" + std::to_string(total_size) + ")."); + } + + datasets.clear(); + datasets.resize(n); + + const auto base_size = total_size / n; + const auto remainder = total_size % n; + + std::size_t index = 0; + for (unsigned int i = 0; i < n; ++i) /* same type as n: no signed/unsigned comparison */ + { + const auto current_size = base_size + (i < remainder ? 
1 : 0); + + for (auto j = 0; j < current_size; ++j) + { + datasets[i].addDatum(*base_dataset.getDatum(index)); + ++index; + } + } + } + CATCH; +} + +void ksi::train_test_model::save(const std::filesystem::path& directory, const std::filesystem::path& filename, const std::filesystem::path& extension, const bool overwrite) const +{ + try + { + std::filesystem::create_directories(directory); + const auto num_files = datasets.size(); + const auto num_digits = std::to_string(num_files).length(); + + for (auto i = 0; i < datasets.size(); ++i) + { + auto file_path = directory / (filename.string() + "_" + ksi::to_string(i, num_digits) + extension.string()); + if (std::filesystem::exists(file_path) && !overwrite) + { + throw ksi::exception("File " + file_path.string() + " already exists. To overwrite, set the overwrite parameter to true."); + } + + std::ofstream file(file_path); + + if (file.is_open()) + { + for (auto j = 0; j < datasets[i].size(); ++j) + { + const datum* d = datasets[i].getDatum(j); + if (d) + { + d->save_print(file); + } + } + file.close(); + } + else + { + throw ksi::exception("Unable to open file " + file_path.string()); + } + } + } + CATCH; +} + +ksi::dataset ksi::train_test_model::read_file(const std::filesystem::path& file_directory) +{ + return pReader->read(file_directory.string()); +} + +void ksi::train_test_model::read_directory(const std::filesystem::path& directory, const std::string& file_regex_pattern) +{ + std::regex data_file_regex(file_regex_pattern); + auto data_files = std::filesystem::directory_iterator(directory) + | std::views::filter([](const auto& entry) { return entry.is_regular_file(); }) + | std::views::filter([&data_file_regex](const auto& entry) + { + return std::regex_match(entry.path().string(), data_file_regex); + }); + + std::vector filtered_files; + for (const auto& file : data_files) { + filtered_files.push_back(file.path()); + } + + std::vector threads; + for (const auto& file_path : filtered_files) { + 
threads.emplace_back([this, file_path]() + { + ksi::dataset ds = read_file(file_path); + + std::lock_guard lock(this->datasets_mutex); + this->datasets.push_back(std::move(ds)); + }); + } + + for (auto& thread : threads) { + if (thread.joinable()) { + thread.join(); + } + } +} + +std::shared_ptr ksi::train_test_model::clone() const +{ + return std::shared_ptr(new ksi::train_test_model(*this)); +} + +ksi::dataset ksi::train_test_model::read(const std::string& filename) +{ + return pReader->read(filename); +} + +void ksi::train_test_model::read_and_split_file(const std::filesystem::path& file_path, const unsigned int n) +{ + dataset base_dataset = read_file(file_path); + split(base_dataset, n); +} + +auto ksi::train_test_model::begin() -> ksi::train_test_model::iterator +{ + return { this, datasets.begin() }; +} + +auto ksi::train_test_model::end() -> ksi::train_test_model::iterator +{ + return { this, datasets.end() }; +} + +auto ksi::train_test_model::cbegin() const -> ksi::train_test_model::const_iterator +{ + return { this, datasets.cbegin() }; +} + +auto ksi::train_test_model::cend() const -> ksi::train_test_model::const_iterator +{ + return { this, datasets.cend() }; +} + +ksi::train_test_model::iterator::iterator(train_test_model* tt, std::vector::iterator test_it) + : pTT(tt), test_iterator(test_it) +{ + initialize_test_dataset(); +} + +ksi::train_test_model::iterator::iterator(const iterator& other) + : pTT(other.pTT), test_iterator(other.test_iterator), train_dataset(other.train_dataset) {} + +ksi::train_test_model::iterator::iterator(iterator&& other) noexcept + : pTT(other.pTT), test_iterator(std::move(other.test_iterator)), train_dataset(std::move(other.train_dataset)) {} + +ksi::train_test_model::iterator& ksi::train_test_model::iterator::operator=(const iterator& other) +{ + if (this != &other) + { + pTT = other.pTT; + test_iterator = other.test_iterator; + train_dataset = other.train_dataset; + } + return *this; +} + +ksi::train_test_model::iterator& 
ksi::train_test_model::iterator::operator=(iterator&& other) noexcept +{ + if (this != &other) + { + pTT = other.pTT; /* raw pointer: copying it from other is the whole move */ + test_iterator = std::move(other.test_iterator); + train_dataset = std::move(other.train_dataset); + } + return *this; +} + +ksi::train_test_model::iterator& ksi::train_test_model::iterator::operator++() +{ + ++test_iterator; + initialize_test_dataset(); + return *this; +} + +ksi::train_test_model::iterator ksi::train_test_model::iterator::operator++(int) +{ + iterator temp = *this; + ++(*this); + return temp; +} + +bool ksi::train_test_model::iterator::operator==(const iterator& other) const +{ + return test_iterator == other.test_iterator; +} + +std::strong_ordering ksi::train_test_model::iterator::operator<=>(const iterator& other) const +{ + return test_iterator <=> other.test_iterator; +} + +std::tuple ksi::train_test_model::iterator::operator*() const +{ + return std::make_tuple(train_dataset, *test_iterator); +} + +void ksi::train_test_model::iterator::initialize_test_dataset() +{ + train_dataset = ksi::dataset(); + + for (auto it = pTT->datasets.cbegin(); it != pTT->datasets.cend(); it++) { + if (it != test_iterator) { + for (std::size_t j = 0; j < it->size(); ++j) { + train_dataset.addDatum(*it->getDatum(j)); + } + } + } +} + +ksi::train_test_model::const_iterator::const_iterator(const train_test_model* tt, std::vector::const_iterator test_it) + : pTT(tt), test_iterator(test_it) +{ + initialize_test_dataset(); +} + +ksi::train_test_model::const_iterator::const_iterator(const const_iterator& other) + : pTT(other.pTT), test_iterator(other.test_iterator), train_dataset(other.train_dataset) {} + +ksi::train_test_model::const_iterator::const_iterator(const_iterator&& other) noexcept + : pTT(other.pTT), test_iterator(std::move(other.test_iterator)), train_dataset(std::move(other.train_dataset)) {} + +ksi::train_test_model::const_iterator& ksi::train_test_model::const_iterator::operator=(const const_iterator& other) +{ + if (this != &other) 
+ { + pTT = other.pTT; + test_iterator = other.test_iterator; + train_dataset = other.train_dataset; + } + return *this; +} + +ksi::train_test_model::const_iterator& ksi::train_test_model::const_iterator::operator=(const_iterator&& other) noexcept +{ + if (this != &other) + { + pTT = other.pTT; /* raw pointer: copying it from other is the whole move */ + test_iterator = std::move(other.test_iterator); + train_dataset = std::move(other.train_dataset); + } + return *this; +} + +ksi::train_test_model::const_iterator& ksi::train_test_model::const_iterator::operator++() +{ + ++test_iterator; + initialize_test_dataset(); + return *this; +} + +ksi::train_test_model::const_iterator ksi::train_test_model::const_iterator::operator++(int) +{ + const_iterator temp = *this; + ++(*this); + return temp; +} + +bool ksi::train_test_model::const_iterator::operator==(const ksi::train_test_model::const_iterator& other) const +{ + return test_iterator == other.test_iterator; +} + +std::strong_ordering ksi::train_test_model::const_iterator::operator<=>(const ksi::train_test_model::const_iterator& other) const +{ + return test_iterator <=> other.test_iterator; +} + +std::tuple ksi::train_test_model::const_iterator::operator*() const +{ + return std::make_tuple(std::ref(train_dataset), std::ref(*test_iterator)); +} + +void ksi::train_test_model::const_iterator::initialize_test_dataset() +{ + train_dataset = ksi::dataset(); + + for (auto it = pTT->datasets.cbegin(); it != pTT->datasets.cend(); it++) { + if (it != test_iterator) { + for (std::size_t j = 0; j < it->size(); ++j) { + train_dataset.addDatum(*it->getDatum(j)); + } + } + } +} diff --git a/source/readers/train_test_model.h b/source/readers/train_test_model.h new file mode 100644 index 0000000..27cde50 --- /dev/null +++ b/source/readers/train_test_model.h @@ -0,0 +1,445 @@ +/** @file */ + +#ifndef TRAIN_TEST_MODEL_H +#define TRAIN_TEST_MODEL_H + +#include "cross_validation_model.h" + +#include + +namespace ksi +{ + /** + * @class train_test_model + * Class representing the Training 
and Test model, inheriting from cross_validation_model. + * + * @date 2024-05-30 + * @author Konrad Wnuk + */ + class train_test_model : public cross_validation_model + { + public: + /** + * @class iterator + * A nested class for iterating over train_test_model elements. + * + * @date 2024-05-30 + * @author Konrad Wnuk + */ + class iterator; + + /** + * @class const_iterator + * A nested class for read-only iteration over train_test_model elements. + * + * @date 2024-05-30 + * @author Konrad Wnuk + */ + class const_iterator; + + /** + * Constructor for train_test_model. + * + * @param source_reader The reader object to initialize the base class with. + * @date 2024-06-04 + * @author Konrad Wnuk + */ + train_test_model(reader& source_reader); + + /** + * Copy constructor for train_test_model. + * + * @param other The train_test_model object to copy. + * @date 2024-06-08 + * @author Konrad Wnuk + */ + train_test_model(const train_test_model& other); + + /** + * Move constructor for train_test_model. + * + * @param other The train_test_model object to move. + * @date 2024-06-08 + * @author Konrad Wnuk + */ + train_test_model(train_test_model&& other) noexcept; + + /** + * Copy assignment operator for train_test_model. + * + * @param other The train_test_model object to copy. + * @return A reference to the copied train_test_model object. + * @date 2024-06-08 + * @author Konrad Wnuk + */ + train_test_model& operator=(const train_test_model& other); + + /** + * Move assignment operator for train_test_model. + * + * @param other The train_test_model object to move. + * @return A reference to the moved train_test_model object. + * @date 2024-06-08 + * @author Konrad Wnuk + */ + train_test_model& operator=(train_test_model&& other) noexcept; + + /** + * Splits the data into a specified number of subsets. + * + * @param base_dataset The dataset to be split. + * @param n The number of subsets to split the data into. Default is 10. 
+ * @date 2024-06-04 + * @author Konrad Wnuk + */ + void split(const dataset& base_dataset, const unsigned int n = 10) override; + + /** + * Saves the data to a specified directory. + * + * @param directory The directory where the data will be saved. + * @param filename The base name for the files to be saved. Default is "dataset". + * @param extension The file extension for the files to be saved. Default is ".data". + * @param overwrite Flag to control whether to overwrite the existing files. Default is false. + * @date 2024-06-04 + * @author Konrad Wnuk + */ + void save(const std::filesystem::path& directory, const std::filesystem::path& filename = "dataset", const std::filesystem::path& extension = ".data", const bool overwrite = false) const override; + + /** + * Reads data from a specified file. + * + * @param file_directory The path of the file to read the data from. + * @return A dataset containing the data read from the file. + * @date 2024-06-04 + * @author Konrad Wnuk + */ + dataset read_file(const std::filesystem::path& file_directory) override; + + /** + * Reads data from a specified directory. + * + * @param directory The directory to read the data from. + * @param file_regex_pattern The pattern to match the files. Defaults to ".*\\.data$". + * @date 2024-06-04 + * @author Konrad Wnuk + */ + void read_directory(const std::filesystem::path& directory, const std::string& file_regex_pattern = ".*\\.data$") override; + + /** + * Reads data from a specified file. + * + * @param filename The path of the file to read the data from. + * @return A dataset containing the data read from the file. + * @date 2024-06-08 + * @author Konrad Wnuk + */ + dataset read(const std::string& filename) override; + + /** + * Reads data from a file and splits it into subsets. + * + * @param file_path The path of the file to read the data from. + * @param n The number of subsets to split the data into. 
+ * @date 2024-06-14 + * @author Konrad Wnuk + */ + void read_and_split_file(const std::filesystem::path& file_path, const unsigned int n = 10) override; + + /** + * Clones the current reader object. + * (the prototype design pattern) + * + * @return A shared pointer to the cloned reader object. + * @date 2024-06-08 + * @author Konrad Wnuk + */ + std::shared_ptr clone() const override; + + /** + * Returns an iterator to the beginning of the train_test_model elements. + * + * @return An iterator to the beginning of the train_test_model elements. + * @date 2024-06-05 + * @author Konrad Wnuk + */ + iterator begin(); + + /** + * Returns an iterator to the end of the train_test_model elements. + * + * @return An iterator to the end of the train_test_model elements. + * @date 2024-06-05 + * @author Konrad Wnuk + */ + iterator end(); + + /** + * Returns a const iterator to the beginning of the train_test_model elements. + * + * @return A const_iterator to the beginning of the train_test_model elements. + * @date 2024-06-05 + * @author Konrad Wnuk + */ + const_iterator cbegin() const; + + /** + * Returns a const iterator to the end of the train_test_model elements. + * + * @return A const_iterator to the end of the train_test_model elements. + * @date 2024-06-05 + * @author Konrad Wnuk + */ + const_iterator cend() const; + }; + + /** + * @class train_test_model::iterator + * A class for iterating over train_test_model elements. + * + * @date 2024-05-30 + * @author Konrad Wnuk + */ + class train_test_model::iterator { + private: + /** Pointer to the train_test_model object */ + train_test_model* pTT; + + /** Iterator pointing to the current test dataset */ + std::vector::iterator test_iterator; + + /** Combined train dataset of all the datasets without testing dataset */ + dataset train_dataset; + + public: + /** + * Constructs a new iterator. + * + * @param tt Pointer to the train_test_model object. + * @param test_it Iterator pointing to the current test dataset. 
+ * @date 2024-06-05 + * @author Konrad Wnuk + */ + iterator(train_test_model* tt, std::vector::iterator test_it); + + /** + * Copy constructor. + * + * @param other The iterator object to copy. + * @date 2024-06-08 + * @author Konrad Wnuk + */ + iterator(const iterator& other); + + /** + * Move constructor. + * + * @param other The iterator object to move. + * @date 2024-06-08 + * @author Konrad Wnuk + */ + iterator(iterator&& other) noexcept; + + /** + * Copy assignment operator. + * + * @param other The iterator object to copy. + * @return Reference to the copied iterator object. + * @date 2024-06-08 + * @author Konrad Wnuk + */ + iterator& operator=(const iterator& other); + + /** + * Move assignment operator. + * + * @param other The iterator object to move. + * @return Reference to the moved iterator object. + * @date 2024-06-08 + * @author Konrad Wnuk + */ + iterator& operator=(iterator&& other) noexcept; + + /** + * Advances the iterator to the next element (preincrement). + * + * @return A reference to the advanced iterator. + * @date 2024-06-05 + * @author Konrad Wnuk + */ + iterator& operator++(); + + /** + * Advances the iterator to the next element (postincrement). + * + * @return A copy of the iterator before it was advanced. + * @date 2024-06-05 + * @author Konrad Wnuk + */ + iterator operator++(int); + + /** + * Checks if two iterators are equal. + * + * @param other The other iterator to compare with. + * @return True if the iterators are equal, false otherwise. + * @date 2024-06-08 + * @author Konrad Wnuk + */ + bool operator==(const iterator& other) const; + + /** + * Compares two iterators. + * + * @param other The other iterator to compare with. + * @return The result of the comparison. + * @date 2024-06-08 + * @author Konrad Wnuk + */ + std::strong_ordering operator<=>(const iterator& other) const; + + /** + * Dereferences the iterator to access the current train_test_model element. 
+         *
+         * @return A tuple containing the combined training dataset and the current test dataset.
+         * @date 2024-06-05
+         * @author Konrad Wnuk
+         */
+        std::tuple operator*() const;
+
+    private:
+        /**
+         * Initializes the train dataset by combining all datasets except the current test dataset.
+         *
+         * NOTE(review): the method name mentions the test dataset, while this
+         * description says it builds the train dataset; the name is kept as declared.
+         *
+         * @date 2024-06-09
+         * @author Konrad Wnuk
+         */
+        void initialize_test_dataset();
+    };
+
+    /**
+     * @class train_test_model::const_iterator
+     * A class for read-only iteration over train_test_model elements.
+     *
+     * Mirrors train_test_model::iterator, but holds a pointer to a const model
+     * and a const iterator to the current test dataset.
+     *
+     * @date 2024-05-30
+     * @author Konrad Wnuk
+     */
+    class train_test_model::const_iterator {
+    private:
+        /** Pointer to the (const) train_test_model object being iterated */
+        const train_test_model* pTT;
+
+        /** Const iterator pointing to the current test dataset */
+        std::vector::const_iterator test_iterator;
+
+        /** Combined train dataset: all the datasets except the current test dataset */
+        dataset train_dataset;
+
+    public:
+        /**
+         * Constructs a new const_iterator.
+         *
+         * @param tt Pointer to the train_test_model object.
+         * @param test_it Const iterator pointing to the current test dataset.
+         * @date 2024-06-05
+         * @author Konrad Wnuk
+         */
+        const_iterator(const train_test_model* tt, std::vector::const_iterator test_it);
+
+        /**
+         * Copy constructor.
+         *
+         * @param other The const_iterator object to copy.
+         * @date 2024-06-08
+         * @author Konrad Wnuk
+         */
+        const_iterator(const const_iterator& other);
+
+        /**
+         * Move constructor.
+         *
+         * @param other The const_iterator object to move.
+         * @date 2024-06-08
+         * @author Konrad Wnuk
+         */
+        const_iterator(const_iterator&& other) noexcept;
+
+        /**
+         * Copy assignment operator.
+         *
+         * @param other The const_iterator object to copy.
+         * @return Reference to the copied const_iterator object.
+         * @date 2024-06-08
+         * @author Konrad Wnuk
+         */
+        const_iterator& operator=(const const_iterator& other);
+
+        /**
+         * Move assignment operator.
+         *
+         * @param other The const_iterator object to move.
+         * @return Reference to the moved const_iterator object.
+         * @date 2024-06-08
+         * @author Konrad Wnuk
+         */
+        const_iterator& operator=(const_iterator&& other) noexcept;
+
+        /**
+         * Advances the const iterator to the next element (pre-increment).
+         *
+         * @return A reference to the advanced const iterator.
+         * @date 2024-06-05
+         * @author Konrad Wnuk
+         */
+        const_iterator& operator++();
+
+        /**
+         * Advances the const iterator to the next element (post-increment).
+         *
+         * @return A copy of the const iterator before it was advanced.
+         * @date 2024-06-05
+         * @author Konrad Wnuk
+         */
+        const_iterator operator++(int);
+
+        /**
+         * Checks if two const iterators are equal.
+         *
+         * @param other The other const iterator to compare with.
+         * @return True if the const iterators are equal, false otherwise.
+         * @date 2024-06-08
+         * @author Konrad Wnuk
+         */
+        bool operator==(const const_iterator& other) const;
+
+        /**
+         * Compares two const iterators (three-way comparison).
+         *
+         * @param other The other const iterator to compare with.
+         * @return The result of the comparison.
+         * @date 2024-06-08
+         * @author Konrad Wnuk
+         */
+        std::strong_ordering operator<=>(const const_iterator& other) const;
+
+        /**
+         * Dereferences the const iterator to access the current train_test_model element.
+         *
+         * @return A tuple containing the combined training dataset and the current test dataset.
+         * @date 2024-06-05
+         * @author Konrad Wnuk
+         */
+        std::tuple operator*() const;
+
+    private:
+        /**
+         * Initializes the train dataset by combining all datasets except the current test dataset.
+ * + * @date 2024-06-09 + * @autor Konrad Wnuk + */ + void initialize_test_dataset(); + }; +} + +#endif diff --git a/source/readers/train_validation_test_model.cpp b/source/readers/train_validation_test_model.cpp new file mode 100644 index 0000000..aa7f6b6 --- /dev/null +++ b/source/readers/train_validation_test_model.cpp @@ -0,0 +1,392 @@ +/** @file */ + +#include "../readers/train_validation_test_model.h" +#include "../service/exception.h" +#include "../auxiliary/to_string.h" + +#include +#include +#include +#include + +ksi::train_validation_test_model::train_validation_test_model(ksi::reader& source_reader) + : cross_validation_model(source_reader) {} + +ksi::train_validation_test_model::train_validation_test_model(ksi::reader& reader, const int validation_dataset_size) + : cross_validation_model(reader), validation_size(validation_dataset_size) {} + +ksi::train_validation_test_model::train_validation_test_model(const train_validation_test_model& other) + : cross_validation_model(other) {} + +ksi::train_validation_test_model::train_validation_test_model(train_validation_test_model&& other) noexcept + : cross_validation_model(std::move(other)) {} + +ksi::train_validation_test_model& ksi::train_validation_test_model::operator=(const train_validation_test_model& other) +{ + if (this != &other) + { + cross_validation_model::operator=(other); + } + return *this; +} + +ksi::train_validation_test_model& ksi::train_validation_test_model::operator=(train_validation_test_model&& other) noexcept +{ + if (this != &other) + { + cross_validation_model::operator=(std::move(other)); + } + return *this; +} + +void ksi::train_validation_test_model::split(const ksi::dataset& base_dataset, const unsigned int n) +{ + try + { + if (n < 3) + { + throw ksi::exception("Number of subsets must be grater than (2)."); + } + + const auto total_size = base_dataset.size(); + if (n > total_size) + { + throw ksi::exception("Number of subsets (" + std::to_string(n) + ") cannot be greater than 
the number of data points (" + std::to_string(total_size) + ")."); + } + + datasets.clear(); + datasets.resize(n); + + const auto base_size = total_size / n; + const auto remainder = total_size % n; + + std::size_t index = 0; + for (int i = 0; i < n; ++i) + { + const auto current_size = base_size + (i < remainder ? 1 : 0); + + for (auto j = 0; j < current_size; ++j) + { + datasets[i].addDatum(*base_dataset.getDatum(index)); + ++index; + } + } + } + CATCH; +} + +void ksi::train_validation_test_model::save(const std::filesystem::path& directory, const std::filesystem::path& filename, const std::filesystem::path& extension, const bool overwrite) const +{ + try + { + std::filesystem::create_directories(directory); + + const auto num_files = datasets.size(); + const auto num_digits = std::to_string(num_files).length(); + + for (auto i = 0; i < datasets.size(); ++i) + { + auto file_path = directory / (filename.string() + "_" + ksi::to_string(i, num_digits) + extension.string()); + + if (std::filesystem::exists(file_path) && !overwrite) + { + throw ksi::exception("File " + file_path.string() + " already exists. 
To overwrite, set the overwrite parameter to true."); + } + + std::ofstream file(file_path); + + if (file.is_open()) + { + for (auto j = 0; j < datasets[i].size(); ++j) + { + const datum* d = datasets[i].getDatum(j); + if (d) + { + d->save_print(file); + } + } + file.close(); + } + else + { + throw ksi::exception("Unable to open file " + file_path.string()); + } + } + } + CATCH; +} + +ksi::dataset ksi::train_validation_test_model::read_file(const std::filesystem::path& file_directory) +{ + return pReader->read(file_directory.string()); +} + +void ksi::train_validation_test_model::read_directory(const std::filesystem::path& directory, const std::string& file_regex_pattern) +{ + std::regex data_file_regex(file_regex_pattern); + auto data_files = std::filesystem::directory_iterator(directory) + | std::views::filter([](const auto& entry) { return entry.is_regular_file(); }) + | std::views::filter([&data_file_regex](const auto& entry) + { + return std::regex_match(entry.path().string(), data_file_regex); + }); + + std::vector filtered_files; + for (const auto& file : data_files) { + filtered_files.push_back(file.path()); + } + + std::vector threads; + for (const auto& file_path : filtered_files) { + threads.emplace_back([this, file_path]() + { + ksi::dataset ds = read_file(file_path); + + std::lock_guard lock(this->datasets_mutex); + this->datasets.push_back(std::move(ds)); + }); + } + + for (auto& thread : threads) { + if (thread.joinable()) { + thread.join(); + } + } +} + +std::shared_ptr ksi::train_validation_test_model::clone() const +{ + return std::shared_ptr(new ksi::train_validation_test_model(*this)); +} + +ksi::dataset ksi::train_validation_test_model::read(const std::string& filename) +{ + return pReader->read(filename); +} + +void ksi::train_validation_test_model::read_and_split_file(const std::filesystem::path& file_path, const unsigned int n) +{ + dataset base_dataset = read_file(file_path); + split(base_dataset, n); +} + +auto 
ksi::train_validation_test_model::begin() -> ksi::train_validation_test_model::iterator +{ + return { this, datasets.begin() }; +} + +auto ksi::train_validation_test_model::end() -> ksi::train_validation_test_model::iterator +{ + return { this, datasets.end() }; +} + +auto ksi::train_validation_test_model::cbegin() const -> ksi::train_validation_test_model::const_iterator +{ + return { this, datasets.cbegin() }; +} + +auto ksi::train_validation_test_model::cend() const -> ksi::train_validation_test_model::const_iterator +{ + return { this, datasets.cend() }; +} + +ksi::train_validation_test_model::iterator::iterator(train_validation_test_model* tvt, std::vector::iterator test_it) + : pTVT(tvt), test_iterator(test_it) +{ + initialize_train_and_validation_datasets(); +} + +ksi::train_validation_test_model::iterator::iterator(const iterator& other) + : pTVT(other.pTVT), test_iterator(other.test_iterator), validation_dataset(other.validation_dataset), train_dataset(other.train_dataset) {} + +ksi::train_validation_test_model::iterator::iterator(iterator&& other) noexcept + : pTVT(other.pTVT), test_iterator(std::move(other.test_iterator)), validation_dataset(std::move(other.validation_dataset)), train_dataset(std::move(other.train_dataset)) {} + +ksi::train_validation_test_model::iterator& ksi::train_validation_test_model::iterator::operator=(const iterator& other) +{ + if (this != &other) + { + pTVT = other.pTVT; + test_iterator = other.test_iterator; + validation_dataset = other.validation_dataset; + train_dataset = other.train_dataset; + } + return *this; +} + +ksi::train_validation_test_model::iterator& ksi::train_validation_test_model::iterator::operator=(iterator&& other) noexcept +{ + if (this != &other) + { + pTVT = std::move(other.pTVT); + test_iterator = std::move(other.test_iterator); + validation_dataset = std::move(other.validation_dataset); + train_dataset = std::move(other.train_dataset); + } + return *this; +} + 
+// Pre-increment: moves to the next test subset and rebuilds the combined
+// train and validation datasets for it.
+ksi::train_validation_test_model::iterator& ksi::train_validation_test_model::iterator::operator++()
+{
+    ++test_iterator;
+    initialize_train_and_validation_datasets();
+    return *this;
+}
+
+// Post-increment: the returned copy carries the datasets built for the old position.
+ksi::train_validation_test_model::iterator ksi::train_validation_test_model::iterator::operator++(int)
+{
+    iterator temp = *this;
+    ++(*this);
+    return temp;
+}
+
+// Only the position of the test subset takes part in comparisons.
+bool ksi::train_validation_test_model::iterator::operator==(const iterator& other) const
+{
+    return test_iterator == other.test_iterator;
+}
+
+std::strong_ordering ksi::train_validation_test_model::iterator::operator<=>(const iterator& other) const
+{
+    return test_iterator <=> other.test_iterator;
+}
+
+// Must not be called on a past-the-end iterator (test_iterator is dereferenced).
+std::tuple ksi::train_validation_test_model::iterator::operator*() const
+{
+    return std::make_tuple(train_dataset, validation_dataset, *test_iterator);
+}
+
+// Rebuilds train_dataset and validation_dataset for the subset test_iterator
+// points to. The validation part consists of the validation_size subsets that
+// follow the test subset cyclically (wrapping to the first subset after the
+// last one); every other subset except the test one goes to the train part.
+// For a past-the-end position, which is also the only position of an empty
+// model, both datasets are left empty.
+void ksi::train_validation_test_model::iterator::initialize_train_and_validation_datasets()
+{
+    train_dataset = ksi::dataset();
+    validation_dataset = ksi::dataset();
+
+    // Nothing to combine past the end. This guard also keeps the modulo
+    // below from dividing by zero when the model holds no subsets.
+    if (test_iterator == pTVT->datasets.end())
+    {
+        return;
+    }
+
+    const std::size_t total_datasets = pTVT->datasets.size();
+    const std::size_t test_index = static_cast<std::size_t>(std::distance(pTVT->datasets.begin(), test_iterator));
+
+    // Number of validation subsets: never negative, never more than the
+    // subsets that remain once the test subset is taken out.
+    std::size_t validation_count = 0;
+    if (pTVT->validation_size > 0)
+    {
+        validation_count = static_cast<std::size_t>(pTVT->validation_size);
+        if (validation_count > total_datasets - 1)
+        {
+            validation_count = total_datasets - 1;
+        }
+    }
+
+    for (std::size_t k = 0; k < total_datasets; ++k)
+    {
+        if (k == test_index)
+        {
+            continue;
+        }
+
+        // Cyclic distance from the test subset, in 1 .. total_datasets - 1.
+        // Deciding by distance (rather than by walking a second iterator)
+        // keeps the validation window intact when it wraps around the end.
+        const std::size_t offset = (k + total_datasets - test_index) % total_datasets;
+        ksi::dataset& target = (offset <= validation_count) ? validation_dataset : train_dataset;
+
+        const auto& subset = pTVT->datasets[k];
+        for (std::size_t j = 0; j < subset.size(); ++j)
+        {
+            target.addDatum(*subset.getDatum(j));
+        }
+    }
+}
+
+ksi::train_validation_test_model::const_iterator::const_iterator(const train_validation_test_model* tvt,
std::vector::const_iterator test_it)
+    : pTVT(tvt), test_iterator(test_it)
+{
+    initialize_train_and_validation_datasets(); // build the combined datasets for the subset test_it designates
+}
+/*
+ * Copy constructor: copies the model pointer, the test position and the
+ * already built validation and train datasets.
+ */
+ksi::train_validation_test_model::const_iterator::const_iterator(const const_iterator& other)
+    : pTVT(other.pTVT), test_iterator(other.test_iterator), validation_dataset(other.validation_dataset), train_dataset(other.train_dataset) {}
+/*
+ * Move constructor: the model pointer is copied, the test position and both
+ * datasets are moved from other.
+ */
+ksi::train_validation_test_model::const_iterator::const_iterator(const_iterator&& other) noexcept
+    : pTVT(other.pTVT), test_iterator(std::move(other.test_iterator)), validation_dataset(std::move(other.validation_dataset)), train_dataset(std::move(other.train_dataset)) {}
+/*
+ * Copy assignment: member-wise copy, guarded against self-assignment.
+ */
+ksi::train_validation_test_model::const_iterator& ksi::train_validation_test_model::const_iterator::operator=(const const_iterator& other)
+{
+    if (this != &other)
+    {
+        pTVT = other.pTVT;
+        test_iterator = other.test_iterator;
+        validation_dataset = other.validation_dataset;
+        train_dataset = other.train_dataset;
+    }
+    return *this;
+}
+/*
+ * Move assignment: member-wise move, guarded against self-assignment.
+ */
+ksi::train_validation_test_model::const_iterator& ksi::train_validation_test_model::const_iterator::operator=(const_iterator&& other) noexcept
+{
+    if (this != &other)
+    {
+        pTVT = std::move(other.pTVT); // raw pointer: this is a plain copy
+        test_iterator = std::move(other.test_iterator);
+        validation_dataset = std::move(other.validation_dataset);
+        train_dataset = std::move(other.train_dataset);
+    }
+    return *this;
+}
+/*
+ * Pre-increment: moves to the next test subset and rebuilds the combined
+ * train and validation datasets for the new position.
+ */
+ksi::train_validation_test_model::const_iterator& ksi::train_validation_test_model::const_iterator::operator++()
+{
+    ++test_iterator;
+    initialize_train_and_validation_datasets();
+    return *this;
+}
+/*
+ * Post-increment: returns a copy made before advancing; the copy includes
+ * the datasets built for the old position.
+ */
+ksi::train_validation_test_model::const_iterator ksi::train_validation_test_model::const_iterator::operator++(int)
+{
+    const_iterator temp = *this;
+    ++(*this);
+    return temp;
+}
+/*
+ * Equality: only the position of the test subset is compared; the model
+ * pointer and the combined datasets do not take part.
+ */
+bool ksi::train_validation_test_model::const_iterator::operator==(const const_iterator& other) const
+{
+    return test_iterator == other.test_iterator;
+}
+
+std::strong_ordering
ksi::train_validation_test_model::const_iterator::operator<=>(const const_iterator& other) const +{ + return test_iterator <=> other.test_iterator; +} + +std::tuple ksi::train_validation_test_model::const_iterator::operator*() const +{ + return std::make_tuple(std::ref(train_dataset), std::ref(validation_dataset), std::ref(*test_iterator)); +} + +void ksi::train_validation_test_model::const_iterator::initialize_train_and_validation_datasets() +{ + train_dataset = ksi::dataset(); + validation_dataset = ksi::dataset(); + + auto total_datasets = pTVT->datasets.size(); + auto validation_start_index = std::distance(pTVT->datasets.cbegin(), test_iterator) + 1; + auto current_validation_count = 0; + + auto current_iterator = pTVT->datasets.cbegin(); + std::advance(current_iterator, validation_start_index % total_datasets); + + for (auto it = pTVT->datasets.cbegin(); it != pTVT->datasets.cend(); ++it) { + if (it == test_iterator) { + continue; + } + + if (current_validation_count < pTVT->validation_size && it == current_iterator) { + for (std::size_t j = 0; j < it->size(); ++j) { + validation_dataset.addDatum(*it->getDatum(j)); + } + ++current_validation_count; + ++current_iterator; + if (current_iterator == pTVT->datasets.cend()) { + current_iterator = pTVT->datasets.cbegin(); + } + } + else { + for (std::size_t j = 0; j < it->size(); ++j) { + train_dataset.addDatum(*it->getDatum(j)); + } + } + } +} + diff --git a/source/readers/train_validation_test_model.h b/source/readers/train_validation_test_model.h new file mode 100644 index 0000000..9a6674d --- /dev/null +++ b/source/readers/train_validation_test_model.h @@ -0,0 +1,474 @@ +/** @file */ + +#ifndef TRAIN_VALIDATION_TEST_MODEL_H +#define TRAIN_VALIDATION_TEST_MODEL_H + +#include "cross_validation_model.h" + +#include + +namespace ksi +{ + + /** + * @class train_validation_test_model + * Class representing the Training, Validation and Test model, inheriting from cross_validation_model. 
+     *
+     * @date 2024-05-30
+     * @author Konrad Wnuk
+     */
+    class train_validation_test_model : public cross_validation_model
+    {
+    private:
+        /**
+         * @brief The size of the validation part, counted in subsets (not in data items).
+         *
+         * Determines how many subsets are used for validation in each iteration.
+         * Default value is 1.
+         */
+        int validation_size = 1;
+
+    public:
+        /**
+         * @class iterator
+         * A nested class for iterating over train_validation_test_model elements.
+         *
+         * @date 2024-05-30
+         * @author Konrad Wnuk
+         */
+        class iterator;
+
+        /**
+         * @class const_iterator
+         * A nested class for read-only iteration over train_validation_test_model elements.
+         *
+         * @date 2024-05-30
+         * @author Konrad Wnuk
+         */
+        class const_iterator;
+
+        /**
+         * Constructor for train_validation_test_model.
+         * The number of validation subsets keeps its default value (1).
+         *
+         * @param source_reader The reader object to initialize the base class with.
+         * @date 2024-06-04
+         * @author Konrad Wnuk
+         */
+        train_validation_test_model(reader& source_reader);
+
+        /**
+         * Constructor for train_validation_test_model.
+         *
+         * @param reader The reader object to initialize the base class with.
+         * @param validation_dataset_size Number of subsets used for validation in each iteration.
+         * @date 2024-06-09
+         * @author Konrad Wnuk
+         */
+        train_validation_test_model(reader& reader, const int validation_dataset_size);
+
+        /**
+         * Copy constructor for train_validation_test_model.
+         *
+         * @param other The train_validation_test_model object to copy.
+         * @date 2024-06-08
+         * @author Konrad Wnuk
+         */
+        train_validation_test_model(const train_validation_test_model& other);
+
+        /**
+         * Move constructor for train_validation_test_model.
+         *
+         * @param other The train_validation_test_model object to move.
+         * @date 2024-06-08
+         * @author Konrad Wnuk
+         */
+        train_validation_test_model(train_validation_test_model&& other) noexcept;
+
+        /**
+         * Copy assignment operator for train_validation_test_model.
+         *
+         * @param other The train_validation_test_model object to copy.
+         * @return A reference to the copied train_validation_test_model object.
+         * @date 2024-06-08
+         * @author Konrad Wnuk
+         */
+        train_validation_test_model& operator=(const train_validation_test_model& other);
+
+        /**
+         * Move assignment operator for train_validation_test_model.
+         *
+         * @param other The train_validation_test_model object to move.
+         * @return A reference to the moved train_validation_test_model object.
+         * @date 2024-06-08
+         * @author Konrad Wnuk
+         */
+        train_validation_test_model& operator=(train_validation_test_model&& other) noexcept;
+
+        /**
+         * Splits the data into a specified number of subsets.
+         *
+         * The data are taken in their original order and cut into n contiguous
+         * subsets. When the number of data items is not divisible by n, the first
+         * (number of items modulo n) subsets get one extra item. Subsets stored
+         * previously are discarded.
+         *
+         * The implementation raises a ksi::exception when n is less than 3 or
+         * greater than the number of data items; how that error reaches the caller
+         * depends on the project's CATCH macro.
+         *
+         * @param base_dataset The dataset to be split.
+         * @param n The number of subsets to split the data into. Default is 10.
+         * @date 2024-06-04
+         * @author Konrad Wnuk
+         */
+        void split(const dataset & base_dataset, const unsigned int n = 10) override;
+
+        /**
+         * Saves the data to a specified directory.
+         *
+         * Each stored subset is written to its own file whose name is composed of
+         * filename, an underscore, the index of the subset (formatted with
+         * ksi::to_string) and extension. The directory is created when it does not
+         * exist. When a target file already exists and overwrite is false, or when
+         * a file cannot be opened, the implementation raises a ksi::exception.
+         *
+         * @param directory The directory where the data will be saved.
+         * @param filename The base name for the files to be saved. Default is "dataset".
+         * @param extension The file extension for the files to be saved. Default is ".data".
+         * @param overwrite Flag to control whether to overwrite the existing files. Default is false.
+         * @date 2024-06-04
+         * @author Konrad Wnuk
+         */
+        void save(const std::filesystem::path& directory, const std::filesystem::path& filename = "dataset", const std::filesystem::path& extension = ".data", const bool overwrite = false) const override;
+
+        /**
+         * Reads data from a specified file.
+         * The call is forwarded to the wrapped reader.
+         *
+         * @param file_directory The path of the file to read the data from.
+         * @return A dataset containing the data read from the file.
+         * @date 2024-06-04
+         * @author Konrad Wnuk
+         */
+        dataset read_file(const std::filesystem::path& file_directory) override;
+
+        /**
+         * Reads data from a specified directory.
+         *
+         * Every regular file whose full path string matches the pattern is read
+         * in a separate thread and appended to the stored subsets, so the order
+         * of the appended subsets depends on which thread finishes first.
+         *
+         * @param directory The directory to read the data from.
+         * @param file_regex_pattern The pattern to match the files.
Defaults to ".*\\.data$".
+         * @date 2024-06-04
+         * @author Konrad Wnuk
+         */
+        void read_directory(const std::filesystem::path& directory, const std::string& file_regex_pattern = ".*\\.data$") override;
+
+        /**
+         * Reads data from a specified file.
+         * The call is forwarded to the wrapped reader.
+         *
+         * @param filename The path of the file to read the data from.
+         * @return A dataset containing the data read from the file.
+         * @date 2024-06-08
+         * @author Konrad Wnuk
+         */
+        dataset read(const std::string& filename) override;
+
+        /**
+         * Reads data from a file and splits it into subsets.
+         * Equivalent to read_file() followed by split().
+         *
+         * @param file_path The path of the file to read the data from.
+         * @param n The number of subsets to split the data into. Default is 10.
+         * @date 2024-06-14
+         * @author Konrad Wnuk
+         */
+        void read_and_split_file(const std::filesystem::path& file_path, const unsigned int n = 10) override;
+
+        /**
+         * Clones the current reader object.
+         * (the prototype design pattern)
+         * The clone is created with the copy constructor.
+         *
+         * @return A shared pointer to the cloned reader object.
+         * @date 2024-06-08
+         * @author Konrad Wnuk
+         */
+        std::shared_ptr clone() const override;
+
+        /**
+         * Returns an iterator to the beginning of the train_validation_test_model elements.
+         * The first stored subset plays the role of the test dataset.
+         *
+         * @return An iterator to the beginning of the train_validation_test_model elements.
+         * @date 2024-06-05
+         * @author Konrad Wnuk
+         */
+        iterator begin();
+
+        /**
+         * Returns an iterator to the end of the train_validation_test_model elements.
+         * The returned iterator must not be dereferenced.
+         *
+         * @return An iterator to the end of the train_validation_test_model elements.
+         * @date 2024-06-05
+         * @author Konrad Wnuk
+         */
+        iterator end();
+
+        /**
+         * Returns a const iterator to the beginning of the train_validation_test_model elements.
+         * The first stored subset plays the role of the test dataset.
+         *
+         * @return A const_iterator to the beginning of the train_validation_test_model elements.
+         * @date 2024-06-05
+         * @author Konrad Wnuk
+         */
+        const_iterator cbegin() const;
+
+        /**
+         * Returns a const iterator to the end of the train_validation_test_model elements.
+         *
+         * @return A const_iterator to the end of the train_validation_test_model elements.
+         * @date 2024-06-05
+         * @author Konrad Wnuk
+         */
+        const_iterator cend() const;
+    };
+
+    /**
+     * @class train_validation_test_model::iterator
+     * A class for iterating over train_validation_test_model elements.
+     *
+     * Each position designates one stored subset as the test dataset; the
+     * iterator additionally keeps the validation and train datasets combined
+     * from the remaining subsets.
+     *
+     * @date 2024-05-30
+     * @author Konrad Wnuk
+     */
+    class train_validation_test_model::iterator {
+    private:
+        /** Pointer to the train_validation_test_model object being iterated */
+        train_validation_test_model* pTVT;
+
+        /** Iterator pointing to the current test dataset */
+        std::vector::iterator test_iterator;
+
+        /** Combined validation dataset, built from the subsets selected for validation */
+        dataset validation_dataset;
+
+        /** Combined train dataset of all the datasets without validation datasets and testing dataset */
+        dataset train_dataset;
+
+    public:
+
+        /**
+         * Constructs a new iterator.
+         * The combined train and validation datasets are built immediately.
+         *
+         * @param tvt Pointer to the train_validation_test_model object.
+         * @param test_it Iterator pointing to the current test dataset.
+         * @date 2024-06-10
+         * @author Konrad Wnuk
+         */
+        iterator(train_validation_test_model* tvt, std::vector::iterator test_it);
+
+        /**
+         * Copy constructor.
+         * The already built datasets are copied, not rebuilt.
+         *
+         * @param other The iterator object to copy.
+         * @date 2024-06-10
+         * @author Konrad Wnuk
+         */
+        iterator(const iterator& other);
+
+        /**
+         * Move constructor.
+         *
+         * @param other The iterator object to move.
+         * @date 2024-06-10
+         * @author Konrad Wnuk
+         */
+        iterator(iterator&& other) noexcept;
+
+        /**
+         * Copy assignment operator.
+         *
+         * @param other The iterator object to copy.
+         * @return Reference to the copied iterator object.
+         * @date 2024-06-10
+         * @author Konrad Wnuk
+         */
+        iterator& operator=(const iterator& other);
+
+        /**
+         * Move assignment operator.
+         *
+         * @param other The iterator object to move.
+         * @return Reference to the moved iterator object.
+         * @date 2024-06-10
+         * @author Konrad Wnuk
+         */
+        iterator& operator=(iterator&& other) noexcept;
+
+        /**
+         * Advances the iterator to the next element (pre-increment).
+         * The combined train and validation datasets are rebuilt for the new position.
+         *
+         * @return A reference to the advanced iterator.
+         * @date 2024-06-10
+         * @author Konrad Wnuk
+         */
+        iterator& operator++();
+
+        /**
+         * Advances the iterator to the next element (post-increment).
+         * The returned copy includes the datasets built for the old position.
+         *
+         * @return A copy of the iterator before it was advanced.
+         * @date 2024-06-10
+         * @author Konrad Wnuk
+         */
+        iterator operator++(int);
+
+        /**
+         * Checks if two iterators are equal.
+         * Only the position of the test dataset is compared.
+         *
+         * @param other The other iterator to compare with.
+         * @return True if the iterators are equal, false otherwise.
+         * @date 2024-06-10
+         * @author Konrad Wnuk
+         */
+        bool operator==(const iterator& other) const;
+
+        /**
+         * Compares two iterators (three-way comparison).
+         * Only the position of the test dataset is compared.
+         *
+         * @param other The other iterator to compare with.
+         * @return The result of the comparison.
+         * @date 2024-06-10
+         * @author Konrad Wnuk
+         */
+        std::strong_ordering operator<=>(const iterator& other) const;
+
+        /**
+         * Dereferences the iterator to access the current train_validation_test_model element.
+         * Must not be called on a past-the-end iterator.
+         *
+         * @return A tuple containing the combined training dataset, the combined validation dataset and the current test dataset.
+         * @date 2024-06-10
+         * @author Konrad Wnuk
+         */
+        std::tuple operator*() const;
+
+    private:
+        /**
+         * Initializes the train and validation datasets by combining all datasets except the current test dataset.
+         * Both datasets are emptied first and then refilled for the current position.
+         *
+         * @date 2024-06-10
+         * @author Konrad Wnuk
+         */
+        void initialize_train_and_validation_datasets();
+    };
+
+    /**
+     * @class train_validation_test_model::const_iterator
+     * A class for read-only iteration over train_validation_test_model elements.
+     *
+     * @date 2024-05-30
+     * @author Konrad Wnuk
+     */
+    class train_validation_test_model::const_iterator {
+    private:
+        /** Pointer to the (const) train_validation_test_model object being iterated */
+        const train_validation_test_model* pTVT;
+
+        /** Const iterator pointing to the current test dataset */
+        std::vector::const_iterator test_iterator;
+
+        /** Combined validation dataset, built from the subsets selected for validation */
+        dataset validation_dataset;
+
+        /** Combined train dataset of all the datasets without validation datasets and testing dataset */
+        dataset train_dataset;
+
+    public:
+
+        /**
+         * Constructs a new const_iterator.
+         * The combined train and validation datasets are built immediately.
+         *
+         * @param tvt Pointer to the train_validation_test_model object.
+         * @param test_it Const iterator pointing to the current test dataset.
+         * @date 2024-06-10
+         * @author Konrad Wnuk
+         */
+        const_iterator(const train_validation_test_model* tvt, std::vector::const_iterator test_it);
+
+        /**
+         * Copy constructor.
+         * The already built datasets are copied, not rebuilt.
+         *
+         * @param other The const_iterator object to copy.
+         * @date 2024-06-10
+         * @author Konrad Wnuk
+         */
+        const_iterator(const const_iterator& other);
+
+        /**
+         * Move constructor.
+         *
+         * @param other The const_iterator object to move.
+         * @date 2024-06-10
+         * @author Konrad Wnuk
+         */
+        const_iterator(const_iterator&& other) noexcept;
+
+        /**
+         * Copy assignment operator.
+         *
+         * @param other The const_iterator object to copy.
+         * @return Reference to the copied const_iterator object.
+         * @date 2024-06-10
+         * @author Konrad Wnuk
+         */
+        const_iterator& operator=(const const_iterator& other);
+
+        /**
+         * Move assignment operator.
+         *
+         * @param other The const_iterator object to move.
+         * @return Reference to the moved const_iterator object.
+         * @date 2024-06-10
+         * @author Konrad Wnuk
+         */
+        const_iterator& operator=(const_iterator&& other) noexcept;
+
+        /**
+         * Advances the const iterator to the next element (pre-increment).
+         * The combined train and validation datasets are rebuilt for the new position.
+         *
+         * @return A reference to the advanced const iterator.
+         * @date 2024-06-10
+         * @author Konrad Wnuk
+         */
+        const_iterator& operator++();
+
+        /**
+         * Advances the const iterator to the next element (post-increment).
+         * The returned copy includes the datasets built for the old position.
+         *
+         * @return A copy of the const iterator before it was advanced.
+         * @date 2024-06-10
+         * @author Konrad Wnuk
+         */
+        const_iterator operator++(int);
+
+        /**
+         * Checks if two const iterators are equal.
+         * Only the position of the test dataset is compared.
+         *
+         * @param other The other const iterator to compare with.
+         * @return True if the const iterators are equal, false otherwise.
+         * @date 2024-06-10
+         * @author Konrad Wnuk
+         */
+        bool operator==(const const_iterator& other) const;
+
+        /**
+         * Compares two const iterators (three-way comparison).
+         * Only the position of the test dataset is compared.
+         *
+         * @param other The other const iterator to compare with.
+         * @return The result of the comparison.
+         * @date 2024-06-10
+         * @author Konrad Wnuk
+         */
+        std::strong_ordering operator<=>(const const_iterator& other) const;
+
+        /**
+         * Dereferences the const iterator to access the current train_validation_test_model element.
+         * Must not be called on a past-the-end iterator.
+         *
+         * NOTE(review): the implementation builds the tuple from std::ref wrappers
+         * around this iterator's own datasets; if the declared tuple holds references,
+         * they stay valid only while this iterator is alive and not advanced - confirm
+         * against the declared element types.
+         *
+         * @return A tuple containing the combined training dataset, the combined validation dataset and the current test dataset.
+         * @date 2024-06-10
+         * @author Konrad Wnuk
+         */
+        std::tuple operator*() const;
+
+    private:
+        /**
+         * Initializes the train and validation datasets by combining all datasets except the current test dataset.
+         * Both datasets are emptied first and then refilled for the current position.
+         *
+         * @date 2024-06-10
+         * @author Konrad Wnuk
+         */
+        void initialize_train_and_validation_datasets();
+    };
+}
+
+#endif
+