Skip to content

Commit

Permalink
Merge pull request #12 from vnuczek/cross-validation
Browse files Browse the repository at this point in the history
implementacja komponentu do obsługi danych dla walidacji krzyżowej
  • Loading branch information
ksiminski authored Jun 25, 2024
2 parents b8d0bb8 + 95ef18b commit 9cf31f7
Show file tree
Hide file tree
Showing 10 changed files with 1,909 additions and 5 deletions.
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -36,3 +36,9 @@ source/results/
*.exe
*.out
*.app
/neuro-fu.993AC922/x64
/results/exp-005
/doxygen/html
/.vs
# Temporary VS file
*.pdb
30 changes: 29 additions & 1 deletion source/common/datum.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -294,7 +294,7 @@ namespace ksi
//ss << " id == " << d.getID() << ", id_incomplete == " << d.getIDincomplete();
if (not d._labels.empty())
{
ss << " labels: ";
ss << " labels: "; // " | "
for (const auto & l : d._labels)
ss << l << " ";
}
Expand Down Expand Up @@ -385,3 +385,31 @@ std::size_t ksi::datum::getNumberOfLabels() const
{
return _labels.size();
}

/**
 * Writes the datum to the stream as a single line of text.
 *
 * The values of all attributes are written first, separated by single
 * spaces. If the datum has any labels, they follow after a " | " separator
 * and are separated by single spaces as well.
 * Example: 1.2 5.6 7.8 | A B
 *
 * The line is terminated with '\n' rather than std::endl, so the stream is
 * not flushed after every single datum. Callers that need the data on disk
 * immediately should flush (or close) the stream themselves.
 *
 * @param os The output stream to print the datum to.
 */
void ksi::datum::save_print(std::ostream& os) const
{
    // Attribute values: a space is written before every value except the first,
    // which yields exactly one space between neighbours and none at the ends.
    const std::size_t attribute_count = attributes.size();
    for (std::size_t i = 0; i < attribute_count; ++i)
    {
        if (i > 0)
        {
            os << " ";
        }
        os << attributes[i]->getValue();
    }

    // Labels are optional; the bar separator is only emitted when there are any.
    if (!_labels.empty())
    {
        os << " | ";

        bool first_label = true;
        for (const auto& label : _labels)
        {
            if (!first_label)
            {
                os << " ";
            }
            os << label;
            first_label = false;
        }
    }

    // Plain newline: avoids the implicit flush performed by std::endl.
    os << '\n';
}

19 changes: 17 additions & 2 deletions source/common/datum.h
Original file line number Diff line number Diff line change
Expand Up @@ -202,10 +202,25 @@ namespace ksi
@author Krzysztof Siminski */
std::size_t getNumberOfLabels () const;

/** Output stream operator
@date 2018-05-13
/** Output stream operator for datum.
*
* @param ss The output stream.
* @param d The datum to write to the stream.
* @return The output stream with the datum written to it.
* @date 2018-05-13
*/
friend std::ostream & operator << (std::ostream & ss, const datum & d);

/**
* Writes the datum to the provided output stream as one line of text.
* The values of the attributes are printed first, separated by single spaces.
* If the datum has any labels, they follow after a bar separator " | " and are
* separated by single spaces as well. The line is terminated with a newline.
* Example format: 1.2 5.6 7.8 | A B
*
* @param os The output stream to print the datum to.
* @date 2024-06-10
* @author Konrad Wnuk
*/
void save_print(std::ostream& os) const;
};
}

Expand Down
25 changes: 23 additions & 2 deletions source/makefile
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@ libasan=
__ : $(release_folder) $(release_folder)/main
./$(release_folder)/main

# Target "9": depends on the release folder and the release binary,
# then runs the release binary with the command-line argument 9.
9 : $(release_folder) $(release_folder)/main
./$(release_folder)/main 9

1 : $(release_folder) $(release_folder)/main
./$(release_folder)/main 1

Expand Down Expand Up @@ -186,6 +189,14 @@ $(release_folder)/readers-reader-complete.o : readers/reader-complete.cpp
$(compiler) $(standard) $(release) $(optRelease) $(parallel) $(errors) -c -o $@ $^
$(debug_folder)/readers-reader-complete.o : readers/reader-complete.cpp
$(compiler) $(standard) $(debug) $(optyDebug) $(parallel) $(errors) $(sanitizer) -c -o $@ $^
# readers/train_test_model.cpp -> object file, release build (release and optimisation flags).
$(release_folder)/readers-train_test_model.o : readers/train_test_model.cpp
$(compiler) $(standard) $(release) $(optRelease) $(parallel) $(errors) -c -o $@ $^
# readers/train_test_model.cpp -> object file, debug build (debug flags plus $(sanitizer)).
$(debug_folder)/readers-train_test_model.o : readers/train_test_model.cpp
$(compiler) $(standard) $(debug) $(optyDebug) $(parallel) $(errors) $(sanitizer) -c -o $@ $^
# readers/cross_validation_model.cpp -> object file, release build (release and optimisation flags).
$(release_folder)/readers-cross_validation_model.o : readers/cross_validation_model.cpp
$(compiler) $(standard) $(release) $(optRelease) $(parallel) $(errors) -c -o $@ $^
# readers/cross_validation_model.cpp -> object file, debug build (debug flags plus $(sanitizer)).
$(debug_folder)/readers-cross_validation_model.o : readers/cross_validation_model.cpp
$(compiler) $(standard) $(debug) $(optyDebug) $(parallel) $(errors) $(sanitizer) -c -o $@ $^
$(release_folder)/readers-reader.o : readers/reader.cpp
$(compiler) $(standard) $(release) $(optRelease) $(parallel) $(errors) -c -o $@ $^
$(debug_folder)/readers-reader.o : readers/reader.cpp
Expand All @@ -198,6 +209,10 @@ $(release_folder)/readers-reader-incomplete.o : readers/reader-incomplete.cpp
$(compiler) $(standard) $(release) $(optRelease) $(parallel) $(errors) -c -o $@ $^
$(debug_folder)/readers-reader-incomplete.o : readers/reader-incomplete.cpp
$(compiler) $(standard) $(debug) $(optyDebug) $(parallel) $(errors) $(sanitizer) -c -o $@ $^
# readers/train_validation_test_model.cpp -> object file, release build (release and optimisation flags).
$(release_folder)/readers-train_validation_test_model.o : readers/train_validation_test_model.cpp
$(compiler) $(standard) $(release) $(optRelease) $(parallel) $(errors) -c -o $@ $^
# readers/train_validation_test_model.cpp -> object file, debug build (debug flags plus $(sanitizer)).
$(debug_folder)/readers-train_validation_test_model.o : readers/train_validation_test_model.cpp
$(compiler) $(standard) $(debug) $(optyDebug) $(parallel) $(errors) $(sanitizer) -c -o $@ $^
$(release_folder)/implications-imp-kleene-dienes.o : implications/imp-kleene-dienes.cpp
$(compiler) $(standard) $(release) $(optRelease) $(parallel) $(errors) -c -o $@ $^
$(debug_folder)/implications-imp-kleene-dienes.o : implications/imp-kleene-dienes.cpp
Expand Down Expand Up @@ -955,6 +970,7 @@ $(release_folder)/metrics-metric-chebyshev.o \
$(release_folder)/common-data-modifier-imputer-knn-average.o \
$(release_folder)/tnorms-t-norm-frank.o \
$(release_folder)/partitions-cluster.o \
$(release_folder)/readers-cross_validation_model.o \
$(release_folder)/partitions-fcm-possibilistic.o \
$(release_folder)/common-data-modifier-filter.o \
$(release_folder)/auxiliary-tempus.o \
Expand Down Expand Up @@ -1023,8 +1039,8 @@ $(release_folder)/partitions-fcm.o \
$(release_folder)/neuro-fuzzy-ma.o \
$(release_folder)/descriptors-descriptor-interval-gaussian-subspace.o \
$(release_folder)/descriptors-descriptor-triangular.o \
$(release_folder)/readers-weighted_reader_complete.o \
$(release_folder)/implications-imp-fodor.o \
$(release_folder)/readers-weighted_reader_complete.o \
$(release_folder)/snorms-s-norm-lukasiewicz.o \
$(release_folder)/tnorms-t-norm-lukasiewicz.o \
$(release_folder)/common-data-modifier-imputer-knn-median.o \
Expand Down Expand Up @@ -1079,6 +1095,7 @@ $(release_folder)/tnorms-t-norm-product.o \
$(release_folder)/partitions-fcm-T.o \
$(release_folder)/neuro-fuzzy-granular_subspace_annbfis_classification.o \
$(release_folder)/neuro-fuzzy-granular_nfs.o \
$(release_folder)/readers-train_test_model.o \
$(release_folder)/auxiliary-mathematics.o \
$(release_folder)/tnorms-t-norm-fodor.o \
$(release_folder)/neuro-fuzzy-consequence-MA.o \
Expand All @@ -1087,6 +1104,7 @@ $(release_folder)/descriptors-descriptor-gaussian-subspace.o \
$(release_folder)/descriptors-descriptor-singleton.o \
$(release_folder)/common-datum.o \
$(release_folder)/neuro-fuzzy-fac_prototype_minkowski_regression.o \
$(release_folder)/readers-train_validation_test_model.o \
$(release_folder)/metrics-metric-minkowski.o \
$(release_folder)/tnorms-t-norm-sugeno-weber.o \
$(release_folder)/common-data-modifier-normaliser.o \
Expand Down Expand Up @@ -1166,6 +1184,7 @@ $(debug_folder)/metrics-metric-chebyshev.o \
$(debug_folder)/common-data-modifier-imputer-knn-average.o \
$(debug_folder)/tnorms-t-norm-frank.o \
$(debug_folder)/partitions-cluster.o \
$(debug_folder)/readers-cross_validation_model.o \
$(debug_folder)/partitions-fcm-possibilistic.o \
$(debug_folder)/common-data-modifier-filter.o \
$(debug_folder)/auxiliary-tempus.o \
Expand Down Expand Up @@ -1234,8 +1253,8 @@ $(debug_folder)/partitions-fcm.o \
$(debug_folder)/neuro-fuzzy-ma.o \
$(debug_folder)/descriptors-descriptor-interval-gaussian-subspace.o \
$(debug_folder)/descriptors-descriptor-triangular.o \
$(debug_folder)/readers-weighted_reader_complete.o \
$(debug_folder)/implications-imp-fodor.o \
$(debug_folder)/readers-weighted_reader_complete.o \
$(debug_folder)/snorms-s-norm-lukasiewicz.o \
$(debug_folder)/tnorms-t-norm-lukasiewicz.o \
$(debug_folder)/common-data-modifier-imputer-knn-median.o \
Expand Down Expand Up @@ -1290,6 +1309,7 @@ $(debug_folder)/tnorms-t-norm-product.o \
$(debug_folder)/partitions-fcm-T.o \
$(debug_folder)/neuro-fuzzy-granular_subspace_annbfis_classification.o \
$(debug_folder)/neuro-fuzzy-granular_nfs.o \
$(debug_folder)/readers-train_test_model.o \
$(debug_folder)/auxiliary-mathematics.o \
$(debug_folder)/tnorms-t-norm-fodor.o \
$(debug_folder)/neuro-fuzzy-consequence-MA.o \
Expand All @@ -1298,6 +1318,7 @@ $(debug_folder)/descriptors-descriptor-gaussian-subspace.o \
$(debug_folder)/descriptors-descriptor-singleton.o \
$(debug_folder)/common-datum.o \
$(debug_folder)/neuro-fuzzy-fac_prototype_minkowski_regression.o \
$(debug_folder)/readers-train_validation_test_model.o \
$(debug_folder)/metrics-metric-minkowski.o \
$(debug_folder)/tnorms-t-norm-sugeno-weber.o \
$(debug_folder)/common-data-modifier-normaliser.o \
Expand Down
42 changes: 42 additions & 0 deletions source/readers/cross_validation_model.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
/** @file */

#include "cross_validation_model.h"

ksi::cross_validation_model::cross_validation_model(ksi::reader& source_reader)
: pReader(source_reader.clone()) {}

ksi::cross_validation_model::cross_validation_model(const cross_validation_model& other)
: pReader(other.pReader)
{
std::lock_guard<std::mutex> lock(other.datasets_mutex);
datasets = other.datasets;
}

ksi::cross_validation_model::cross_validation_model(cross_validation_model&& other) noexcept
: pReader(std::move(other.pReader))
{
std::lock_guard<std::mutex> lock(other.datasets_mutex);
datasets = std::move(other.datasets);
}

/**
 * Copy assignment operator.
 *
 * The reader pointer is copied, so both objects refer to the same reader
 * afterwards, and the datasets of the source object are copied into this one.
 *
 * Both this object's and the source object's datasets_mutex are held during
 * the assignment: this object's datasets are overwritten and the source's
 * datasets are read, so each side has to be protected. std::scoped_lock
 * acquires the two mutexes with deadlock avoidance, so two threads running
 * "a = b" and "b = a" at the same time cannot block each other forever.
 *
 * @param other The object to copy from.
 * @return Reference to this object.
 */
ksi::cross_validation_model& ksi::cross_validation_model::operator=(const cross_validation_model& other)
{
    // The self-assignment check also guarantees that two distinct mutexes are locked below.
    if (this != &other)
    {
        std::scoped_lock lock(datasets_mutex, other.datasets_mutex);
        pReader = other.pReader;
        datasets = other.datasets;
    }
    return *this;
}

/**
 * Move assignment operator.
 *
 * Takes over the reader pointer and the datasets of the source object.
 *
 * Both this object's and the source object's datasets_mutex are held during
 * the assignment: this object's datasets are overwritten and the source's
 * datasets are moved from, so each side has to be protected. std::scoped_lock
 * acquires the two mutexes with deadlock avoidance.
 *
 * NOTE(review): the operator is declared noexcept, but locking a std::mutex
 * may throw std::system_error; such a failure would terminate the program.
 *
 * @param other The object to move from.
 * @return Reference to this object.
 */
ksi::cross_validation_model& ksi::cross_validation_model::operator=(cross_validation_model&& other) noexcept
{
    // The self-assignment check also guarantees that two distinct mutexes are locked below.
    if (this != &other)
    {
        std::scoped_lock lock(datasets_mutex, other.datasets_mutex);
        pReader = std::move(other.pReader);
        datasets = std::move(other.datasets);
    }
    return *this;
}
141 changes: 141 additions & 0 deletions source/readers/cross_validation_model.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
/** @file */

#ifndef CROSS_VALIDATION_MODEL_H
#define CROSS_VALIDATION_MODEL_H

#include "reader.h"

#include <filesystem>
#include <memory>
#include <vector>
#include <mutex>

namespace ksi
{
/**
* @class cross_validation_model
* Abstract base class for the cross-validation models.
*
* The class derives from reader and keeps a pointer to a reader together
* with a vector of datasets. Splitting, saving and reading of the data are
* declared here as pure virtual methods and have to be implemented by the
* derived classes.
*
* @date 2024-05-30
* @author Konrad Wnuk
*/
class cross_validation_model : public reader
{
protected:
/** Pointer to the reader object used to read datasets.
*   Copies of a cross_validation_model share the same reader object. */
std::shared_ptr<reader> pReader = nullptr;

/** Vector holding the subsets of the dataset */
std::vector<dataset> datasets;

/** Mutex for synchronizing access to datasets.
*   Declared mutable, so it can also be locked through a const object. */
mutable std::mutex datasets_mutex;

public:
/**
* Constructor for cross_validation_model.
* The result of source_reader.clone() is stored in pReader.
*
* @param source_reader The reader object whose clone initializes pReader.
* @date 2024-06-04
* @author Konrad Wnuk
*/
cross_validation_model(reader& source_reader);

/**
* Copy constructor.
* The reader pointer is copied (the reader object is shared, not cloned).
* The datasets are copied while the datasets mutex of other is held.
*
* @param other The cross_validation_model object to copy.
* @date 2024-06-08
* @author Konrad Wnuk
*/
cross_validation_model(const cross_validation_model& other);

/**
* Move constructor.
* Takes over the reader pointer and the datasets of other.
* The datasets are moved while the datasets mutex of other is held.
*
* @param other The cross_validation_model object to move.
* @date 2024-06-08
* @author Konrad Wnuk
*/
cross_validation_model(cross_validation_model&& other) noexcept;

/**
* Copy assignment operator.
* The reader pointer is copied (the reader object is shared, not cloned)
* and the datasets of other are copied. Self-assignment is a no-op.
*
* @param other The cross_validation_model object to copy.
* @return Reference to the assigned cross_validation_model object.
* @date 2024-06-08
* @author Konrad Wnuk
*/
cross_validation_model& operator=(const cross_validation_model& other);

/**
* Move assignment operator.
* Takes over the reader pointer and the datasets of other.
* Self-assignment is a no-op.
*
* @param other The cross_validation_model object to move.
* @return Reference to the assigned cross_validation_model object.
* @date 2024-06-08
* @author Konrad Wnuk
*/
cross_validation_model& operator=(cross_validation_model&& other) noexcept;

/**
* Virtual destructor for cross_validation_model.
*/
virtual ~cross_validation_model() = default;

/**
* Splits the data into a specified number of subsets.
* Pure virtual: implemented by the derived classes.
*
* @param base_dataset The dataset to be split.
* @param n The number of subsets to split the data into. Default is 10.
* @date 2024-05-30
* @author Konrad Wnuk
*/
virtual void split(const dataset & base_dataset, const unsigned int n = 10) = 0;

/**
* Saves the data to a specified directory.
* Pure virtual: implemented by the derived classes.
*
* @param directory The directory where the data will be saved.
* @param filename The base name for the files to be saved. Default is "dataset".
* @param extension The file extension for the files to be saved. Default is ".data".
* @param overwrite Flag to control whether to overwrite the existing files. Default is false.
* @date 2024-06-30
* @author Konrad Wnuk
*/
virtual void save(const std::filesystem::path& directory, const std::filesystem::path& filename = "dataset", const std::filesystem::path& extension = ".data", const bool overwrite = false) const = 0;

/**
* Reads data from a specified file.
* Pure virtual: implemented by the derived classes.
*
* @param file_directory The path of the file to read the data from.
* @return A dataset containing the data read from the file.
* @date 2024-05-30
* @author Konrad Wnuk
*/
virtual dataset read_file(const std::filesystem::path& file_directory) = 0;

/**
* Reads data from a specified directory.
* Pure virtual: implemented by the derived classes.
*
* @param directory The directory to read the data from.
* @param file_regex_pattern The regular expression the file names have to match. Defaults to ".*\\.data$".
* @date 2024-05-30
* @author Konrad Wnuk
*/
virtual void read_directory(const std::filesystem::path& directory, const std::string& file_regex_pattern = ".*\\.data$") = 0;

/**
* Reads data from a file and splits it into subsets.
* Pure virtual: implemented by the derived classes.
*
* @param file_path The path of the file to read the data from.
* @param n The number of subsets to split the data into. Default is 10.
* @date 2024-06-14
* @author Konrad Wnuk
*/
virtual void read_and_split_file(const std::filesystem::path& file_path, const unsigned int n = 10) = 0;
};
}

#endif
Loading

0 comments on commit 9cf31f7

Please sign in to comment.