diff --git a/lib/src/Base/Stat/CMakeLists.txt b/lib/src/Base/Stat/CMakeLists.txt
index 09a8969de1..cf85ca68fe 100644
--- a/lib/src/Base/Stat/CMakeLists.txt
+++ b/lib/src/Base/Stat/CMakeLists.txt
@@ -7,6 +7,7 @@ ot_add_source_file (Compact.cxx)
 ot_add_source_file (CorrelationAnalysis.cxx)
 ot_add_source_file (CorrelationMatrix.cxx)
 ot_add_source_file (CovarianceMatrix.cxx)
+ot_add_source_file (CSVParser.cxx)
 ot_add_source_file (FaureSequence.cxx)
 ot_add_source_file (Full.cxx)
 ot_add_source_file (HaltonSequence.cxx)
@@ -75,6 +76,7 @@ ot_add_source_file (LatentVariableModel.cxx)
 ot_install_header_file (HistoryStrategyImplementation.hxx)
 ot_install_header_file (SobolSequence.hxx)
 ot_install_header_file (CovarianceMatrix.hxx)
+ot_install_header_file (CSVParser.hxx)
 ot_install_header_file (TestResult.hxx)
 ot_install_header_file (FaureSequence.hxx)
 ot_install_header_file (HaltonSequence.hxx)
diff --git a/lib/src/Base/Stat/CSVParser.cxx b/lib/src/Base/Stat/CSVParser.cxx
new file mode 100644
index 0000000000..285c9b2872
--- /dev/null
+++ b/lib/src/Base/Stat/CSVParser.cxx
@@ -0,0 +1,249 @@
+//                                               -*- C++ -*-
+/**
+ *  @brief CSV parser
+ *
+ *  Copyright 2005-2025 Airbus-EDF-IMACS-ONERA-Phimeca
+ *
+ *  This library is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU Lesser General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This library is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Lesser General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Lesser General Public License
+ *  along with this library.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <cerrno>
+#include <cstring>
+#include <fstream>
+#include <limits>
+#include <locale>
+#include <map>
+#include <sstream>
+
+#include "openturns/CSVParser.hxx"
+#include "openturns/PersistentObjectFactory.hxx"
+#include "openturns/SpecFunc.hxx"
+#include "rapidcsv.h"
+
+BEGIN_NAMESPACE_OPENTURNS
+
+/**
+ * @class CSVParser
+ *
+ * Reads a delimited text file into a SampleImplementation.
+ */
+
+CLASSNAMEINIT(CSVParser)
+
+static const Factory<CSVParser> Factory_CSVParser;
+
+/* Constructor without parameters */
+CSVParser::CSVParser()
+  : PersistentObject()
+{
+  // Nothing to do
+}
+
+/* Parameters constructor: only stores the file name, load() opens the file */
+CSVParser::CSVParser(const String & fileName)
+  : PersistentObject()
+  , fileName_(fileName)
+{
+  // Nothing to do
+}
+
+/* Virtual constructor */
+CSVParser * CSVParser::clone() const
+{
+  return new CSVParser(*this);
+}
+
+/* String converter */
+String CSVParser::__repr__() const
+{
+  OSS oss(true);
+  oss << "class= " << CSVParser::GetClassName();
+  return oss;
+}
+
+/* String converter */
+String CSVParser::__str__(const String & ) const
+{
+  OSS oss(false);
+  oss << CSVParser::GetClassName() << "(separator = " << fieldSeparator_ << ")";
+  return oss;
+}
+
+/* Method save() stores the object through the StorageManager */
+void CSVParser::save(Advocate & adv) const
+{
+  PersistentObject::save(adv);
+  adv.saveAttribute( "fileName_", fileName_);
+  // the separators are stored as integers, load() converts them back to char
+  adv.saveAttribute( "fieldSeparator_", static_cast<UnsignedInteger>(fieldSeparator_));
+  adv.saveAttribute( "allowComments_", allowComments_);
+  adv.saveAttribute( "allowEmptyLines_", allowEmptyLines_);
+  adv.saveAttribute( "skippedLinesNumber_", skippedLinesNumber_);
+  adv.saveAttribute( "decimalSeparator_", static_cast<UnsignedInteger>(decimalSeparator_));
+}
+
+/* Method load() reloads the object from the StorageManager */
+void CSVParser::load(Advocate & adv)
+{
+  PersistentObject::load(adv);
+  adv.loadAttribute( "fileName_", fileName_);
+  UnsignedInteger fieldSeparator = 0;
+  adv.loadAttribute( "fieldSeparator_", fieldSeparator);
+  fieldSeparator_ = static_cast<char>(fieldSeparator);
+  adv.loadAttribute( "allowComments_", allowComments_);
+  adv.loadAttribute( "allowEmptyLines_", allowEmptyLines_);
+  adv.loadAttribute( "skippedLinesNumber_", skippedLinesNumber_);
+  UnsignedInteger decimalSeparator = 0;
+  adv.loadAttribute( "decimalSeparator_", decimalSeparator);
+  decimalSeparator_ = static_cast<char>(decimalSeparator);
+}
+
+/* Field separator accessor */
+void CSVParser::setFieldSeparator(const char fieldSeparator)
+{
+  fieldSeparator_ = fieldSeparator;
+}
+
+/* Comment lines skipping flag accessor */
+void CSVParser::setAllowComments(const Bool allowComments)
+{
+  allowComments_ = allowComments;
+}
+
+/* Empty lines skipping flag accessor */
+void CSVParser::setAllowEmptyLines(const Bool allowEmptyLines)
+{
+  allowEmptyLines_ = allowEmptyLines;
+}
+
+/* Number of skipped lines accessor */
+void CSVParser::setSkippedLinesNumber(const UnsignedInteger skippedLinesNumber)
+{
+  skippedLinesNumber_ = skippedLinesNumber;
+}
+
+/* Decimal separator accessor */
+void CSVParser::setNumericalSeparator(const char decimalSeparator)
+{
+  decimalSeparator_ = decimalSeparator;
+}
+
+/* Read the file and build the sample: unparsable cells are set to nan and
+ * a first row holding such a cell is used as the description */
+SampleImplementation CSVParser::load() const
+{
+  std::ifstream theFile(fileName_);
+  if (!theFile.is_open())
+    throw FileNotFoundException(HERE) << "Cannot open file '" << fileName_ << "'. Reason: " << std::strerror(errno);
+  if (fieldSeparator_ == decimalSeparator_)
+    throw InvalidArgumentException(HERE) << "The field separator must be different from the decimal separator";
+  const String commentMarkers(ResourceMap::GetAsString("Sample-CommentMarker"));
+  if (commentMarkers.size() != 1)
+    throw InvalidArgumentException(HERE) << "The entry Sample-CommentMarker must be a string of size 1";
+  // the comment marker only matters when comment lines are skipped
+  if (allowComments_ && (commentMarkers[0] == fieldSeparator_ || commentMarkers[0] == decimalSeparator_))
+    throw InvalidArgumentException(HERE) << "The comment marker must be different from the field and decimal separators";
+
+  rapidcsv::LabelParams pLabelParams(-1, -1);
+  rapidcsv::SeparatorParams pSeparatorParams(fieldSeparator_, true);
+  rapidcsv::ConverterParams pConverterParams;
+  rapidcsv::LineReaderParams pLineReaderParams;
+  pLineReaderParams.mSkipCommentLines = allowComments_;
+  pLineReaderParams.mCommentPrefix = commentMarkers[0];
+  pLineReaderParams.mSkipEmptyLines = allowEmptyLines_;
+  pLineReaderParams.mSkippedLinesCount = skippedLinesNumber_;
+  rapidcsv::Document doc(fileName_, pLabelParams, pSeparatorParams, pConverterParams, pLineReaderParams);
+  const UnsignedInteger rowCount = doc.GetRowCount();
+  const UnsignedInteger columnCount = doc.GetColumnCount();
+  SampleImplementation result(rowCount, columnCount);
+
+  // spellings of infinity accepted when the stream extraction fails
+  const Scalar inf = SpecFunc::Infinity;
+  const std::map<String, Scalar> infMap =
+  {
+    {"inf", inf}, {"+inf", inf}, {"-inf", -inf},
+    {"Inf", inf}, {"+Inf", inf}, {"-Inf", -inf},
+    {"INF", inf}, {"+INF", inf}, {"-INF", -inf}
+  };
+  const Scalar nanValue = std::numeric_limits<Scalar>::quiet_NaN();
+  // built once for all the cells, the locale takes ownership of the facet
+  const std::locale cellLocale(std::locale::classic(), new CSVParserFormat(decimalSeparator_));
+  Bool haveHeaders = false;
+  for (UnsignedInteger i = 0; i < rowCount; ++ i)
+  {
+    for (UnsignedInteger j = 0; j < columnCount; ++ j)
+    {
+      Scalar pVal = nanValue;
+      try
+      {
+        const String pStr = doc.GetCell<std::string>(j, i);
+        std::istringstream iss(pStr);
+        iss.imbue(cellLocale);
+        iss >> pVal;
+        // fail() also covers bad(); the eof() test rejects trailing characters
+        if (iss.fail() || !iss.eof())
+        {
+          // handle inf values, other invalid values are set to nan
+          const auto it = infMap.find(pStr);
+          pVal = (it != infMap.end()) ? it->second : nanValue;
+
+          // headers if unparsable values on the first row
+          // NOTE(review): an inf spelling on the first row also flags it as headers - confirm this is intended
+          if (i == 0)
+            haveHeaders = true;
+        }
+      }
+      catch (...)
+      {
+        // GetCell fails when line is incomplete
+        pVal = nanValue;
+      }
+      result(i, j) = pVal;
+    }
+  }
+
+  if (haveHeaders)
+  {
+    Description description;
+    description.resize(columnCount);
+    for (UnsignedInteger j = 0; j < columnCount; ++ j)
+    {
+      try
+      {
+        description[j] = doc.GetCell<std::string>(j, 0);
+      }
+      catch (...)
+      {
+        // line may be incomplete: the component is left empty
+      }
+    }
+    // reject empty components
+    for (UnsignedInteger j = 0; j < description.getSize(); ++ j)
+      if (description[j].empty())
+      {
+        description = Description::BuildDefault(result.getDimension(), "data_");
+        break;
+      }
+    result.setDescription(description);
+    result.erase(0, 1);
+  }
+  else
+    result.setDescription(Description::BuildDefault(result.getDimension(), "data_"));
+  result.setName(fileName_);
+  return result;
+}
+
+
+END_NAMESPACE_OPENTURNS
diff --git a/lib/src/Base/Stat/SampleImplementation.cxx b/lib/src/Base/Stat/SampleImplementation.cxx index d3c7748be2..2bfdad2fc5 100644 --- a/lib/src/Base/Stat/SampleImplementation.cxx +++ b/lib/src/Base/Stat/SampleImplementation.cxx @@ -38,9 +38,9 @@ #include "openturns/TBBImplementation.hxx" #include "openturns/SpecFunc.hxx" #include "openturns/Lapack.hxx" +#include "openturns/CSVParser.hxx" #include "kendall.h" -#include "rapidcsv.h" BEGIN_NAMESPACE_OPENTURNS @@ -280,159 +280,6 @@ static const Factory Factory_SampleImplementation; CLASSNAMEINIT(SampleImplementation) -struct CSVParserFormat : std::numpunct -{ - explicit CSVParserFormat(const char_type decimalSeparator) - : decimalSeparator_(decimalSeparator) {} - - char_type do_decimal_point() const { return decimalSeparator_; } - char_type decimalSeparator_; -}; - - -class CSVParser -{ -public: - CSVParser(const String & fileName) - : fileName_(fileName) {} - - void setFieldSeparator(const char fieldSeparator) - { - fieldSeparator_ = fieldSeparator; - } - - void setAllowComments(const Bool allowComments) - { - allowComments_ = allowComments; - } - - void setAllowEmptyLines(const Bool allowEmptyLines) - { - allowEmptyLines_ = allowEmptyLines; - } - - void setSkippedLinesNumber(const UnsignedInteger skippedLinesNumber) - { - skippedLinesNumber_ = skippedLinesNumber; - } - - void setNumericalSeparator(const char decimalSeparator) - { - decimalSeparator_ = decimalSeparator; - } - - SampleImplementation load() const - { - std::ifstream 
theFile(fileName_); - if (!theFile.is_open()) - throw FileNotFoundException(HERE) << "Cannot open file '" << fileName_ << "'. Reason: " << std::strerror(errno); - if (fieldSeparator_ == decimalSeparator_) - throw InvalidArgumentException(HERE) << "The field separator must be different from the decimal separator"; - rapidcsv::LabelParams pLabelParams(-1, -1); - rapidcsv::SeparatorParams pSeparatorParams(fieldSeparator_, true); - rapidcsv::ConverterParams pConverterParams; - rapidcsv::LineReaderParams pLineReaderParams; - pLineReaderParams.mSkipCommentLines = allowComments_; - const String commentMarkers(ResourceMap::GetAsString("Sample-CommentMarker")); - if (commentMarkers.size() != 1) - throw InvalidArgumentException(HERE) << "The entry Sample-CommentMarker must be a string of size 1"; - if (allowEmptyLines_ && (commentMarkers[0] == fieldSeparator_ || commentMarkers[0] == decimalSeparator_)) - throw InvalidArgumentException(HERE) << "The comment marker must be different from the field and decimal separators"; - pLineReaderParams.mCommentPrefix = commentMarkers[0]; - pLineReaderParams.mSkipEmptyLines = allowEmptyLines_; - pLineReaderParams.mSkippedLinesCount = skippedLinesNumber_; - Description description; - rapidcsv::Document doc(fileName_, pLabelParams, pSeparatorParams, pConverterParams, pLineReaderParams); - SampleImplementation result(doc.GetRowCount(), doc.GetColumnCount()); - Bool haveHeaders = false; - std::map infMap; - infMap["inf"] = SpecFunc::Infinity; - infMap["+inf"] = SpecFunc::Infinity; - infMap["-inf"] = -SpecFunc::Infinity; - infMap["Inf"] = SpecFunc::Infinity; - infMap["+Inf"] = SpecFunc::Infinity; - infMap["-Inf"] = -SpecFunc::Infinity; - infMap["INF"] = SpecFunc::Infinity; - infMap["+INF"] = SpecFunc::Infinity; - infMap["-INF"] = -SpecFunc::Infinity; - for (UnsignedInteger i = 0; i < doc.GetRowCount(); ++ i) - { - for (UnsignedInteger j = 0; j < doc.GetColumnCount(); ++ j) - { - try - { - const String pStr = doc.GetCell(j, i); - 
std::istringstream iss(pStr); - iss.imbue(std::locale(std::locale::classic(), new CSVParserFormat(decimalSeparator_))); - Scalar pVal = 0.0; - iss >> pVal; - if (iss.fail() || iss.bad() || !iss.eof()) - { - // handle inf values - try - { - pVal = infMap.at(pStr); - } - catch (std::out_of_range &) - { - // invalid values are set to nan - pVal = std::numeric_limits::quiet_NaN(); - } - - // headers if unparsable values on the first row - if (i == 0) - haveHeaders = true; - } - result(i, j) = pVal; - } - catch (...) - { - // GetCell fails when line is incomplete - result(i, j) = std::numeric_limits::quiet_NaN(); - } - } - } - - if (haveHeaders) - { - description.resize(doc.GetColumnCount()); - for (UnsignedInteger j = 0; j < doc.GetColumnCount(); ++ j) - { - try - { - description[j] = doc.GetCell(j, 0); - } - catch (...) - { - // line may be incomplete - } - } - // reject empty components - for (UnsignedInteger j = 0; j < description.getSize(); ++ j) - if (description[j].empty()) - { - description = Description::BuildDefault(result.getDimension(), "data_"); - break; - } - result.setDescription(description); - result.erase(0, 1); - } - else - result.setDescription(Description::BuildDefault(result.getDimension(), "data_")); - result.setName(fileName_); - return result; - } -private: - String fileName_; - char fieldSeparator_ = ','; - Bool allowComments_ = false; - Bool allowEmptyLines_ = false; - UnsignedInteger skippedLinesNumber_ = 0; - char decimalSeparator_ = '.'; -}; - - - /* Factory of SampleImplementation from CSV file */ SampleImplementation SampleImplementation::BuildFromCSVFile(const FileName & fileName, const String & separator) @@ -2132,7 +1979,7 @@ void SampleImplementation::exportToCSVFile(const FileName & fileName, if (csvFile.fail()) throw FileOpenException(HERE) << "Could not open file " << fileName; - csvFile.imbue(std::locale(std::locale::classic(), new CSVParserFormat(decimalSeparator[0]))); + csvFile.imbue(std::locale(std::locale::classic(), new 
CSVParser::CSVParserFormat(decimalSeparator[0]))); // Export the description if (!p_description_.isNull())
diff --git a/lib/src/Base/Stat/openturns/CSVParser.hxx b/lib/src/Base/Stat/openturns/CSVParser.hxx
new file mode 100644
index 0000000000..935930caaf
--- /dev/null
+++ b/lib/src/Base/Stat/openturns/CSVParser.hxx
@@ -0,0 +1,98 @@
+//                                               -*- C++ -*-
+/**
+ *  @brief CSV parser
+ *
+ *  Copyright 2005-2025 Airbus-EDF-IMACS-ONERA-Phimeca
+ *
+ *  This library is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU Lesser General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This library is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Lesser General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Lesser General Public License
+ *  along with this library.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+#ifndef OPENTURNS_CSVPARSER_HXX
+#define OPENTURNS_CSVPARSER_HXX
+
+#include "openturns/PersistentObject.hxx"
+#include "openturns/SampleImplementation.hxx"
+#include <locale>
+
+BEGIN_NAMESPACE_OPENTURNS
+
+/**
+ * @class CSVParser
+ *
+ * Loads the content of a delimited text file into a SampleImplementation.
+ */
+
+class OT_API CSVParser : public PersistentObject
+{
+  CLASSNAME
+public:
+
+  /** Default constructor without parameters */
+  CSVParser();
+
+  /** Parameters constructor */
+  CSVParser(const String & fileName);
+
+  /** Virtual copy constructor */
+  CSVParser * clone() const override;
+
+  /** Options accessors */
+  void setFieldSeparator(const char fieldSeparator);
+  void setAllowComments(const Bool allowComments);
+  void setAllowEmptyLines(const Bool allowEmptyLines);
+  void setSkippedLinesNumber(const UnsignedInteger skippedLinesNumber);
+  /** Sets the decimal separator (decimalSeparator_), despite its name */
+  void setNumericalSeparator(const char decimalSeparator);
+
+  /** Read the file and build the sample */
+  SampleImplementation load() const;
+
+  /** String converter */
+  String __repr__() const override;
+
+  /** String converter */
+  String __str__(const String & offset = "") const override;
+
+  /** Method save() stores the object through the StorageManager */
+  void save(Advocate & adv) const override;
+
+  /** Method load() reloads the object from the StorageManager */
+  void load(Advocate & adv) override;
+
+  /** Numeric punctuation facet with a custom decimal separator */
+  struct CSVParserFormat : std::numpunct<char>
+  {
+    explicit CSVParserFormat(const char_type decimalSeparator)
+      : decimalSeparator_(decimalSeparator) {}
+
+    char_type do_decimal_point() const override { return decimalSeparator_; }
+    char_type decimalSeparator_;
+  };
+
+private:
+  /** Path of the file to read */
+  String fileName_;
+
+  // options
+  char fieldSeparator_ = ',';
+  Bool allowComments_ = false;
+  Bool allowEmptyLines_ = false;
+  UnsignedInteger skippedLinesNumber_ = 0;
+  char decimalSeparator_ = '.';
+
+}; /* class CSVParser */
+
+END_NAMESPACE_OPENTURNS
+
+#endif /* OPENTURNS_CSVPARSER_HXX */