From 188257bc9b3098ae33927a59222823761bba9596 Mon Sep 17 00:00:00 2001 From: Edi Prifti Date: Thu, 2 Nov 2023 15:49:06 -0400 Subject: [PATCH] fixing issue with documentation initial setup run build --- .Rbuildignore | 4 +- .github/workflows/pkgdown.yaml | 2 +- .gitignore | 1 - docs/Authors.md | 14 + docs/Contact.md | 6 + docs/Contributing.md | 9 + docs/FAQs.md | 9 + docs/Features.md | 9 + docs/Installation.html | 411 ++++++++++++++++++ docs/Installation.md | 21 + docs/Screenshots.md | 9 + docs/Technologies.md | 8 + docs/Usage.md | 10 + man/AnalyseStableModels_LOO.Rd | 23 + man/LPO_best_models.Rd | 25 ++ man/analyzeImportanceFeatures.Rd | 58 +++ man/analyzeImportanceFeaturesFBM.Rd | 72 +++ man/analyzePopulationFeatures.Rd | 63 +++ man/bestModelFeatureStability.Rd | 25 ++ man/bestModelStability.Rd | 25 ++ man/cir_test.Rd | 17 + man/cir_train.Rd | 17 + man/cleanPopulation.Rd | 19 + man/computeCardEnrichment.Rd | 19 + man/computeCoeffSVMLin.Rd | 23 + man/computeConfusionMatrix.Rd | 23 + man/computeEffectSizes.Rd | 27 ++ man/computeFeatureMetrics.Rd | 18 + man/computeIntercept.Rd | 27 ++ man/counter.Rd | 11 + man/crossing.Rd | 23 + man/denseVecToModel.Rd | 27 ++ man/digest.Rd | 43 ++ man/digestModelCollection.Rd | 28 ++ man/disectModel.Rd | 25 ++ man/estimateFeatureImportance.Rd | 34 ++ man/evaluateAUC.Rd | 24 + man/evaluateAccuracy.Rd | 34 ++ man/evaluateAdditionnalMetrics.Rd | 25 ++ man/evaluateFeatureImportanceInPopulation.Rd | 55 +++ man/evaluateFit.Rd | 28 ++ man/evaluateIntercept.Rd | 23 + man/evaluateModel.Rd | 41 ++ man/evaluateModelRegression.Rd | 34 ++ man/evaluatePopulation.Rd | 47 ++ man/evaluatePrevalence.Rd | 19 + man/evaluateYhat.Rd | 37 ++ man/evolve.Rd | 25 ++ man/evolve2m.Rd | 32 ++ man/evolve3m.Rd | 32 ++ man/filterFeaturesByPrevalence.Rd | 32 ++ man/filterNoSignal.Rd | 20 + man/filterfeaturesK.Rd | 47 ++ man/findk.Rd | 40 ++ man/fit.Rd | 60 +++ man/generateAllCombinations.Rd | 11 + man/generator_metal.Rd | 44 ++ man/getFeaturePrevalence.Rd | 25 ++ 
man/getFitIndividual.Rd | 17 + man/getFitModel.Rd | 17 + man/getFitModels.Rd | 17 + man/getFitPopulation.Rd | 17 + man/getGraph.Rd | 21 + man/getImportanceFeaturesFBMobjects.Rd | 51 +++ man/getIndicesIndividual.Rd | 17 + man/getIndicesPopulation.Rd | 17 + man/getMaxMinPrevalenceModel.Rd | 23 + man/getModelScore.Rd | 21 + man/getNBestModels.Rd | 66 +++ man/getSign.Rd | 23 + man/get_IndividualToBeMutated.Rd | 18 + man/get_Parents.Rd | 18 + man/glmnetRR.Rd | 11 + man/ibd.Rd | 18 + man/index2names.Rd | 19 + man/individual.Rd | 39 ++ man/isClf.Rd | 17 + man/isExperiment.Rd | 17 + man/isLearnerSota.Rd | 17 + man/isModel.Rd | 17 + man/isModelBTR.Rd | 17 + man/isModelCollection.Rd | 17 + man/isModelSota.Rd | 17 + man/isModelSotaGLMNET.Rd | 17 + man/isModelSotaRF.Rd | 17 + man/isModelSotaSVM.Rd | 17 + man/isModelTerda.Rd | 17 + man/isPopulation.Rd | 17 + man/isclose.Rd | 20 + man/listOfDenseVecToListOfModels.Rd | 25 ++ man/listOfDenseVecToModelCollection.Rd | 23 + man/listOfModels2ModelCollection.Rd | 19 + man/listOfModelsToDenseCoefMatrix.Rd | 34 ++ man/listOfModelsToListOfDenseVec.Rd | 23 + man/listOfModelsToListOfSparseVec.Rd | 17 + man/listOfSparseVecToListOfModels.Rd | 27 ++ man/loadPopulation.Rd | 17 + man/loadResults.Rd | 11 + man/make.counter.Rd | 11 + man/makeFeatureAnnot.Rd | 24 + ...akeFeatureModelPrevalenceNetworkCooccur.Rd | 32 ++ man/makeFeatureModelPrevalenceNetworkMiic.Rd | 32 ++ man/mergeMeltBestScoreCV.Rd | 32 ++ man/mergeMeltImportanceCV.Rd | 56 +++ man/mergeMeltScoreCV.Rd | 31 ++ man/mergeMeltScoreEmpirical.Rd | 24 + man/mergeResults.Rd | 35 ++ man/metal.Rd | 98 +++++ man/modelCollectionToPopulation.Rd | 14 + man/modelToDenseVec.Rd | 19 + man/multipleRR.Rd | 27 ++ man/multipleRR_par.Rd | 27 ++ man/mutate.Rd | 23 + man/myAssert.Rd | 18 + man/myAssertNotNullNorNa.Rd | 19 + man/names2index.Rd | 19 + man/normModelCoeffs.Rd | 25 ++ man/obesity.Rd | 17 + man/plotAUC.Rd | 25 ++ man/plotAUCg.Rd | 27 ++ man/plotAbundanceByClass.Rd | 40 ++ 
man/plotComparativeBestCV.Rd | 30 ++ man/plotComparativeCV.Rd | 34 ++ man/plotComparativeEmpiricalScore.Rd | 28 ++ man/plotComparativeResults.Rd | 33 ++ man/plotComparativeResultsBest.Rd | 19 + man/plotFeatureModelCoeffs.Rd | 31 ++ man/plotImportanceFeaturesFBMobjects.Rd | 37 ++ man/plotModel.Rd | 46 ++ man/plotModelScore.Rd | 25 ++ man/plotPopulation.Rd | 40 ++ man/plotPrevalence.Rd | 43 ++ man/plotScoreBarcode.Rd | 25 ++ man/population.Rd | 32 ++ man/populationGet_X.Rd | 21 + man/populationSet_X.Rd | 19 + man/populationToDataFrame.Rd | 24 + man/printClassifier.Rd | 19 + man/printExperiment.Rd | 19 + man/printModel.Rd | 24 + man/printModelCollection.Rd | 23 + man/printPopulation.Rd | 21 + man/printy.Rd | 15 + man/resetTags.Rd | 23 + man/runClassifier.Rd | 25 ++ man/runCrossval.Rd | 25 ++ man/savePopulation.Rd | 16 + man/saveResults.Rd | 11 + man/scoreRatio.Rd | 21 + man/selectBestPopulation.Rd | 25 ++ man/selector_v1.Rd | 11 + man/sim_inter.Rd | 19 + man/sim_intra.Rd | 19 + man/sortPopulation.Rd | 21 + man/sota.glmnet.Rd | 17 + man/sota.rf.Rd | 118 +++++ man/sota.svm.Rd | 115 +++++ man/sparseVecToModel.Rd | 27 ++ man/summarySE.Rd | 30 ++ man/t2d.Rd | 18 + man/t2dw.Rd | 19 + man/tag_Couples.Rd | 21 + man/tag_SelectElite.Rd | 21 + man/tag_SelectRandom.Rd | 23 + man/tag_ToBeMutated.Rd | 23 + man/tag_select.Rd | 24 + man/terBeam.Rd | 105 +++++ man/terda.Rd | 114 +++++ man/terga1.Rd | 127 ++++++ man/terga2.Rd | 196 +++++++++ man/updateModelIndex.Rd | 19 + man/updateObjectIndex.Rd | 19 + 172 files changed, 5301 insertions(+), 4 deletions(-) create mode 100644 docs/Authors.md create mode 100644 docs/Contact.md create mode 100644 docs/Contributing.md create mode 100644 docs/FAQs.md create mode 100644 docs/Features.md create mode 100644 docs/Installation.html create mode 100644 docs/Installation.md create mode 100644 docs/Screenshots.md create mode 100644 docs/Technologies.md create mode 100644 docs/Usage.md create mode 100644 man/AnalyseStableModels_LOO.Rd create mode 
100644 man/LPO_best_models.Rd create mode 100644 man/analyzeImportanceFeatures.Rd create mode 100644 man/analyzeImportanceFeaturesFBM.Rd create mode 100644 man/analyzePopulationFeatures.Rd create mode 100644 man/bestModelFeatureStability.Rd create mode 100644 man/bestModelStability.Rd create mode 100644 man/cir_test.Rd create mode 100644 man/cir_train.Rd create mode 100644 man/cleanPopulation.Rd create mode 100644 man/computeCardEnrichment.Rd create mode 100644 man/computeCoeffSVMLin.Rd create mode 100644 man/computeConfusionMatrix.Rd create mode 100644 man/computeEffectSizes.Rd create mode 100644 man/computeFeatureMetrics.Rd create mode 100644 man/computeIntercept.Rd create mode 100644 man/counter.Rd create mode 100644 man/crossing.Rd create mode 100644 man/denseVecToModel.Rd create mode 100644 man/digest.Rd create mode 100644 man/digestModelCollection.Rd create mode 100644 man/disectModel.Rd create mode 100644 man/estimateFeatureImportance.Rd create mode 100644 man/evaluateAUC.Rd create mode 100644 man/evaluateAccuracy.Rd create mode 100644 man/evaluateAdditionnalMetrics.Rd create mode 100644 man/evaluateFeatureImportanceInPopulation.Rd create mode 100644 man/evaluateFit.Rd create mode 100644 man/evaluateIntercept.Rd create mode 100644 man/evaluateModel.Rd create mode 100644 man/evaluateModelRegression.Rd create mode 100644 man/evaluatePopulation.Rd create mode 100644 man/evaluatePrevalence.Rd create mode 100644 man/evaluateYhat.Rd create mode 100644 man/evolve.Rd create mode 100644 man/evolve2m.Rd create mode 100644 man/evolve3m.Rd create mode 100644 man/filterFeaturesByPrevalence.Rd create mode 100644 man/filterNoSignal.Rd create mode 100644 man/filterfeaturesK.Rd create mode 100644 man/findk.Rd create mode 100644 man/fit.Rd create mode 100644 man/generateAllCombinations.Rd create mode 100644 man/generator_metal.Rd create mode 100644 man/getFeaturePrevalence.Rd create mode 100644 man/getFitIndividual.Rd create mode 100644 man/getFitModel.Rd create mode 100644 
man/getFitModels.Rd create mode 100644 man/getFitPopulation.Rd create mode 100644 man/getGraph.Rd create mode 100644 man/getImportanceFeaturesFBMobjects.Rd create mode 100644 man/getIndicesIndividual.Rd create mode 100644 man/getIndicesPopulation.Rd create mode 100644 man/getMaxMinPrevalenceModel.Rd create mode 100644 man/getModelScore.Rd create mode 100644 man/getNBestModels.Rd create mode 100644 man/getSign.Rd create mode 100644 man/get_IndividualToBeMutated.Rd create mode 100644 man/get_Parents.Rd create mode 100644 man/glmnetRR.Rd create mode 100644 man/ibd.Rd create mode 100644 man/index2names.Rd create mode 100644 man/individual.Rd create mode 100644 man/isClf.Rd create mode 100644 man/isExperiment.Rd create mode 100644 man/isLearnerSota.Rd create mode 100644 man/isModel.Rd create mode 100644 man/isModelBTR.Rd create mode 100644 man/isModelCollection.Rd create mode 100644 man/isModelSota.Rd create mode 100644 man/isModelSotaGLMNET.Rd create mode 100644 man/isModelSotaRF.Rd create mode 100644 man/isModelSotaSVM.Rd create mode 100644 man/isModelTerda.Rd create mode 100644 man/isPopulation.Rd create mode 100644 man/isclose.Rd create mode 100644 man/listOfDenseVecToListOfModels.Rd create mode 100644 man/listOfDenseVecToModelCollection.Rd create mode 100644 man/listOfModels2ModelCollection.Rd create mode 100644 man/listOfModelsToDenseCoefMatrix.Rd create mode 100644 man/listOfModelsToListOfDenseVec.Rd create mode 100644 man/listOfModelsToListOfSparseVec.Rd create mode 100644 man/listOfSparseVecToListOfModels.Rd create mode 100644 man/loadPopulation.Rd create mode 100644 man/loadResults.Rd create mode 100644 man/make.counter.Rd create mode 100644 man/makeFeatureAnnot.Rd create mode 100644 man/makeFeatureModelPrevalenceNetworkCooccur.Rd create mode 100644 man/makeFeatureModelPrevalenceNetworkMiic.Rd create mode 100644 man/mergeMeltBestScoreCV.Rd create mode 100644 man/mergeMeltImportanceCV.Rd create mode 100644 man/mergeMeltScoreCV.Rd create mode 100644 
man/mergeMeltScoreEmpirical.Rd create mode 100644 man/mergeResults.Rd create mode 100644 man/metal.Rd create mode 100644 man/modelCollectionToPopulation.Rd create mode 100644 man/modelToDenseVec.Rd create mode 100644 man/multipleRR.Rd create mode 100644 man/multipleRR_par.Rd create mode 100644 man/mutate.Rd create mode 100644 man/myAssert.Rd create mode 100644 man/myAssertNotNullNorNa.Rd create mode 100644 man/names2index.Rd create mode 100644 man/normModelCoeffs.Rd create mode 100644 man/obesity.Rd create mode 100644 man/plotAUC.Rd create mode 100644 man/plotAUCg.Rd create mode 100644 man/plotAbundanceByClass.Rd create mode 100644 man/plotComparativeBestCV.Rd create mode 100644 man/plotComparativeCV.Rd create mode 100644 man/plotComparativeEmpiricalScore.Rd create mode 100644 man/plotComparativeResults.Rd create mode 100644 man/plotComparativeResultsBest.Rd create mode 100644 man/plotFeatureModelCoeffs.Rd create mode 100644 man/plotImportanceFeaturesFBMobjects.Rd create mode 100644 man/plotModel.Rd create mode 100644 man/plotModelScore.Rd create mode 100644 man/plotPopulation.Rd create mode 100644 man/plotPrevalence.Rd create mode 100644 man/plotScoreBarcode.Rd create mode 100644 man/population.Rd create mode 100644 man/populationGet_X.Rd create mode 100644 man/populationSet_X.Rd create mode 100644 man/populationToDataFrame.Rd create mode 100644 man/printClassifier.Rd create mode 100644 man/printExperiment.Rd create mode 100644 man/printModel.Rd create mode 100644 man/printModelCollection.Rd create mode 100644 man/printPopulation.Rd create mode 100644 man/printy.Rd create mode 100644 man/resetTags.Rd create mode 100644 man/runClassifier.Rd create mode 100644 man/runCrossval.Rd create mode 100644 man/savePopulation.Rd create mode 100644 man/saveResults.Rd create mode 100644 man/scoreRatio.Rd create mode 100644 man/selectBestPopulation.Rd create mode 100644 man/selector_v1.Rd create mode 100644 man/sim_inter.Rd create mode 100644 man/sim_intra.Rd create mode 100644 
man/sortPopulation.Rd create mode 100644 man/sota.glmnet.Rd create mode 100644 man/sota.rf.Rd create mode 100644 man/sota.svm.Rd create mode 100644 man/sparseVecToModel.Rd create mode 100644 man/summarySE.Rd create mode 100644 man/t2d.Rd create mode 100644 man/t2dw.Rd create mode 100644 man/tag_Couples.Rd create mode 100644 man/tag_SelectElite.Rd create mode 100644 man/tag_SelectRandom.Rd create mode 100644 man/tag_ToBeMutated.Rd create mode 100644 man/tag_select.Rd create mode 100644 man/terBeam.Rd create mode 100644 man/terda.Rd create mode 100644 man/terga1.Rd create mode 100644 man/terga2.Rd create mode 100644 man/updateModelIndex.Rd create mode 100644 man/updateObjectIndex.Rd diff --git a/.Rbuildignore b/.Rbuildignore index badd1b1..2a1e170 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -3,8 +3,8 @@ debug .Rcheck ..Rcheck -^vignettes/PredomicsPlotting\.Rmd$ -^vignettes/vignette\.Rmd$ +^vignettes$ +^vignettes_disabled$ ^_pkgdown\.yml$ ^docs$ ^pkgdown$ diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml index ed7650c..43cdab9 100644 --- a/.github/workflows/pkgdown.yaml +++ b/.github/workflows/pkgdown.yaml @@ -45,4 +45,4 @@ jobs: with: clean: false branch: gh-pages - folder: docs + folder: pkgdown diff --git a/.gitignore b/.gitignore index 270a97c..69b0184 100644 --- a/.gitignore +++ b/.gitignore @@ -9,6 +9,5 @@ vignettes/*_cache #NAMESPACE Profiling_* *.yml -man/ debug pkgdown diff --git a/docs/Authors.md b/docs/Authors.md new file mode 100644 index 0000000..aaf58aa --- /dev/null +++ b/docs/Authors.md @@ -0,0 +1,14 @@ +# Authors + +Predomics was created by: + +- **Edi Prifti**: Edi is the creator and maintainer of the `predomics` approach and package since 2015. He has proposed the genetic algorithm approaches (`TerGa1` and `TerGa2`), the concept of the Family of Best Models, numérous visuzalization tools and lots of other cool ideas. 
+- **Jean-Daniel Zucker**: Jean-Daniel has brought numerous founding ideas behind the method since 2015. He has also proposed the beam search heuristic (`TerBeam`) and has co-led the work. +- **Yann Chevaleyre**: Yann was involved in the initial developments and has worked on the concepts of balances and the mathematical optimization method (`TerDa`). +- **Blaise Hanczar**: Blaise was also involved in the initial developments and has contributed notably to the selection of the best models and other general ideas behind the framework. +- **Eugeni Belda**: Eugeni joined the project in 2019 and has brought his contribution in the interpretability of the signatures, notably applied to the human microbiome as well as their visualization. He has also extensively tested the package using a plethora of datasets. +- **Lucas Robin**: Lucas joined the project in 2016 as part of a student project and has worked on the implementation of the `TerGa2` algorithm and has brought some code optimization elements. +- **Shasha Cui**: Shasha also joined the project as part of a student internship in 2017. Her work focused on the concepts of feature importance. +- **Magali Cousin Thorez**: Magali joined the project in 2019 during her student internship. Her work focused on the simplification of the classification signatures and their exploration in the context of microbial ecosystems. +- **Youcef Sklab**: Youcef has co-led with Edi a couple of student projects in collaboration with the [Sup Galilée engineering school](https://www.sup-galilee.univ-paris13.fr) to build the R Shiny predomicsapp application. +- **Gaspar Roy**: Gaspar worked in 2023 on an evolved version of the R Shiny application [predomicsapp](https://predomics.ummisco.ioit). 
diff --git a/docs/Contact.md b/docs/Contact.md new file mode 100644 index 0000000..b901bd3 --- /dev/null +++ b/docs/Contact.md @@ -0,0 +1,6 @@ +# Contact + +If you have any questions or feedback, please contact us at: + +- Email: [contact\@predomics.com](mailto:edi.prifti@ird.fr){.email} +- GitHub Issues: [Predomics Issues](https://github.com/predomics/predomicspkg/issues) diff --git a/docs/Contributing.md b/docs/Contributing.md new file mode 100644 index 0000000..fa984a7 --- /dev/null +++ b/docs/Contributing.md @@ -0,0 +1,9 @@ +# Contributing to Predomics + +We welcome contributions to the Predomics project! Here’s how you can help: + +1. **Reporting Bugs**: Open an issue in the [GitHub Issue Tracker](https://github.com/predomics/predomicspkg/issues). +2. **Suggesting Enhancements**: Have a feature in mind? Let us know by opening an issue. +3. **Contributing Code**: Fork the repository, make your changes, and submit a pull request. + +Please make sure to follow our [Code of Conduct](CODE_OF_CONDUCT.md) and read the [development guidelines](docs/Development.md) before contributing. diff --git a/docs/FAQs.md b/docs/FAQs.md new file mode 100644 index 0000000..29876c7 --- /dev/null +++ b/docs/FAQs.md @@ -0,0 +1,9 @@ +# Frequently Asked Questions + +**Q1: How do I install Predomics?** + +A1: Please refer to the [Installation](Installation.md) guide. + +**Q2: Where can I find examples on how to use Predomics?** + +A2: Check out the [Usage](Usage.md) and [Features](Features.md) sections for examples and detailed information. diff --git a/docs/Features.md b/docs/Features.md new file mode 100644 index 0000000..3cdca88 --- /dev/null +++ b/docs/Features.md @@ -0,0 +1,9 @@ +# Features + +Predomics offers a wide array of features: + +1. **Feature 1**: Description of Feature 1. +2. **Feature 2**: Description of Feature 2. +3. **Feature 3**: Description of Feature 3. + +Explore each feature to make the most out of Predomics. 
\ No newline at end of file diff --git a/docs/Installation.html b/docs/Installation.html new file mode 100644 index 0000000..cd22034 --- /dev/null +++ b/docs/Installation.html @@ -0,0 +1,411 @@ + + + + + + + + + + + + + +Installation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + +
+

Installation

+

Follow these steps to install the Predomics package:

+
    +
  1. Install R from CRAN.
  2. +
  3. Install the required dependencies: +{r} R install.packages(c("dep1", "dep2", "dep3"))
  4. +
+
+ + + + +
+ + + + + + + + + + + + + + + diff --git a/docs/Installation.md b/docs/Installation.md new file mode 100644 index 0000000..b64ff8c --- /dev/null +++ b/docs/Installation.md @@ -0,0 +1,21 @@ +# Installation + +Follow these steps to install the Predomics package: + +1. Install R from [CRAN](https://cran.r-project.org/). +2. Install the predomics package along with all its dependencies: + `doSNOW`, `foreach`, `snow`, `doRNG`, `gtools`, `glmnet`, `pROC`, `viridis`, `kernlab`, `randomForest`, `effsize` + +``` +devtools::install_github("predomics/predomicspkg", dependencies = TRUE) + +## install dependencies +# install.packages(c("doSNOW", "foreach", "snow", "doRNG", "gtools", "glmnet", "pROC", "viridis", "kernlab", "randomForest","effsize")) +# if (!requireNamespace("BiocManager", quietly = TRUE)) +# install.packages("BiocManager") +# BiocManager::install("BioQC") +# install.packages("testthat") +# install.packages("roxygen2") +``` + +Once everything is installed, run the check ... \ No newline at end of file diff --git a/docs/Screenshots.md b/docs/Screenshots.md new file mode 100644 index 0000000..c77bedd --- /dev/null +++ b/docs/Screenshots.md @@ -0,0 +1,9 @@ +# Screenshots + +Here are some screenshots to give you an overview of Predomics: + +![Screenshot 1](path/to/screenshot1.png) +Caption for Screenshot 1. + +![Screenshot 2](path/to/screenshot2.png) +Caption for Screenshot 2. \ No newline at end of file diff --git a/docs/Technologies.md b/docs/Technologies.md new file mode 100644 index 0000000..7d4bb16 --- /dev/null +++ b/docs/Technologies.md @@ -0,0 +1,8 @@ +# Technologies Used + +Predomics is built with the following technologies: + +- R +- Other Libraries + +We chose these technologies for their ... 
\ No newline at end of file diff --git a/docs/Usage.md b/docs/Usage.md new file mode 100644 index 0000000..6d55597 --- /dev/null +++ b/docs/Usage.md @@ -0,0 +1,10 @@ +# Usage + +Here's how you can get started with Predomics: + +``` +R +library(predomics) +result <- mainFunction(data) +print(result) +``` diff --git a/man/AnalyseStableModels_LOO.Rd b/man/AnalyseStableModels_LOO.Rd new file mode 100644 index 0000000..c67073c --- /dev/null +++ b/man/AnalyseStableModels_LOO.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/stability.lib.R +\name{AnalyseStableModels_LOO} +\alias{AnalyseStableModels_LOO} +\title{analyse stability of models from digest} +\usage{ +AnalyseStableModels_LOO(X, y, clf, tmp, loo) +} +\arguments{ +\item{X:}{dataset to classify} + +\item{y:}{variable to predict} + +\item{clf:}{an object containing the different parameters of the classifier} + +\item{tmp:}{the digested result object from digest} +} +\value{ +a list of each sparsity the frequency of each feature of empirical best model in k-folds cross validation +} +\description{ +This function analyses prevalence of features of best model of different sparsity in crossval (here still k-folds) +} diff --git a/man/LPO_best_models.Rd b/man/LPO_best_models.Rd new file mode 100644 index 0000000..c766cf7 --- /dev/null +++ b/man/LPO_best_models.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/stability.lib.R +\name{LPO_best_models} +\alias{LPO_best_models} +\title{Compute the cross-validation of leave one out for test stability} +\usage{ +LPO_best_models(X, y, clf, p = 1, lfolds = NULL, return.all = FALSE, nk = 20) +} +\arguments{ +\item{X:}{the data matrix with variables in the rows and observations in the columns} + +\item{y:}{the response vector} + +\item{clf:}{clf} + +\item{lfolds:}{leave one out folds for cross-validation} + +\item{return.all:}{return all results from the crossvalidation for feature 
stability testing} +} +\value{ +a list containing generalisation scores for each fold as well as a matrix with the mean values. +} +\description{ +Compute the cross-validation emprirical and generalization scores. +} diff --git a/man/analyzeImportanceFeatures.Rd b/man/analyzeImportanceFeatures.Rd new file mode 100644 index 0000000..8a6c603 --- /dev/null +++ b/man/analyzeImportanceFeatures.Rd @@ -0,0 +1,58 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.visu.R +\name{analyzeImportanceFeatures} +\alias{analyzeImportanceFeatures} +\title{Prints as text the detail on a given experiment along with summarized results (if computed)} +\usage{ +analyzeImportanceFeatures( + clf_res, + X, + y, + makeplot = TRUE, + name = "", + verbose = TRUE, + pdf.dims = c(width = 25, height = 20), + filter.perc = 0.05, + filter.cv.prev = 0.25, + nb.top.features = 100, + scaled.importance = FALSE, + k_penalty = 0.75/100, + k_max = 0 +) +} +\arguments{ +\item{clf_res:}{the result of an experiment or multiple exmeriments (list of experimenets)} + +\item{X:}{the X dataset where to compute the abundance and prevalence} + +\item{y:}{the target class} + +\item{makeplot:}{make a pdf file with the resulting plots (default:TRUE)} + +\item{name:}{the suffix of the pdf file (default:"")} + +\item{verbose:}{print out informaiton} + +\item{pdf.dims:}{dimensions of the pdf object (default: c(w = 25, h = 20))} + +\item{filter.perc:}{filter by prevalence percentage in the population between 0 and 1 (default:0.05)} + +\item{filter.cv.prev:}{keep only features found in at least (default: 0.25, i.e 25 percent) of the cross validation experiments} + +\item{nb.top.features:}{the maximum number (default: 100) of most important features to be shown. If this value is NULL +or NA, all features be returned} + +\item{scaled.importance:}{the scaled importance is the importance multipied by the prevalence in the folds. 
If (default = TRUE) this will be used, the mean mda +will be scaled by the prevalence of the feature in the folds and ordered subsequently} + +\item{k_penalty:}{the sparsity penalty needed to select the best models of the population (default:0.75/100).} + +\item{k_max:}{select the best population below a given threshold. If (default:0) no selection is performed.} +} +\value{ +plots if makeplot is FALSE +} +\description{ +This function takes a population of models and makes three plots, feature prevalence in population, +feature abundance by class and feature prevalence by class +} diff --git a/man/analyzeImportanceFeaturesFBM.Rd b/man/analyzeImportanceFeaturesFBM.Rd new file mode 100644 index 0000000..88680c8 --- /dev/null +++ b/man/analyzeImportanceFeaturesFBM.Rd @@ -0,0 +1,72 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/analyzeImportantFeaturesFBM.R +\name{analyzeImportanceFeaturesFBM} +\alias{analyzeImportanceFeaturesFBM} +\title{Visualize a summary of an experiment/set of experiments} +\usage{ +analyzeImportanceFeaturesFBM( + clf_res, + X, + y, + makeplot = TRUE, + saveplotobj = TRUE, + name = "", + verbose = TRUE, + pdf.dims = c(width = 25, height = 20), + filter.cv.prev = 0.25, + nb.top.features = 100, + scaled.importance = FALSE, + k_penalty = 0.75/100, + k_max = 0 +) +} +\arguments{ +\item{clf_res}{The result of an experiment or multiple experiments +(list of experiments)} + +\item{X}{The feature table used as input of fit function behind experiments +in clf_res} + +\item{y}{The target class (binary/continuous)} + +\item{makeplot}{make a pdf file with the resulting plots (default:TRUE)} + +\item{saveplotobj}{make a .Rda file with a list of the individual plots +(default:TRUE)} + +\item{name}{the suffix of the pdf file (default:"")} + +\item{verbose}{print out informaiton} + +\item{pdf.dims}{dimensions of the pdf object (default: c(w = 25, h = 20))} + +\item{filter.cv.prev}{keep only features found in at least (default: 
0.25, +i.e 25 percent) of the cross validation experiments} + +\item{nb.top.features}{the maximum number (default: 100) of most important +features to be shown. +If the number of features in FBM < nb.top.features, the number of features +in FBM will be shown instead} + +\item{scaled.importance}{the scaled importance is the importance multiplied +by the prevalence in the folds. If (default = TRUE) this will be used, the +mean mda will be scaled by the prevalence of the feature in the folds and +ordered subsequently} + +\item{k_penalty}{the sparsity penalty needed to select the best models of +the population (default:0.75/100).} + +\item{k_max}{select the best population below a given threshold. +If (default:0) no selection is performed.} +} +\value{ +plots if makeplot is FALSE; plot.list list object saved locally with +individual plots (including source data) if saveplotobj +} +\description{ +Visualization of 4 panels corresponding to feature prevalence +in FBM, feature importance, feature prevalence in groups, effect sizes of +feature abundances vs y-variable (cliff's delta for binary y; +spearman rho for continuous y). 
Can be applied to single classification task +or to multiple classification tasks carried out on the same X-y dataset +} diff --git a/man/analyzePopulationFeatures.Rd b/man/analyzePopulationFeatures.Rd new file mode 100644 index 0000000..cd501d4 --- /dev/null +++ b/man/analyzePopulationFeatures.Rd @@ -0,0 +1,63 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.visu.R +\name{analyzePopulationFeatures} +\alias{analyzePopulationFeatures} +\title{Prints as text the detail on a given experiment along with summarized results (if computed)} +\usage{ +analyzePopulationFeatures( + pop, + X, + y, + res_clf, + makeplot = TRUE, + name = "", + ord.feat = "importance", + make.network = TRUE, + network.layout = "circular", + network.alpha = 1e-04, + verbose = TRUE, + pdf.dims = c(width = 25, height = 20), + filter.perc = 0.05, + k_penalty = 0.75/100, + k_max = 0 +) +} +\arguments{ +\item{pop:}{a population of models} + +\item{X:}{the X dataset where to compute the abundance and prevalence} + +\item{y:}{the target class} + +\item{res_clf:}{the results of the classifier as well as the config object} + +\item{makeplot:}{make a pdf file with the resulting plots (default:TRUE)} + +\item{name:}{the suffix of the pdf file (default:"")} + +\item{ord.feat:}{which ordering approch to use for the features (default:importance) in the models, anything +else will compute automatic hierarchical ordering based on the manhattan distance} + +\item{make.network:}{build a network and print it out in the pdf} + +\item{network.layout:}{the network layout by default is circular (layout_in_circle) and will be a weighted Fruchterman-Reingold otherwise} + +\item{network.alpha:}{threshold of significance for the network (default:1e-4)} + +\item{verbose:}{print out informaiton} + +\item{pdf.dims:}{dimensions of the pdf object (default: c(w = 25, h = 20))} + +\item{filter.perc:}{filter by prevalence percentage in the population between 0 and 1 (default:0.05)} + 
+\item{k_penalty:}{the sparsity penalty needed to select the best models of the population (default:0.75/100).} + +\item{k_max:}{select the best population below a given threshold. If (default:0) no selection is performed.} +} +\value{ +plots if makeplot is FALSE +} +\description{ +This function takes a population of models and makes three plots, feature prevalence in population, +feature abundance by class and feature prevalence by class +} diff --git a/man/bestModelFeatureStability.Rd b/man/bestModelFeatureStability.Rd new file mode 100644 index 0000000..71f1e60 --- /dev/null +++ b/man/bestModelFeatureStability.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/stability.lib.R +\name{bestModelFeatureStability} +\alias{bestModelFeatureStability} +\title{analyse stability of models from digest} +\usage{ +bestModelFeatureStability(X, y, clf, digested.result, method = "fuzzy") +} +\arguments{ +\item{X:}{dataset to classify} + +\item{y:}{variable to predict} + +\item{clf:}{an object containing the different parameters of the classifier} + +\item{digested.result:}{the digest result from digest} + +\item{method:}{wether to compute the stability of the best compared to the best in the folds (exact), or the top best (fuzzy)} +} +\value{ +an object with first a list of feature presence tables for each k_sparsity and a list of feature presence frequency +} +\description{ +This function analyses prevalence of features of best model of different sparsity in crossval (here still k-folds) +} diff --git a/man/bestModelStability.Rd b/man/bestModelStability.Rd new file mode 100644 index 0000000..318075b --- /dev/null +++ b/man/bestModelStability.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/stability.lib.R +\name{bestModelStability} +\alias{bestModelStability} +\title{analyse stability of models from digest} +\usage{ +bestModelStability(X, y, clf, digested.result, method = 
"exact") +} +\arguments{ +\item{X:}{dataset to classify} + +\item{y:}{variable to predict} + +\item{clf:}{an object containing the different parameters of the classifier} + +\item{digested.result:}{the digest result from digest} + +\item{method:}{wether to compute the stability of the best compared to the best in the folds (exact), or the top best (fuzzy)} +} +\value{ +an object with first a list of feature presence tables for each k_sparsity and a list of feature presence frequency +} +\description{ +This function analyses prevalence of features of best model of different sparsity in crossval (here still k-folds) +} diff --git a/man/cir_test.Rd b/man/cir_test.Rd new file mode 100644 index 0000000..04416b7 --- /dev/null +++ b/man/cir_test.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{cir_test} +\alias{cir_test} +\title{Cirhosis stage 2 (frequencies)} +\description{ +This dataset consists of frequency abundance files as downloaded from http://waldronlab.io/curatedMetagenomicData/ +This is a list containing two elements: (i) the X data matrix with 1045 species and 56 observations and (ii) patient class = -1 (n=25) and healthy controls (n=31) +} +\author{ +Qin, Nan, Fengling Yang, Ang Li, Edi Prifti, Yanfei Chen, Li Shao, Jing Guo, et al “Alterations of the human gut microbiome in liver cirrhosis.” Nature 513, no. 7516 (July 23, 2014): 59–64. +} +\keyword{cirrhosis,} +\keyword{liver} +\keyword{microbiome,} +\keyword{species} diff --git a/man/cir_train.Rd b/man/cir_train.Rd new file mode 100644 index 0000000..22f220b --- /dev/null +++ b/man/cir_train.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{cir_train} +\alias{cir_train} +\title{Cirhosis stage 1 (frequencies)} +\description{ +This dataset consists of frequency abundance files as downloaded from http://waldronlab.io/curatedMetagenomicData/. 
+This is a list containing two elements: (i) the X data matrix with 1045 species and 181 observations and (ii) patient class = -1 (n=98) and healthy controls (n=83) +} +\author{ +Qin, Nan, Fengling Yang, Ang Li, Edi Prifti, Yanfei Chen, Li Shao, Jing Guo, et al “Alterations of the human gut microbiome in liver cirrhosis.” Nature 513, no. 7516 (July 23, 2014): 59–64 _. +} +\keyword{cirrhosis,} +\keyword{liver} +\keyword{microbiome,} +\keyword{species} diff --git a/man/cleanPopulation.Rd b/man/cleanPopulation.Rd new file mode 100644 index 0000000..f11e120 --- /dev/null +++ b/man/cleanPopulation.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.lib.R +\name{cleanPopulation} +\alias{cleanPopulation} +\title{cleanPopulation} +\usage{ +cleanPopulation(pop, clf) +} +\arguments{ +\item{pop:}{is population (a list) of predomics objects} + +\item{clf:}{the classifier object} +} +\value{ +a population of predomics objects +} +\description{ +Looks for invalid predomics objects in a population and removes them. 
+} diff --git a/man/computeCardEnrichment.Rd b/man/computeCardEnrichment.Rd new file mode 100644 index 0000000..4acd001 --- /dev/null +++ b/man/computeCardEnrichment.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.lib.R +\name{computeCardEnrichment} +\alias{computeCardEnrichment} +\title{computeCardEnrichment} +\usage{ +computeCardEnrichment(v.card.mat, y) +} +\arguments{ +\item{v.card.mat:}{a dataframe with the cardinality of each feature (columns) and each group in the y vector (rows)} + +\item{y:}{the vector containing the class specification for each sample} +} +\value{ +a data.frame with the statistics computed +} +\description{ +Computes statistic for enrichment of the cardinality of a score for a two class vector +} diff --git a/man/computeCoeffSVMLin.Rd b/man/computeCoeffSVMLin.Rd new file mode 100644 index 0000000..2d98321 --- /dev/null +++ b/man/computeCoeffSVMLin.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.lib.R +\name{computeCoeffSVMLin} +\alias{computeCoeffSVMLin} +\title{Compute other prediction scores such as precision, recall and f-score} +\usage{ +computeCoeffSVMLin(X, y, clf = NULL, mod = NULL) +} +\arguments{ +\item{X:}{dataset to classify} + +\item{y:}{variable to predict} + +\item{mod:}{a predomics object to be updated} + +\item{clf:}{an object containing the different parameters of the classifier} +} +\value{ +a model whose evaluation parameters are updated or a list containing coefficients and intercept if mod is not set. 
+} +\description{ +This function computes prediction scores based on the confusion matrix such as accuracy, precision, recall and f-score +} diff --git a/man/computeConfusionMatrix.Rd b/man/computeConfusionMatrix.Rd new file mode 100644 index 0000000..ca21496 --- /dev/null +++ b/man/computeConfusionMatrix.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.lib.R +\name{computeConfusionMatrix} +\alias{computeConfusionMatrix} +\title{Evaluates the confusion Matrix of the predicted class and the class to predict} +\usage{ +computeConfusionMatrix(mod, X, y, clf) +} +\arguments{ +\item{mod:}{a model object to be evaluated} + +\item{X:}{dataset to classify} + +\item{y:}{variable to predict} + +\item{clf:}{an object containing the different parameters of the classifier} +} +\value{ +a confusion matrix +} +\description{ +This function evaluates the accuracy of a model +} diff --git a/man/computeEffectSizes.Rd b/man/computeEffectSizes.Rd new file mode 100644 index 0000000..a28a1eb --- /dev/null +++ b/man/computeEffectSizes.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/analyzeImportantFeaturesFBM.R +\name{computeEffectSizes} +\alias{computeEffectSizes} +\title{Compute effect sizes for features in binary classification/regression tasks} +\usage{ +computeEffectSizes(X, y, mode) +} +\arguments{ +\item{X}{The X matrix (rows=features; columns=samples)} + +\item{y}{The y vector of sample class (-1,1 in binary classification; +continuous variable in regression)} + +\item{mode}{classification or regression} +} +\value{ +data frame of features, effect sizes (cliff's delta for binary +classification; spearman rho for regression), and pvalues (wilcoxon rank-sum +test for binary classification task; spearman correlation for regression) +} +\description{ +In binary classification tasks, compute the cliff's delta effect +sizes between groups (1 vs.
-1) + pvalues from wilcoxon rank-sum tests; in +regression tasks, compute spearman correlations (rho + pvalue) vs. +continuous y variable +} diff --git a/man/computeFeatureMetrics.Rd b/man/computeFeatureMetrics.Rd new file mode 100644 index 0000000..84297d7 --- /dev/null +++ b/man/computeFeatureMetrics.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.lib.R +\name{computeFeatureMetrics} +\alias{computeFeatureMetrics} +\title{Computes different metrics for a given distribution} +\usage{ +computeFeatureMetrics(data) +} +\arguments{ +\item{data:}{a data frame containing the data to be treated.} +} +\value{ +a data frame containing different metrics: variance_to_mean, signal_to_noise, variation_coefficient, efficiency and quartile_dispertion +} +\description{ +This function computes a certain number of metrics on a dataset for each variable +(rows, such as prevalence, quartile distribution, etc.) +} diff --git a/man/computeIntercept.Rd b/man/computeIntercept.Rd new file mode 100644 index 0000000..a6d864d --- /dev/null +++ b/man/computeIntercept.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.lib.R +\name{computeIntercept} +\alias{computeIntercept} +\title{Computes the best intercept for the model while minimizing error} +\usage{ +computeIntercept(score, y, verbose = FALSE, sign = "auto", plot = FALSE) +} +\arguments{ +\item{score:}{the ^y score of the model} + +\item{y:}{the response vector} + +\item{verbose:}{print running information when set to TRUE} + +\item{sign:}{whether the score should be greater or smaller than the intercept (default:"auto")} + +\item{return.all:}{if TRUE, the function will return the intercept as well as the table used to compute it.} + +\item{plot:}{if TRUE, the score will be visualized (default:FALSE)} +} +\value{ +the intercept, the sign and the accuracy +} +\description{ +Computes the best intercept for the model
+} diff --git a/man/counter.Rd b/man/counter.Rd new file mode 100644 index 0000000..67de09d --- /dev/null +++ b/man/counter.Rd @@ -0,0 +1,11 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.lib.R +\name{counter} +\alias{counter} +\title{The counter for the experiment id (used in the clf builders)} +\usage{ +counter() +} +\description{ +The counter for the experiment id (used in the clf builders) +} diff --git a/man/crossing.Rd b/man/crossing.Rd new file mode 100644 index 0000000..d00716d --- /dev/null +++ b/man/crossing.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/terga1.lib.R +\name{crossing} +\alias{crossing} +\title{Creates new combinations of features based on the parents.} +\usage{ +crossing(clf, pop, parents, seed = NULL) +} +\arguments{ +\item{clf:}{the classifier parameter object} + +\item{pop:}{A population (i.e. list) of index vectors} + +\item{parents:}{Indexes of the population pointing to the subset of the population containing the parents (whose genes/features will be used to create the children).} + +\item{seed:}{Seed for the random number generator, for reproducibility purposes.} +} +\value{ +a population of models, containing parents and children +} +\description{ +This function is used in terga1 and creates new combinations of features based on existing ones from the parents.
+} diff --git a/man/denseVecToModel.Rd b/man/denseVecToModel.Rd new file mode 100644 index 0000000..faea6fb --- /dev/null +++ b/man/denseVecToModel.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.lib.R +\name{denseVecToModel} +\alias{denseVecToModel} +\title{denseVecToModel} +\usage{ +denseVecToModel(X, y, v, clf, eval.all = FALSE, obj = NULL) +} +\arguments{ +\item{X:}{dataset} + +\item{y:}{labels} + +\item{v:}{A vector of coeffs (example v=c(0.0,1.0,0.0,-1.0))} + +\item{clf:}{classifier information} + +\item{eval.all:}{If TRUE the fitting of the function and intercept will be computed} + +\item{obj:}{an object model to add to the model (default:NULL)} +} +\value{ +an model object +} +\description{ +Builds a model object based on model that is in the dense (long) format. +} diff --git a/man/digest.Rd b/man/digest.Rd new file mode 100644 index 0000000..5c01939 --- /dev/null +++ b/man/digest.Rd @@ -0,0 +1,43 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.lib.R +\name{digest} +\alias{digest} +\title{Summarize the results from an experiment object} +\usage{ +digest( + obj, + penalty = NULL, + best.cv = TRUE, + best.k = NULL, + plot = FALSE, + omit.na = TRUE +) +} +\arguments{ +\item{obj:}{The experiment object resulting from the learning process `fit()`} + +\item{penalty:}{A coefficient between 0 and 1, which is applied to penalize +the performance of models as a consequence of model-size. We use this to select +the best model of the population of models (default:NULL)} + +\item{best.cv:}{Should we chose the best model based on information learnerd +cross validation (default:TRUE). This will work if the crossvalidation data is +available. 
If not, the best model will be selected with empirical results.} + +\item{best.k:}{If we do not wish to let the algorithm select the model size, +we can fix this by setting the best.k with an integer indicating the number of +variables in the model (default:NULL).} + +\item{plot:}{Should the digested results be plotted? (default:FALSE)} + +\item{omit.na:}{Omit data with empty results (default:TRUE)} +} +\value{ +an object with digested information such as the best models for each +model-size, their respective scores, the best model. +} +\description{ +Summarizes the results of an experiment object of the type +`obj$classifier` and `obj$crossval`. This is different from the digestMC(), +which summarizes a model collection obj$models +} diff --git a/man/digestModelCollection.Rd b/man/digestModelCollection.Rd new file mode 100644 index 0000000..8b31c56 --- /dev/null +++ b/man/digestModelCollection.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.lib.R +\name{digestModelCollection} +\alias{digestModelCollection} +\title{digestModelCollection} +\usage{ +digestModelCollection(obj, X = NULL, clf, k.penalty = 0, mmprev = FALSE) +} +\arguments{ +\item{obj:}{a modelCollection object} + +\item{X:}{the dataset (default = NULL)} + +\item{clf:}{the classifier object} + +\item{k.penalty:}{the penalty to apply for sparsity (default:0).} + +\item{mmprev:}{activate the max.min.prevalence selector (default:FALSE)} +} +\value{ +an object with summarized results such as the best models for each k_sparse, their respective scores, the best model, etc +} +\description{ +Summarizes the results of a model collection object of the type clf_res$models.
This is different from the digest() which sumarizes an experiment clf_res$classifier$models +} +\details{ +Summarize the results from a given modelCollection object +} diff --git a/man/disectModel.Rd b/man/disectModel.Rd new file mode 100644 index 0000000..d031f43 --- /dev/null +++ b/man/disectModel.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.visu.R +\name{disectModel} +\alias{disectModel} +\title{Analyzes the score construction and model} +\usage{ +disectModel(mod, X, y, clf, plot = TRUE) +} +\arguments{ +\item{mod:}{a model object where the score will be computed} + +\item{X:}{the data matrix with variables in the rows and observations in the columns} + +\item{y:}{the class vector} + +\item{clf:}{an object containing the different parameters of the classifier} + +\item{plot:}{plot graphical interpretation of if TRUE, (default:TRUE)} +} +\value{ +an object containing statistics on a given model +} +\description{ +Analyzes the score construction and model +} diff --git a/man/estimateFeatureImportance.Rd b/man/estimateFeatureImportance.Rd new file mode 100644 index 0000000..7cc4d5f --- /dev/null +++ b/man/estimateFeatureImportance.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.lib.R +\name{estimateFeatureImportance} +\alias{estimateFeatureImportance} +\title{Estimates the importance of each feature in the model object} +\usage{ +estimateFeatureImportance( + mod, + X, + y, + clf, + attribute = "unpenalized_fit_", + plot.importance = FALSE +) +} +\arguments{ +\item{mod:}{a model object} + +\item{X:}{the data matrix with variables in the rows and observations in the columns} + +\item{y:}{the response vector} + +\item{clf:}{the classifier parameter object} + +\item{attribute:}{which attribute should be used to compute the importance (default:unpenalized_fit_)} + +\item{plot.importance:}{should the function plot the improtance of the features 
(default:FALSE)} +} +\value{ +a model object with the importance of each feature computed. Negative importance of a feature means that the feature is not beneficial. +} +\description{ +Estimates the importance of each feature in the model object +} diff --git a/man/evaluateAUC.Rd b/man/evaluateAUC.Rd new file mode 100644 index 0000000..464fb41 --- /dev/null +++ b/man/evaluateAUC.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.lib.R +\name{evaluateAUC} +\alias{evaluateAUC} +\title{Computes the AUC of a model} +\usage{ +evaluateAUC(score, y, sign = ">") +} +\arguments{ +\item{score:}{the ^y score of the model} + +\item{y:}{the response vector} + +\item{sign:}{in which direction to make the comparison? "auto" (default): automatically define in which group +the median is higher and take the direction accordingly. ">": if the predictor values for the control group +are higher than the values of the case group (controls > t >= cases). 
"<": if the predictor values for the +control group are lower or equal than the values of the case group (controls < t <= cases).} +} +\value{ +an auc value +} +\description{ +Computes the AUC of a model +} diff --git a/man/evaluateAccuracy.Rd b/man/evaluateAccuracy.Rd new file mode 100644 index 0000000..af80185 --- /dev/null +++ b/man/evaluateAccuracy.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.lib.R +\name{evaluateAccuracy} +\alias{evaluateAccuracy} +\title{Evaluates the accuracy of a model} +\usage{ +evaluateAccuracy( + mod = NULL, + X, + y, + clf, + force.re.evaluation = FALSE, + mode = "train" +) +} +\arguments{ +\item{mod:}{a model object to be used in the class prediction} + +\item{X:}{dataset to classify} + +\item{y:}{variable to predict} + +\item{clf:}{an object containing the different parameters of the classifier} + +\item{force.re.evaluation:}{evaluate again all the elements needed for accuracy (default:FALSE)} + +\item{mode:}{training or test mode. 
If training, the function maximizes accuracy.} +} +\value{ +either (1) a model whose evaluation parameters are updated or (2) the accuracy +} +\description{ +This function evaluates the accuracy of either (1) a model object that contains intercept and sign or (2) directly the attributes score, intercept, sign +} diff --git a/man/evaluateAdditionnalMetrics.Rd b/man/evaluateAdditionnalMetrics.Rd new file mode 100644 index 0000000..14a9b06 --- /dev/null +++ b/man/evaluateAdditionnalMetrics.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.lib.R +\name{evaluateAdditionnalMetrics} +\alias{evaluateAdditionnalMetrics} +\title{Compute other prediction scores such as precision, recall and f-score} +\usage{ +evaluateAdditionnalMetrics(mod, X, y, clf, mode = "train") +} +\arguments{ +\item{mod:}{a model object to be evaluated} + +\item{X:}{dataset to classify} + +\item{y:}{variable to predict} + +\item{clf:}{an object containing the different parameters of the classifier} + +\item{mode:}{training or testing mode} +} +\value{ +a model whose evaluation parameters are updated +} +\description{ +This function computes prediction scores based on the confusion matrix such as accuracy, precision, recall and f-score +} diff --git a/man/evaluateFeatureImportanceInPopulation.Rd b/man/evaluateFeatureImportanceInPopulation.Rd new file mode 100644 index 0000000..52053d8 --- /dev/null +++ b/man/evaluateFeatureImportanceInPopulation.Rd @@ -0,0 +1,55 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.lib.R +\name{evaluateFeatureImportanceInPopulation} +\alias{evaluateFeatureImportanceInPopulation} +\title{evaluates the feature importance in a population of models} +\usage{ +evaluateFeatureImportanceInPopulation( + pop, + X, + y, + clf, + score = "fit_", + filter.ci = TRUE, + method = "optimized", + seed = c(1:10), + aggregation = "mean", + verbose = TRUE +) +} +\arguments{ +\item{pop:}{a
population of models to be considered. This population will be filtered if filter.ci = TRUE (default) using the interval +confidence computed around the best model using a binomial distribution.} + +\item{X:}{dataset used to classify} + +\item{y:}{variable to predict} + +\item{clf:}{an object containing the different parameters of the classifier} + +\item{score:}{the attribute of the model to be considered in the evaluation (default:fit_)} + +\item{filter.ci:}{filter the population based on the best model confidence interval (default:TRUE)} + +\item{method:}{Two methods are implemented: the first (extensive), will shuffle feature by feature multiple times and will compute the +evaluation for the whole population of models, which can be very time consuming. The second (optimized) and the default approach consists +on using a different seed when shuffling a given feature and computing the population.} + +\item{seed:}{one or more seeds to be used in the extensive method shuffling (default:c(1:10). For the optimized method only the first seed will be used +and the rest of the seeds that are needed for each model will be incremented from there.} + +\item{aggregation:}{the method to be used to aggregate the evaluation for a the whole population (default: mean), but can be either mean or median.} + +\item{verbose:}{wether to print out information during the execution process.} +} +\value{ +a data.frame with features in rows and the population mean/median score for each model*seed of the population +} +\description{ +This function perturbes the dataset by shuffling one at a time a subset of features that appear in a population of models +and recomputes the evaluation of those models. The mean deltas of the score to consider will give a measure of importance. Two methods +are implemented: the first (extensive), will shuffle feature by feature multiple times and will compute the evaluation for the whole +population of models, which can be very time consuming. 
The second (optimized) and the default approach consists on using a different +seed when shuffling a given feature and computing the population. In this setting it is not needed to run multiple seeds on the whole +dataset. This procedure is designed to be applied in cross validation. +} diff --git a/man/evaluateFit.Rd b/man/evaluateFit.Rd new file mode 100644 index 0000000..37053e6 --- /dev/null +++ b/man/evaluateFit.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.lib.R +\name{evaluateFit} +\alias{evaluateFit} +\title{Evaluates the fitting score of a model object} +\usage{ +evaluateFit(mod, X, y, clf, force.re.evaluation = FALSE, mode = "train") +} +\arguments{ +\item{mod}{: a model object} + +\item{X:}{the data matrix with variables in the rows and observations in the columns} + +\item{y:}{the response vector} + +\item{clf:}{the classifier parameter object} + +\item{force.re.evaluation:}{re-evaluate all the scores even if they exist (default:FALSE)} + +\item{mode:}{A choice from c("train", "test") indicates wether we wish to learn the threthold +of the model (default:"train") or not "test" for the c("terinter","bininter","ratio") languages} +} +\value{ +a model object with the fitting score +} +\description{ +Evaluates the fitting score of a model object. 
+} diff --git a/man/evaluateIntercept.Rd b/man/evaluateIntercept.Rd new file mode 100644 index 0000000..051b9a8 --- /dev/null +++ b/man/evaluateIntercept.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.lib.R +\name{evaluateIntercept} +\alias{evaluateIntercept} +\title{Evaluates the fitting score of a model object} +\usage{ +evaluateIntercept(mod, X, y, clf) +} +\arguments{ +\item{mod}{: a model object} + +\item{X:}{the data matrix with variables in the rows and observations in the columns} + +\item{y:}{the response vector} + +\item{clf:}{the classifier parameter object} +} +\value{ +a model object with the fitting score +} +\description{ +Evaluates the fitting score of a model object. +} diff --git a/man/evaluateModel.Rd b/man/evaluateModel.Rd new file mode 100644 index 0000000..583c4fd --- /dev/null +++ b/man/evaluateModel.Rd @@ -0,0 +1,41 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.lib.R +\name{evaluateModel} +\alias{evaluateModel} +\title{Evaluates the fitting score of a model object} +\usage{ +evaluateModel( + mod, + X, + y, + clf, + eval.all = FALSE, + force.re.evaluation = FALSE, + estim.feat.importance = FALSE, + mode = "train" +) +} +\arguments{ +\item{mod:}{a model object} + +\item{X:}{the data matrix with variables in the rows and observations in the columns} + +\item{y:}{the response vector} + +\item{clf:}{the classifier parameter object} + +\item{eval.all:}{should the function evaluate all the scores (default:FALSE)} + +\item{force.re.evaluation:}{re-evaluate all the scores even if they exist (default:FALSE)} + +\item{estim.feat.importance:}{evaluate the importance in the model object (default:FALSE)} + +\item{mode:}{A choice from c("train", "test") indicates wether we wish to learn the threthold +of the model (default:"train") or not "test" for the c("terinter","bininter","ratio") languages} +} +\value{ +a model object with the fitting scores 
evaluated +} +\description{ +Evaluates the fitting score of a model object. +} diff --git a/man/evaluateModelRegression.Rd b/man/evaluateModelRegression.Rd new file mode 100644 index 0000000..34fb842 --- /dev/null +++ b/man/evaluateModelRegression.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.lib.R +\name{evaluateModelRegression} +\alias{evaluateModelRegression} +\title{Evaluates the fitting coefficents of a model object} +\usage{ +evaluateModelRegression( + mod, + X, + y, + clf, + eval.all = FALSE, + force.re.evaluation = FALSE +) +} +\arguments{ +\item{mod:}{a model object} + +\item{X:}{the data matrix with variables in the rows and observations in the columns} + +\item{y:}{the response vector} + +\item{clf:}{the classifier parameter object} + +\item{eval.all:}{should the function evaluate all the scores (default:FALSE)} + +\item{force.re.evaluation:}{re-evaluate all the scores even if they exist (default:FALSE)} +} +\value{ +a model object with the fitting scores evaluated +} +\description{ +Evaluates the fitting coefficients of a model object. 
+} diff --git a/man/evaluatePopulation.Rd b/man/evaluatePopulation.Rd new file mode 100644 index 0000000..1c91dff --- /dev/null +++ b/man/evaluatePopulation.Rd @@ -0,0 +1,47 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.lib.R +\name{evaluatePopulation} +\alias{evaluatePopulation} +\title{evaluatePopulation} +\usage{ +evaluatePopulation( + X, + y, + clf, + pop, + eval.all = FALSE, + force.re.evaluation = FALSE, + estim.feat.importance = FALSE, + mode = "train", + delete.null.models = TRUE, + lfolds = NULL +) +} +\arguments{ +\item{X:}{the data matrix with variables in the rows and observations in the columns} + +\item{y:}{the class vector} + +\item{clf:}{the object containing the classifier information} + +\item{pop:}{the population of models to be evaluated} + +\item{eval.all:}{should the function evaluate all the scores for each of the models (default:FALSE)} + +\item{force.re.evaluation:}{re-evaluate all the scores even if they exist for each of the models (default:FALSE)} + +\item{estim.feat.importance:}{evaluate the importance in the model object for each of the models (default:FALSE)} + +\item{mode:}{A choice from c("train", "test") indicates wether we wish to learn the threthold +of each of the models (default:"train") or not "test" for the c("terinter","bininter","ratio") languages} + +\item{delete.null.models:}{should null indivuals be deleted (default:TRUE)} + +\item{lfolds:}{compute evaluation in crossval (default:NULL)} +} +\value{ +an individual object +} +\description{ +Evaluates an entire population of models, that be predomics objects or individuals +} diff --git a/man/evaluatePrevalence.Rd b/man/evaluatePrevalence.Rd new file mode 100644 index 0000000..acaeee8 --- /dev/null +++ b/man/evaluatePrevalence.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.lib.R +\name{evaluatePrevalence} +\alias{evaluatePrevalence} +\title{Evaluate the prevalence of a 
given model} +\usage{ +evaluatePrevalence(mod, X) +} +\arguments{ +\item{mod:}{a model object} + +\item{X:}{dataset where to compute the prevalence} +} +\value{ +A vector containing the prevalence of each feature +} +\description{ +Evaluate the prevalence of a given model +} diff --git a/man/evaluateYhat.Rd b/man/evaluateYhat.Rd new file mode 100644 index 0000000..76a2f42 --- /dev/null +++ b/man/evaluateYhat.Rd @@ -0,0 +1,37 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.lib.R +\name{evaluateYhat} +\alias{evaluateYhat} +\title{Computes the predicted classification using a given model} +\usage{ +evaluateYhat( + mod = NULL, + X, + y, + clf, + score = NULL, + intercept = NULL, + sign = NULL +) +} +\arguments{ +\item{mod:}{a model object to be used in the class prediction} + +\item{X:}{dataset to classify} + +\item{y:}{variable to predict} + +\item{clf:}{an object containing the different parameters of the classifier} + +\item{score:}{the score passed directly} + +\item{intercept:}{the intercept passed directly} + +\item{sign:}{the sign passed directly} +} +\value{ +a vector with the predicted classification of the samples +} +\description{ +This function evaluates the predicted classification either using (1) a model object that contains intercept and sign or (2) directly the attributes score, intercept, sign +} diff --git a/man/evolve.Rd b/man/evolve.Rd new file mode 100644 index 0000000..3f3f985 --- /dev/null +++ b/man/evolve.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/terga1.lib.R +\name{evolve} +\alias{evolve} +\title{Creates new combinations of features based on the parents.} +\usage{ +evolve(X, y, clf, pop, seed = NULL) +} +\arguments{ +\item{X:}{the data matrix with variables in the rows and observations in the columns} + +\item{y:}{the response vector} + +\item{clf:}{the classifier parameter object} + +\item{pop:}{A population (i.e.
list) of index vectors} + +\item{seed:}{Seed for the random number generator, for reproducibility purposes.} +} +\value{ +a population of models, containing parents and children +} +\description{ +This function is used in terga1 and is the main engine of the algorithm that allows to cross, mutate and select individuals from one generation to the next. +} diff --git a/man/evolve2m.Rd b/man/evolve2m.Rd new file mode 100644 index 0000000..4e0217d --- /dev/null +++ b/man/evolve2m.Rd @@ -0,0 +1,32 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/terga2.lib.R +\name{evolve2m} +\alias{evolve2m} +\title{Second version of the evolve method} +\usage{ +evolve2m(X, y, clf, pop, featEval, generation) +} +\arguments{ +\item{X:}{Dataset to classify} + +\item{y:}{Variable to predict} + +\item{clf:}{The classifier object containing the different parameters} + +\item{pop:}{The population to be evolved} + +\item{featEval:}{A dataframe with the evaluation of each variable of the dataset, +used for some mutator.} +} +\value{ +A list with the size_pop bests of the combination of the old population +with the new population +} +\description{ +This evolve method performs the selection of the parents with +two methods (for the moment): elite and random. Then it tags every selected +individual with the index of its mate in the population. Then the individuals +which should be mutated are tagged. For each individual of the population we +check if they need to be crossed and/or mutated and then we apply the operations +and create a new individual for each operation applied.
+} diff --git a/man/evolve3m.Rd b/man/evolve3m.Rd new file mode 100644 index 0000000..e8601d1 --- /dev/null +++ b/man/evolve3m.Rd @@ -0,0 +1,32 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/terga2.lib.R +\name{evolve3m} +\alias{evolve3m} +\title{Another version of the evolve method that distinguishes the different languages} +\usage{ +evolve3m(X, y, clf, pop, featEval) +} +\arguments{ +\item{X:}{dataset to classify} + +\item{y:}{variable to predict} + +\item{clf:}{an object containing the different parameters of the classifier} + +\item{pop:}{the population to be evolved} + +\item{featEval:}{a dataset with the evaluation of each variable of the dataset, used for some mutator.} +} +\value{ +A list with all the new individuals +} +\description{ +In a nutshell this evolve method will select the parents using 2 methods (elite and random). +Then it tags every selected individual with the index of its mate in the population. +Then the individuals to be mutated are randomly selected and tagged. For each individual of the +population we check if they need to be crossed and/or mutated and then we apply the operations and +create a new individual for each operation applied. If two parents are of different languages the algorithm +will produce two children that are the same with each having one of the languages. +This has been designed as a pipeline that could be parallelized with the most efficiency.
We could summarize it the following way : +IN -> Evaluation -> initialisation of the tags -> Tagging -> Crossing and mutation -> OUT (newpop) +} diff --git a/man/filterFeaturesByPrevalence.Rd b/man/filterFeaturesByPrevalence.Rd new file mode 100644 index 0000000..69a7b68 --- /dev/null +++ b/man/filterFeaturesByPrevalence.Rd @@ -0,0 +1,32 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.lib.R +\name{filterFeaturesByPrevalence} +\alias{filterFeaturesByPrevalence} +\title{Selects the most prevalent features in the dataset baset on the provided thresholds.} +\usage{ +filterFeaturesByPrevalence( + X, + y = NULL, + nb.prevalence = NULL, + perc.prevalence = NULL, + by.class = TRUE +) +} +\arguments{ +\item{X:}{the dataset X} + +\item{y:}{the class vector (default:NULL)} + +\item{nb.prevalence:}{the minimum number of non zero observations (default: 10)} + +\item{perc.prevalence:}{the percentage of non zero observations (default: NULL)} + +\item{by.class:}{wether the filter should be applied by class (default: TRUE)} +} +\value{ +the filtered dataset, without the features that do not pass the filter. +} +\description{ +Filters out all features that display a prevalence below a given threshold provided as a number of +observations or percentage. This for the total dataset or by class. +} diff --git a/man/filterNoSignal.Rd b/man/filterNoSignal.Rd new file mode 100644 index 0000000..4bd9a0e --- /dev/null +++ b/man/filterNoSignal.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.lib.R +\name{filterNoSignal} +\alias{filterNoSignal} +\title{filterNoSignal: Omits the variables with no information} +\usage{ +filterNoSignal(X, side = 1, threshold = "auto", verbose = FALSE) +} +\arguments{ +\item{X:}{the dataset to clean} + +\item{side:}{side=1 means that variables are in the rows. 
Other than 1 it will transpose the dataset} + +\item{threshold:}{auto, will compute the first derivative of the median(sd)/x and will find an automatic threshold. When threshold is numeric it will be used as a threshold and when it is something else, it will automatically be 0.} + +\item{verbose:}{print out information when TRUE (default:FALSE)} +} +\description{ +This function will clean a dataset from the variables that have no or little information. +} diff --git a/man/filterfeaturesK.Rd b/man/filterfeaturesK.Rd new file mode 100644 index 0000000..9b18689 --- /dev/null +++ b/man/filterfeaturesK.Rd @@ -0,0 +1,47 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.lib.R +\name{filterfeaturesK} +\alias{filterfeaturesK} +\title{Selects the top k features that are significantly associated with the class to predict} +\usage{ +filterfeaturesK( + data, + trait, + k = 10, + type = "wilcoxon", + restrict = rep(TRUE, ncol(data)), + multiple.adjust = "BH", + paired = FALSE, + sort = TRUE, + verbose = FALSE, + verbose.step = NULL, + return.data = FALSE +) +} +\arguments{ +\item{data:}{the dataset X} + +\item{trait:}{is the equivalent of y (class, or numerical)} + +\item{k:}{the number of features (default:10)} + +\item{type:}{the statistics to be run (default:wilcoxon)} + +\item{restrict:}{Run the statistics in a subset of the dataset (default: a vector of all TRUE)} + +\item{multiple.adjust:}{the multiple testing adjustment method (default:BH)} + +\item{paired:}{whether paired statistics should be run (default:FALSE)} + +\item{sort:}{return variables sorted by p-value significance (default:TRUE)} + +\item{verbose:}{print out information indicating progress (default:FALSE)} + +\item{verbose.step:}{Showing a 1 percent progress.} + +\item{return.data:}{if (default:FALSE) this returns the statistics of X, otherwise the restricted data subset} +} +\description{ +Runs statistics on the data and selects a subset of k features that are the most 
significant. +Besides filtering this function can be used in a larger statistical context. +} diff --git a/man/findk.Rd b/man/findk.Rd new file mode 100644 index 0000000..16e4327 --- /dev/null +++ b/man/findk.Rd @@ -0,0 +1,40 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/terda.lib.R +\name{findk} +\alias{findk} +\title{Find the number of weights not yet integer.} +\usage{ +findk( + x, + y, + nfolds = 1, + gamma = 1, + k.sparse = NULL, + vartype = "real", + lb = -1, + ub = 1 +) +} +\arguments{ +\item{x}{matrix or dataframe of predictors, of dimension n*p; each row is an observation vector.} + +\item{y}{response variable (1 or -1)} + +\item{nfolds}{number of cross-validation folds that we want to test. Default value is 1.} + +\item{gamma}{is the hinge loss parameter. Defines the margin} + +\item{k.sparse}{is the sparsity (non-negative real value). Default value is \code{k.sparse = NULL} - no constraint.} + +\item{vartype}{is the type of coefficients : \code{cplexAPI::CPX_INTEGER, cplexAPI::CPX_BINARY, cplexAPI::CPX_CONTINUOUS}. Default \code{vartype = cplexAPI::CPX_INTEGER}} + +\item{lb}{is the lower bound of coefficients} + +\item{ub}{is the upper bound of coefficients} +} +\value{ +An integer: the number of coefficients that are not yet integer. +} +\description{ +This method returns the maximum number of weights of the model that are not yet integer. 
+} diff --git a/man/fit.Rd b/man/fit.Rd new file mode 100644 index 0000000..b56d682 --- /dev/null +++ b/man/fit.Rd @@ -0,0 +1,60 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/predomics.R +\name{fit} +\alias{fit} +\title{fit: runs the classifier on a dataset} +\usage{ +fit( + X, + y, + clf, + cross.validate = FALSE, + lfolds = NULL, + nfolds = 10, + parallelize.folds = TRUE, + compute.importance = TRUE, + return.all = FALSE, + log.file = "parallel.log", + path = NULL +) +} +\arguments{ +\item{X:}{Dataset to classify} + +\item{y:}{Variable to predict} + +\item{clf:}{The classifier object object containing the settings of the classifier} + +\item{cross.validate:}{Whether or not the classification should be run in +cross-validation mode (default:TRUE)} + +\item{lfolds:}{The folds to be used for the cross-validation} + +\item{nfolds:}{The number of folds to use in the cross-validation. If lfolds +are not specified this option allows to set them up (default:10)} + +\item{parallelize.folds:}{Switch setting the parallelization mode based on +cross-validation folds and nothing else in the algorithm (default:TRUE). +This is much more efficient.} + +\item{compute.importance:}{The importance of variables in the learning process +during cross-validation can be computed. This is based on data perturbation +similar to the mean decrease accuracy in the random forest algorithm. Moreover, +this gives feature prevalence in models during CV (default:TRUE)} + +\item{return.all:}{Should all results from the cross-validation steps be +returned. 
This is usually needed when testing stability of the models +(default:FALSE)} + +\item{log.file:}{The output file for parallel logs (default:'parallel.log')} + +\item{path:}{The path where to save temporary data} +} +\value{ +An experiment object containing the classifier along with the +classification results as a sub-element +} +\description{ +This function runs a learning experiment based on the classifier +object and the given dataset. +} diff --git a/man/generateAllCombinations.Rd b/man/generateAllCombinations.Rd new file mode 100644 index 0000000..9590bf9 --- /dev/null +++ b/man/generateAllCombinations.Rd @@ -0,0 +1,11 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/terbeam.lib.R +\name{generateAllCombinations} +\alias{generateAllCombinations} +\title{generateAllCombinations} +\usage{ +generateAllCombinations(X, y, clf, ind.features.to.keep, sparsity, allFeatures) +} +\description{ +Generate every possible combination of a list of features and evaluate them +} diff --git a/man/generator_metal.Rd b/man/generator_metal.Rd new file mode 100644 index 0000000..0268169 --- /dev/null +++ b/man/generator_metal.Rd @@ -0,0 +1,44 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/metal.lib.R +\name{generator_metal} +\alias{generator_metal} +\title{#' Computes best model of a metal clf +#' +#' @description Get best metal model +#' @param X: dataset to classify +#' @param y: variable to predict +#' @param clf: an object containing the different parameters of the classifier +#' @param clf_res: the result of metal +#' @param k_penalty: penalty for k +#' @return A list of result of best model for each k, their importance feature of each best model, individuels wrongly classified +#' @export +getTheBestMetalModel<- function(clf, clf_res, X, k_penalty=0.01, evalToOrder="accuracy_",selected=1) +{ + if(length(clf_res)==3){ + clf_res<-clf_res$classifier + } + pop<-modelCollectionToPopulation(clf_res$models) + acc 
<- populationGet_X(evalToOrder)(pop) + k <- populationGet_X("eval.sparsity")(pop) + acc.penalty <- acc-(k*k_penalty) + best.acc <- max(acc.penalty) + epsilon <- sqrt(best.acc*(1-best.acc)/ncol(X)) + pop2 <- pop[acc.penalty>(best.acc - epsilon)] + mod <- getMaxMinPrevalenceModel(pop2,X,selected=selected) + return(mod) +} +Generate a metal list of clfs containing information on the generators and unificators} +\usage{ +generator_metal(mat, clf = NULL) +} +\arguments{ +\item{mat:}{a language/learner presence matrix that indicates which algorithms and which languages to explore} + +\item{clf:}{a metal classifier object, with the parameter list.clfs that can be "NULL"} +} +\value{ +a list of clfs +} +\description{ +Generate a metal list of clfs containing information on the generators and unificators +} diff --git a/man/getFeaturePrevalence.Rd b/man/getFeaturePrevalence.Rd new file mode 100644 index 0000000..a65ed2d --- /dev/null +++ b/man/getFeaturePrevalence.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.lib.R +\name{getFeaturePrevalence} +\alias{getFeaturePrevalence} +\title{Evaluates the prevalence of a list of features in the whole dataset and per each class} +\usage{ +getFeaturePrevalence(features, X, y = NULL, prop = TRUE, zero.value = 0) +} +\arguments{ +\item{features:}{a list of features or features indexes for which we wish to compute prevalence} + +\item{X:}{dataset where to compute the prevalence} + +\item{y:}{if provided it will also compute hte prevalence per each class (default:NULL)} + +\item{prop:}{weather to compute the prevalence in number or as a proportion (default:TRUE)} + +\item{zero.value:}{the value that specifies what is zero. 
This can be different from 0 in log transformed data for instance (default = 0)} +} +\value{ +A list containing the prevalence in the whole dataset as well as classes (if provided) +} +\description{ +Evaluates the prevalence of a list of features in the whole dataset and per class +} diff --git a/man/getFitIndividual.Rd b/man/getFitIndividual.Rd new file mode 100644 index 0000000..99fb0cd --- /dev/null +++ b/man/getFitIndividual.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.lib.R +\name{getFitIndividual} +\alias{getFitIndividual} +\title{Get the fitting score of an individual object} +\usage{ +getFitIndividual(individual) +} +\arguments{ +\item{individual}{: an individual object} +} +\value{ +a fitting score +} +\description{ +Get the fitting score of an individual object. +} diff --git a/man/getFitModel.Rd b/man/getFitModel.Rd new file mode 100644 index 0000000..01b0507 --- /dev/null +++ b/man/getFitModel.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.lib.R +\name{getFitModel} +\alias{getFitModel} +\title{Get the fitting score of a model object} +\usage{ +getFitModel(mod) +} +\arguments{ +\item{mod}{: a model object} +} +\value{ +a fitting score +} +\description{ +Get the fitting score of a model object. +} diff --git a/man/getFitModels.Rd b/man/getFitModels.Rd new file mode 100644 index 0000000..ec0f378 --- /dev/null +++ b/man/getFitModels.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.lib.R +\name{getFitModels} +\alias{getFitModels} +\title{Get the fitting score of a list of models} +\usage{ +getFitModels(pop) +} +\arguments{ +\item{pop}{: a list of models} +} +\value{ +a vector of fitting scores +} +\description{ +Get the fitting score of a list of models. 
+} diff --git a/man/getFitPopulation.Rd b/man/getFitPopulation.Rd new file mode 100644 index 0000000..db21cc0 --- /dev/null +++ b/man/getFitPopulation.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.lib.R +\name{getFitPopulation} +\alias{getFitPopulation} +\title{Get the fitting score of a list of individuals} +\usage{ +getFitPopulation(pop) +} +\arguments{ +\item{pop}{: a list of individuals} +} +\value{ +a vector of fitting scores +} +\description{ +Get the fitting score of a list of individuals. +} diff --git a/man/getGraph.Rd b/man/getGraph.Rd new file mode 100644 index 0000000..cb517c2 --- /dev/null +++ b/man/getGraph.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/stability.lib.R +\name{getGraph} +\alias{getGraph} +\title{getGraph} +\usage{ +getGraph(mat, X, threshold = 0) +} +\arguments{ +\item{X:}{dataset to classify} + +\item{mat:}{AnalyseStableModels()$origin} + +\item{threshold:}{} +} +\value{ +a graph +} +\description{ +This function gets a graph of the result of analyseStableModels +} diff --git a/man/getImportanceFeaturesFBMobjects.Rd b/man/getImportanceFeaturesFBMobjects.Rd new file mode 100644 index 0000000..cf637bb --- /dev/null +++ b/man/getImportanceFeaturesFBMobjects.Rd @@ -0,0 +1,51 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/analyzeImportantFeaturesFBM.R +\name{getImportanceFeaturesFBMobjects} +\alias{getImportanceFeaturesFBMobjects} +\title{Get objects needed for a merged visualization task combining different +experiments from different datasets (different X and y)} +\usage{ +getImportanceFeaturesFBMobjects( + clf_res, + X, + y, + verbose = TRUE, + filter.cv.prev = 0.25, + scaled.importance = FALSE, + k_penalty = 0.75/100, + k_max = 0 +) +} +\arguments{ +\item{clf_res}{The result of a single experiment} + +\item{X}{The feature table used as input of fit function behind experiments +in clf_res} 
+ +\item{y}{The target class (binary/continuous)} + +\item{verbose}{print out informaiton} + +\item{filter.cv.prev}{keep only features found in at least (default: 0.25, +i.e 25 percent) of the cross validation experiments} + +\item{scaled.importance}{the scaled importance is the importance multipied +by the prevalence in the folds. If (default = TRUE) this will be used, the +mean mda will be scaled by the prevalence of the feature in the folds and +ordered subsequently} + +\item{k_penalty}{the sparsity penalty needed to select the best models of the +population (default:0.75/100).} + +\item{k_max}{select the best population below a given threshold. If (default:0) +no selection is performed.} +} +\value{ +list of objects for subsequent combination +} +\description{ +Here we get the 4 datasets from a given prediction experiment +(clf object + X + y) needed for subsequent combination with other +predition experiments for combined visualization (feature prevalence in +FBM + feature importance + featureEffSizes + feature prevalence in groups) +} diff --git a/man/getIndicesIndividual.Rd b/man/getIndicesIndividual.Rd new file mode 100644 index 0000000..7fec456 --- /dev/null +++ b/man/getIndicesIndividual.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.lib.R +\name{getIndicesIndividual} +\alias{getIndicesIndividual} +\title{Get the index of the features in a given individual} +\usage{ +getIndicesIndividual(individual) +} +\arguments{ +\item{individual}{: an individual object} +} +\value{ +the indices of the features +} +\description{ +Get the indices of the features used in the individuals +} diff --git a/man/getIndicesPopulation.Rd b/man/getIndicesPopulation.Rd new file mode 100644 index 0000000..644ffca --- /dev/null +++ b/man/getIndicesPopulation.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.lib.R +\name{getIndicesPopulation} 
+\alias{getIndicesPopulation} +\title{Get the indices of the features used in a population of individuals} +\usage{ +getIndicesPopulation(pop) +} +\arguments{ +\item{pop}{: a list of individuals} +} +\value{ +a matrix of indices (rows), and individuals (cols) +} +\description{ +Get the indices of the features used in a population of individuals +} diff --git a/man/getMaxMinPrevalenceModel.Rd b/man/getMaxMinPrevalenceModel.Rd new file mode 100644 index 0000000..8c22cf4 --- /dev/null +++ b/man/getMaxMinPrevalenceModel.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.lib.R +\name{getMaxMinPrevalenceModel} +\alias{getMaxMinPrevalenceModel} +\title{Get the model that has the highest minimal prevalence in its features} +\usage{ +getMaxMinPrevalenceModel(pop, X = NULL, evalToOrder = "fit_", selected = 0) +} +\arguments{ +\item{pop:}{a population of model objects} + +\item{X:}{dataset where to compute the prevalence} + +\item{evalToOrder:}{which score should we use to order the models and select them (default:fit_)} + +\item{selected:}{the number of selected models (default:0). 
If 0, everything is returned.} +} +\value{ +a model or a list of model objects +} +\description{ +Get the model that has the highest minimal prevalence in its features +} diff --git a/man/getModelScore.Rd b/man/getModelScore.Rd new file mode 100644 index 0000000..4bf4c18 --- /dev/null +++ b/man/getModelScore.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.lib.R +\name{getModelScore} +\alias{getModelScore} +\title{Computes the ^y score of the model} +\usage{ +getModelScore(mod, X, clf, force.re.evaluation = TRUE) +} +\arguments{ +\item{mod:}{a model object where the score will be computed} + +\item{X:}{the data matrix with variables in the rows and observations in the columns} + +\item{force.re.evaluation:}{we recompute the score (default:TRUE)} +} +\value{ +a vector containing the predicted ^y score for each observation +} +\description{ +Returns the ^y score of the model +} diff --git a/man/getNBestModels.Rd b/man/getNBestModels.Rd new file mode 100644 index 0000000..edf193a --- /dev/null +++ b/man/getNBestModels.Rd @@ -0,0 +1,66 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.lib.R +\name{getNBestModels} +\alias{getNBestModels} +\title{Get the models from a classifier result for each k-sparsity} +\usage{ +getNBestModels( + obj, + significance = FALSE, + by.k.sparsity = TRUE, + k.penalty = 0, + n.best = 5, + single.best = FALSE, + single.best.cv = TRUE, + single.best.k = NULL, + max.min.prevalence = FALSE, + X = NULL, + verbose = FALSE, + evalToOrder = "fit_", + return.population = FALSE, + unique.control = TRUE +) +} +\arguments{ +\item{obj:}{the classifier result output from the function fit. This can also be a ModelCollection or Population object} + +\item{significance:}{if TRUE, (default:FALSE) a statistical test will be applied to find the lowest threshold that will delimit the window +of the best models. 
If FALSE, the models will be selected according to the rest of the criteria.} + +\item{by.k.sparsity:}{if TRUE (default:TRUE), the filtering will be performed for each sparsity level} + +\item{k.penalty:}{(default:0), it will penalize the models with large sparsity if different, when by.k.sparsity is set to TRUE} + +\item{n.best:}{the number of best models to be returned for each sparsity if by.k.sparsity is set to TRUE or for the whole population +otherwise (default:5).} + +\item{nbest:}{the number of best models we wish to get from the population, per each sparsity or not. If there are less best models then this +number, less will be returned} + +\item{single.best:}{if TRUE, this will return the best model of all (default:FALSE) and the n.best will be set to 1.} + +\item{single.best.cv:}{if single.best is TRUE, we could chose the best model based on data from cross validation (default:TRUE) and in this +case obj should be an experiment or from empirical results not in CV.} + +\item{single.best.k:}{if single.best is TRUE, we could chose the best model of a given sparsity that is specified by a number here. +If this value is specified (default:NULL), then this will de-actvate single.best.cv.} + +\item{max.min.prevalence:}{if TRUE (default:FALSE), the best models will be selected based on their performance but also on the prevalence of +the features that compose it.} + +\item{X:}{the dataset to be learned (default:NULL). 
This is needed when max.min.prevalence is set to TRUE.} + +\item{verbose:}{provide more information about execution (default = FALSE)} + +\item{evalToOrder:}{which attribute of the model object should we use to order the models and select them (default:fit_)} + +\item{return.population:}{if set to TRUE (default:FALSE), the result will be returned as a population of models} + +\item{unique.control:}{if set to TRUE (default:TRUE), we correct the population so that no duplication of models takes place} +} +\value{ +a list of model objects or a model when it is a single one or a model collection +} +\description{ +Get the N best models from a classifier result for each k-sparsity. +} diff --git a/man/getSign.Rd b/man/getSign.Rd new file mode 100644 index 0000000..d54190a --- /dev/null +++ b/man/getSign.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.lib.R +\name{getSign} +\alias{getSign} +\title{Evaluates the sign for a given feature this is the old getMgsVsTraitSignDiscr function} +\usage{ +getSign(X, y, clf = NULL, parallel.local = FALSE) +} +\arguments{ +\item{X:}{the data matrix with variables in the rows and observations in the columns} + +\item{y:}{the response vector} + +\item{clf:}{the classifier parameter object} + +\item{parallel.local:}{whether or not to run in parallel} +} +\value{ +a vector of +1 & -1 for each variable +} +\description{ +Evaluates the sign for a given feature this is the old getMgsVsTraitSignDiscr function. 
+} diff --git a/man/get_IndividualToBeMutated.Rd b/man/get_IndividualToBeMutated.Rd new file mode 100644 index 0000000..0d6a9b0 --- /dev/null +++ b/man/get_IndividualToBeMutated.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/terga2.lib.R +\name{get_IndividualToBeMutated} +\alias{get_IndividualToBeMutated} +\title{Return list of individuals to mutate} +\usage{ +get_IndividualToBeMutated(pop) +} +\arguments{ +\item{pop:}{population list} +} +\value{ +a list of individuals (= a population) containing all the individuals selected for mutation +} +\description{ +Wrapper function that returns the list of individuals that are going to go through +the mutate function (normally tagged with \link[predomics]{tag_SelectRandom}) +} diff --git a/man/get_Parents.Rd b/man/get_Parents.Rd new file mode 100644 index 0000000..673512b --- /dev/null +++ b/man/get_Parents.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/terga2.lib.R +\name{get_Parents} +\alias{get_Parents} +\title{Return list of parents} +\usage{ +get_Parents(pop) +} +\arguments{ +\item{pop:}{population list} +} +\value{ +a list of individuals (= a population) containing all the selected parents +} +\description{ +Wrapper function that returns the list of parents (normally tagged with +\link[predomics]{tag_SelectRandom}) +} diff --git a/man/glmnetRR.Rd b/man/glmnetRR.Rd new file mode 100644 index 0000000..6a3024f --- /dev/null +++ b/man/glmnetRR.Rd @@ -0,0 +1,11 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/terda.lib.R +\name{glmnetRR} +\alias{glmnetRR} +\title{Solve with GLMNET and create models} +\usage{ +glmnetRR(clf, X, y) +} +\description{ +Create Models by applying randomized roundings on a solution given by GLMNET +} diff --git a/man/ibd.Rd b/man/ibd.Rd new file mode 100644 index 0000000..e60ee8e --- /dev/null +++ b/man/ibd.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not 
edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{ibd} +\alias{ibd} +\title{Inflammatory Bowel Disease (frequencies) from the MetaHIT study} +\description{ +This dataset consists of frequency abundance files as downloaded from http://waldronlab.io/curatedMetagenomicData/ +This is a list containing two elements: (i) the X data matrix with 1045 species and 396 observations and (ii) patient class = -1 (n=148) and healthy controls (n=248) +} +\author{ +Nielsen, H Bjørn, Mathieu Almeida, Agnieszka Sierakowska Juncker, Simon Rasmussen, Junhua Li, Shinichi Sunagawa, Damian R Plichta, et al “Identification and assembly of genomes and genetic elements in complex metagenomic samples without using reference genomes.” Nature biotechnology (July 6, 2014): 1–11. +} +\keyword{bowel} +\keyword{disease,} +\keyword{inflamatory} +\keyword{microbiome,} +\keyword{species} diff --git a/man/index2names.Rd b/man/index2names.Rd new file mode 100644 index 0000000..691a0b2 --- /dev/null +++ b/man/index2names.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.lib.R +\name{index2names} +\alias{index2names} +\title{index2names} +\usage{ +index2names(X, var.ind) +} +\arguments{ +\item{X:}{the dataset} + +\item{var.ind:}{the feature index vector} +} +\value{ +the names of the features +} +\description{ +Transforms feature indexes into feature names +} diff --git a/man/individual.Rd b/man/individual.Rd new file mode 100644 index 0000000..fba0261 --- /dev/null +++ b/man/individual.Rd @@ -0,0 +1,39 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.lib.R +\name{individual} +\alias{individual} +\title{Creates an object individual} +\usage{ +individual( + X, + y, + clf, + coeffs = NULL, + ind = NULL, + eval.all = FALSE, + signs = NULL, + obj = NULL, + res_clf = NULL +) +} +\arguments{ +\item{X:}{the data matrix with variables in the rows and observations in the columns} + 
+\item{y:}{the class vector} + +\item{clf:}{the object containing the classifier information} + +\item{ind:}{the indexes of the variables forming the individual could be null if we give the function a dense vector (via the coeff parameter) or if we also want to generate the individual} + +\item{coeffs:}{the coefficients of the model, it could be a dense vector (in this case, ind need to be null), or it could be only the non zero values, or if it's null a new individual will be genrated} + +\item{obj:}{an object to be incorporated in the model (default:NULL). We use this usually for SOTA.} + +\item{res_clf:}{if provided information on mda etc can be found and transmitted to the model object} +} +\value{ +an individual (model) object +} +\description{ +Creates an object individual +} diff --git a/man/isClf.Rd b/man/isClf.Rd new file mode 100644 index 0000000..c0565d3 --- /dev/null +++ b/man/isClf.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.lib.R +\name{isClf} +\alias{isClf} +\title{Evaluates wether an object is a classifier} +\usage{ +isClf(obj) +} +\arguments{ +\item{obj:}{an object to test} +} +\value{ +TRUE if the object is a classifier +} +\description{ +Evaluates wether an object is a classifier +} diff --git a/man/isExperiment.Rd b/man/isExperiment.Rd new file mode 100644 index 0000000..5f1a500 --- /dev/null +++ b/man/isExperiment.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.lib.R +\name{isExperiment} +\alias{isExperiment} +\title{Evaluates wether an object is an experiment} +\usage{ +isExperiment(obj) +} +\arguments{ +\item{obj:}{an object to test} +} +\value{ +TRUE if the object is an experiment +} +\description{ +Evaluates wether an object is an experiment +} diff --git a/man/isLearnerSota.Rd b/man/isLearnerSota.Rd new file mode 100644 index 0000000..c34088b --- /dev/null +++ b/man/isLearnerSota.Rd @@ -0,0 +1,17 @@ +% Generated by 
roxygen2: do not edit by hand +% Please edit documentation in R/global.lib.R +\name{isLearnerSota} +\alias{isLearnerSota} +\title{Evaluates wether an object is a model SOTA SVM} +\usage{ +isLearnerSota(obj) +} +\arguments{ +\item{obj:}{a model to test} +} +\value{ +TRUE if the object is a SOTA learner +} +\description{ +Evaluates wether a learner is SOTA or not +} diff --git a/man/isModel.Rd b/man/isModel.Rd new file mode 100644 index 0000000..8864d9a --- /dev/null +++ b/man/isModel.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.lib.R +\name{isModel} +\alias{isModel} +\title{Evaluates wether an object is a model} +\usage{ +isModel(obj) +} +\arguments{ +\item{obj:}{an object to test} +} +\value{ +TRUE if the object is a model +} +\description{ +Evaluates wether an object is a model +} diff --git a/man/isModelBTR.Rd b/man/isModelBTR.Rd new file mode 100644 index 0000000..cedee41 --- /dev/null +++ b/man/isModelBTR.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.lib.R +\name{isModelBTR} +\alias{isModelBTR} +\title{Evaluates wether an object is a model BTR} +\usage{ +isModelBTR(obj) +} +\arguments{ +\item{obj:}{a model to test} +} +\value{ +TRUE if the object is a model BTR +} +\description{ +Evaluates wether an object is a model of type BTR +} diff --git a/man/isModelCollection.Rd b/man/isModelCollection.Rd new file mode 100644 index 0000000..e874dbd --- /dev/null +++ b/man/isModelCollection.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.lib.R +\name{isModelCollection} +\alias{isModelCollection} +\title{Evaluates wether an object is a model collection objecct} +\usage{ +isModelCollection(obj) +} +\arguments{ +\item{obj:}{an object to test} +} +\value{ +TRUE if the object is a model collection objecct +} +\description{ +Evaluates wether an object is a model collection objecct +} diff --git 
a/man/isModelSota.Rd b/man/isModelSota.Rd new file mode 100644 index 0000000..1020849 --- /dev/null +++ b/man/isModelSota.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.lib.R +\name{isModelSota} +\alias{isModelSota} +\title{Evaluates wether an object is a model SOTA} +\usage{ +isModelSota(obj) +} +\arguments{ +\item{obj:}{a model to test} +} +\value{ +TRUE if the object is a model sota +} +\description{ +Evaluates wether an object is a model of type sota +} diff --git a/man/isModelSotaGLMNET.Rd b/man/isModelSotaGLMNET.Rd new file mode 100644 index 0000000..c0e9343 --- /dev/null +++ b/man/isModelSotaGLMNET.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.lib.R +\name{isModelSotaGLMNET} +\alias{isModelSotaGLMNET} +\title{Evaluates wether an object is a model SOTA GLMNET} +\usage{ +isModelSotaGLMNET(obj) +} +\arguments{ +\item{mod:}{a model to test} +} +\value{ +TRUE if the object is a model sota GLMNET +} +\description{ +Evaluates wether an object is a model of type sota +} diff --git a/man/isModelSotaRF.Rd b/man/isModelSotaRF.Rd new file mode 100644 index 0000000..fe0afcc --- /dev/null +++ b/man/isModelSotaRF.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.lib.R +\name{isModelSotaRF} +\alias{isModelSotaRF} +\title{Evaluates wether an object is a model SOTA RF} +\usage{ +isModelSotaRF(obj) +} +\arguments{ +\item{mod:}{a model to test} +} +\value{ +TRUE if the object is a model sota RF +} +\description{ +Evaluates wether an object is a model of type sota +} diff --git a/man/isModelSotaSVM.Rd b/man/isModelSotaSVM.Rd new file mode 100644 index 0000000..4611e14 --- /dev/null +++ b/man/isModelSotaSVM.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.lib.R +\name{isModelSotaSVM} +\alias{isModelSotaSVM} +\title{Evaluates wether an object is a 
model SOTA SVM} +\usage{ +isModelSotaSVM(obj) +} +\arguments{ +\item{obj:}{a model to test} +} +\value{ +TRUE if the object is a model sota SVM +} +\description{ +Evaluates wether an object is a model of type sota +} diff --git a/man/isModelTerda.Rd b/man/isModelTerda.Rd new file mode 100644 index 0000000..0323fa3 --- /dev/null +++ b/man/isModelTerda.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.lib.R +\name{isModelTerda} +\alias{isModelTerda} +\title{Evaluates wether an object is a model BTR Terda} +\usage{ +isModelTerda(obj) +} +\arguments{ +\item{mod:}{a model to test} +} +\value{ +TRUE if the object is a model BTR Terda +} +\description{ +Evaluates wether an object is a model of type BTR +} diff --git a/man/isPopulation.Rd b/man/isPopulation.Rd new file mode 100644 index 0000000..ec2bd96 --- /dev/null +++ b/man/isPopulation.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.lib.R +\name{isPopulation} +\alias{isPopulation} +\title{Evaluates wether an object is a population of models} +\usage{ +isPopulation(obj) +} +\arguments{ +\item{obj:}{an object to test} +} +\value{ +TRUE if the object is a population +} +\description{ +Evaluates wether an object is a population of models +} diff --git a/man/isclose.Rd b/man/isclose.Rd new file mode 100644 index 0000000..437593b --- /dev/null +++ b/man/isclose.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/minitools.R +\name{isclose} +\alias{isclose} +\title{tests weather two values are close} +\usage{ +isclose(x, y, e = 1e-10) +} +\arguments{ +\item{x:}{condition to be tested} + +\item{y:}{message to be printed} +} +\value{ +TRUE when the distance of two numbers is smaller than a given value +} +\description{ +Asserts wether two vectors of the same length are close in value + below a given threshold +} diff --git a/man/listOfDenseVecToListOfModels.Rd 
b/man/listOfDenseVecToListOfModels.Rd new file mode 100644 index 0000000..d30b005 --- /dev/null +++ b/man/listOfDenseVecToListOfModels.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.lib.R +\name{listOfDenseVecToListOfModels} +\alias{listOfDenseVecToListOfModels} +\title{Builds a model object from a list of vector coefficients} +\usage{ +listOfDenseVecToListOfModels(X, y, clf, v, lobj = NULL) +} +\arguments{ +\item{X:}{dataset} + +\item{y:}{labels} + +\item{clf:}{classifier} + +\item{v:}{list of vectors of coeffs. For example, v=list( c(0.0,1.0,0.0,-1.0) , c(1.0,1.0,0.0,0.0) , c(0.0,1.0,1.0,-1.0) )} + +\item{lobj:}{a list of objects to add as elements in the model objects if not null (default:NULL)} +} +\value{ +an model object +} +\description{ +Builds a model object from a list of vector coefficients. +} diff --git a/man/listOfDenseVecToModelCollection.Rd b/man/listOfDenseVecToModelCollection.Rd new file mode 100644 index 0000000..c80d680 --- /dev/null +++ b/man/listOfDenseVecToModelCollection.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.lib.R +\name{listOfDenseVecToModelCollection} +\alias{listOfDenseVecToModelCollection} +\title{Builds a list of dense vector coefficients from a list of models} +\usage{ +listOfDenseVecToModelCollection(clf, X, y, v) +} +\arguments{ +\item{clf:}{classifier} + +\item{X:}{dataset} + +\item{y:}{labels} + +\item{v:}{list of dense vectors} +} +\value{ +a model collection +} +\description{ +Builds a list of dense vector coefficients from a list of models +} diff --git a/man/listOfModels2ModelCollection.Rd b/man/listOfModels2ModelCollection.Rd new file mode 100644 index 0000000..9b04001 --- /dev/null +++ b/man/listOfModels2ModelCollection.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.lib.R +\name{listOfModels2ModelCollection} 
+\alias{listOfModels2ModelCollection} +\title{listOfModels2ModelCollection} +\usage{ +listOfModels2ModelCollection(pop, nBest = NA) +} +\arguments{ +\item{pop:}{is population (a list) of predomics objects} + +\item{nBest:}{number of elements to return for each sparsity (default:NA)} +} +\value{ +an model collection object +} +\description{ +Structures a list of predomics objects into a structured collection by k_sparsity. +} diff --git a/man/listOfModelsToDenseCoefMatrix.Rd b/man/listOfModelsToDenseCoefMatrix.Rd new file mode 100644 index 0000000..91cbdf2 --- /dev/null +++ b/man/listOfModelsToDenseCoefMatrix.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.lib.R +\name{listOfModelsToDenseCoefMatrix} +\alias{listOfModelsToDenseCoefMatrix} +\title{listOfModelsToDenseCoefMatrix} +\usage{ +listOfModelsToDenseCoefMatrix( + clf, + X, + y, + list.models, + rm.empty = TRUE, + order.row = TRUE +) +} +\arguments{ +\item{clf:}{the classifier object} + +\item{X:}{the dataset} + +\item{y:}{the class vector} + +\item{list.model:}{a list of model objects} + +\item{rm.empty:}{remove null models in the list if any (default:TRUE)} + +\item{order.row:}{order rows by occurence (default:TRUE)} +} +\value{ +an data frame with model coefficients in rows +} +\description{ +For each model in the list of models it will convert to dense format and convert to a data.frame +} diff --git a/man/listOfModelsToListOfDenseVec.Rd b/man/listOfModelsToListOfDenseVec.Rd new file mode 100644 index 0000000..cb64671 --- /dev/null +++ b/man/listOfModelsToListOfDenseVec.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.lib.R +\name{listOfModelsToListOfDenseVec} +\alias{listOfModelsToListOfDenseVec} +\title{Builds a list of dense vector coefficients from a list of models} +\usage{ +listOfModelsToListOfDenseVec(clf, X, y, list.models) +} +\arguments{ +\item{clf:}{classifier} + +\item{X:}{dataset} 
+ +\item{y:}{labels} + +\item{list.models:}{list of models} +} +\value{ +a list of dense vectors of coefficient +} +\description{ +Builds a list of dense vector coefficients from a list of models +} diff --git a/man/listOfModelsToListOfSparseVec.Rd b/man/listOfModelsToListOfSparseVec.Rd new file mode 100644 index 0000000..adb9a3e --- /dev/null +++ b/man/listOfModelsToListOfSparseVec.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.lib.R +\name{listOfModelsToListOfSparseVec} +\alias{listOfModelsToListOfSparseVec} +\title{Builds a list of sparse vector coefficients from a list of models} +\usage{ +listOfModelsToListOfSparseVec(list.models) +} +\arguments{ +\item{list.models:}{list of models} +} +\value{ +a list of dense vectors of coefficient +} +\description{ +Builds a list of sparse vector coefficients from a list of models +} diff --git a/man/listOfSparseVecToListOfModels.Rd b/man/listOfSparseVecToListOfModels.Rd new file mode 100644 index 0000000..4cbeff9 --- /dev/null +++ b/man/listOfSparseVecToListOfModels.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.lib.R +\name{listOfSparseVecToListOfModels} +\alias{listOfSparseVecToListOfModels} +\title{listOfSparseVecToListOfModels} +\usage{ +listOfSparseVecToListOfModels(X, y, clf, v, lobj = NULL, eval.all = FALSE) +} +\arguments{ +\item{X:}{dataset} + +\item{y:}{labels} + +\item{clf:}{classifier} + +\item{v:}{list of vectors of coeffs. 
For example, v=list( c(0.0,1.0,0.0,-1.0) , c(1.0,1.0,0.0,0.0) , c(0.0,1.0,1.0,-1.0) )} + +\item{lobj:}{a list of objects to add as elements in the model objects if not null (default:NULL)} + +\item{eval.all:}{evaluate population (default:FALSE)} +} +\value{ +an model object +} +\description{ +Converts an list of "SparseVec" objects onto a list of predomics objects +} diff --git a/man/loadPopulation.Rd b/man/loadPopulation.Rd new file mode 100644 index 0000000..a644db2 --- /dev/null +++ b/man/loadPopulation.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.lib.R +\name{loadPopulation} +\alias{loadPopulation} +\title{Load a population from a file} +\usage{ +loadPopulation(fileName) +} +\arguments{ +\item{fileName:}{The name of the file were the population is stored} +} +\value{ +a population object +} +\description{ +This function is used to load a population from a file on your disk (it must be in your working directory) +} diff --git a/man/loadResults.Rd b/man/loadResults.Rd new file mode 100644 index 0000000..570b4ba --- /dev/null +++ b/man/loadResults.Rd @@ -0,0 +1,11 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.lib.R +\name{loadResults} +\alias{loadResults} +\title{Load the results of a fit} +\usage{ +loadResults(fileName) +} +\description{ +Load the results of a fit +} diff --git a/man/make.counter.Rd b/man/make.counter.Rd new file mode 100644 index 0000000..a5b57c6 --- /dev/null +++ b/man/make.counter.Rd @@ -0,0 +1,11 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.lib.R +\name{make.counter} +\alias{make.counter} +\title{Function used to create the counter for building clf$experiment$id} +\usage{ +make.counter() +} +\description{ +Function used to create the counter for building clf$experiment$id +} diff --git a/man/makeFeatureAnnot.Rd b/man/makeFeatureAnnot.Rd new file mode 100644 index 0000000..f5ca88a --- /dev/null 
+++ b/man/makeFeatureAnnot.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.visu.R +\name{makeFeatureAnnot} +\alias{makeFeatureAnnot} +\title{Prints as text the detail on a given experiment along with summarized results (if computed)} +\usage{ +makeFeatureAnnot(pop, X, y, clf) +} +\arguments{ +\item{pop:}{a population of models} + +\item{X:}{the X dataset where to compute the abundance and prevalence} + +\item{y:}{the target class} + +\item{clf:}{an object containing the different parameters of the classifier} +} +\value{ +a list with two data.frames one containing the coefficients per each model and the other a data.frame on the features +} +\description{ +This function takes a population of models and creates a table with annotation on the features, +such as prevalence in the models and dataset as well as different statistics +} diff --git a/man/makeFeatureModelPrevalenceNetworkCooccur.Rd b/man/makeFeatureModelPrevalenceNetworkCooccur.Rd new file mode 100644 index 0000000..5d44251 --- /dev/null +++ b/man/makeFeatureModelPrevalenceNetworkCooccur.Rd @@ -0,0 +1,32 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.visu.R +\name{makeFeatureModelPrevalenceNetworkCooccur} +\alias{makeFeatureModelPrevalenceNetworkCooccur} +\title{Prints as text the detail on a given experiment along with summarized results (if computed)} +\usage{ +makeFeatureModelPrevalenceNetworkCooccur( + pop.noz, + feature.annot, + alpha = 0.05, + verbose = TRUE, + layout = "circlular" +) +} +\arguments{ +\item{pop.noz:}{a data.frame of in features in the rows and models in the columns. 
+This table contains the feature coefficients in the models and is obtained by makeFeatureAnnot()} + +\item{feature.annot:}{a data frame with annotation on features obtained by makeFeatureAnnot()} + +\item{alpha:}{the significance p-value threshold of the co-occurrence.} + +\item{verbose:}{print out information during run} + +\item{layout:}{the network layout by default is circular (layout_in_circle) and will be a weighted Fruchterman-Reingold otherwise} +} +\value{ +plots a graph +} +\description{ +This function will use the cooccur package to compute the co-occurrence of features in a population of models +} diff --git a/man/makeFeatureModelPrevalenceNetworkMiic.Rd b/man/makeFeatureModelPrevalenceNetworkMiic.Rd new file mode 100644 index 0000000..2ac7b54 --- /dev/null +++ b/man/makeFeatureModelPrevalenceNetworkMiic.Rd @@ -0,0 +1,32 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.visu.R +\name{makeFeatureModelPrevalenceNetworkMiic} +\alias{makeFeatureModelPrevalenceNetworkMiic} +\title{Plots the feature co-occurrence network of a population of models computed with the miic package} +\usage{ +makeFeatureModelPrevalenceNetworkMiic( + pop.noz, + feature.annot, + cor.th = 0.3, + verbose = TRUE, + layout = "circlular" +) +} +\arguments{ +\item{pop.noz:}{a data.frame with features in the rows and models in the columns. 
+This table contains the feature coefficients in the models and is obtained by makeFeatureAnnot()} + +\item{feature.annot:}{a data frame with annotation on features obtained by makeFeatureAnnot()} + +\item{cor.th:}{a threshold abtained on the partial correlation value} + +\item{verbose:}{print out information during run} + +\item{layout:}{the network layout by default is circular (layout_in_circle) and will be a weighted Fruchterman-Reingold otherwise} +} +\value{ +plots a graph +} +\description{ +This function will use the miic package to compute the co-occurance of features in a population of models +} diff --git a/man/mergeMeltBestScoreCV.Rd b/man/mergeMeltBestScoreCV.Rd new file mode 100644 index 0000000..60efa90 --- /dev/null +++ b/man/mergeMeltBestScoreCV.Rd @@ -0,0 +1,32 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.lib.R +\name{mergeMeltBestScoreCV} +\alias{mergeMeltBestScoreCV} +\title{mergeMeltBestScoreCV} +\usage{ +mergeMeltBestScoreCV( + list.results.digest, + k_catalogue = NULL, + score = "auc_", + penalty = 0, + min.kfold.nb = FALSE +) +} +\arguments{ +\item{list.results.digest:}{a list of digest objects one for each learner used. For example, list(res.terda.digest, res.terga.digest, res.terbeam.digest)} + +\item{k_catalogue:}{the k_catalogue that will serve to build the result matrix (default:NULL)} + +\item{score:}{the name of the score that needs to be used for the whole dataset visualization.} + +\item{min.kfold.nb:}{wether we should restrict all experiments in the smallest number of k-folds of a comparative analyses (default = FALSE)} +} +\value{ +a list of two data.frames +} +\description{ +mergeMeltBestScoreCV returns a list of data frames that contain the best performance of the different learners without any focus on sparsity. 
+} +\details{ +Merge a list of cross validation scores form digest results +} diff --git a/man/mergeMeltImportanceCV.Rd b/man/mergeMeltImportanceCV.Rd new file mode 100644 index 0000000..a2fcee5 --- /dev/null +++ b/man/mergeMeltImportanceCV.Rd @@ -0,0 +1,56 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.lib.R +\name{mergeMeltImportanceCV} +\alias{mergeMeltImportanceCV} +\title{mergeMeltImportanceCV} +\usage{ +mergeMeltImportanceCV( + list.results, + filter.cv.prev = 0.5, + min.kfold.nb = FALSE, + type = "mda", + learner.grep.pattern = "*", + nb.top.features = 25, + feature.selection = NULL, + fixed.order = FALSE, + scaled.importance = TRUE, + make.plot = TRUE, + main = FALSE, + cv.prevalence = TRUE +) +} +\arguments{ +\item{list.results.digest:}{a list of digest objects one for each learner used. For example, list(res.terda.digest, res.terga.digest, res.terbeam.digest)} + +\item{filter.cv.prev:}{filter variable for each learner based on the appearence prevalence in the cross validation.} + +\item{min.kfold.nb:}{wether we should restrict all experiments in the smallest number of k-folds of a comparative analyses (default = FALSE)} + +\item{type:}{the type of importance "mda (mean decreased accuracy)" or "pda (prevalence decreased accuracy)" (default = mda)} + +\item{learner.grep.pattern:}{select a subset of learners using a grep pattern (default:"*")} + +\item{nb.top.features:}{the number of top features to focus on the plot} + +\item{feature.selection:}{the names of the features to be selected (default:NULL)} + +\item{fixed.order:}{if the order of features in the plot should follow the feature selection one (default = FALSE)} + +\item{scaled.importance:}{the scaled importance is the importance multipied by the prevalence in the folds. 
If (default = TRUE) this will be used, the mean mda +will be scaled by the prevalence of the feature in the folds and ordered subsequently} + +\item{make.plot:}{make a plot for all the learners} + +\item{main:}{should add the title to the graph for correct alignment (default:FALSE)} + +\item{cv.prevalence:}{whether or not to plot the distribution of the prevalence of the feature in the top-models for each k-fold in the graph (default:TRUE)} +} +\value{ +a list of several data.frames and a ggplot object +} +\description{ +mergeMeltImportanceCV returns a list of data frames that contain the feature importance of the different learners without any focus on sparsity. +} +\details{ +Merge a list of cross validation scores from digest results +} diff --git a/man/mergeMeltScoreCV.Rd b/man/mergeMeltScoreCV.Rd new file mode 100644 index 0000000..5d67d0e --- /dev/null +++ b/man/mergeMeltScoreCV.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.lib.R +\name{mergeMeltScoreCV} +\alias{mergeMeltScoreCV} +\title{mergeMeltScoreCV} +\usage{ +mergeMeltScoreCV( + list.results.digest, + k_catalogue, + generalization = TRUE, + score = "auc_" +) +} +\arguments{ +\item{list.results.digest:}{a list of digest objects one for each learner used. For example, list(res.terda.digest, res.terga.digest, res.terbeam.digest)} + +\item{k_catalogue:}{the k_catalogue that will serve to build the result matrix} + +\item{generalization:}{get results from CV generalization (if TRUE) or empirical otherwise (default: TRUE)} + +\item{score:}{the name of the score that needs to be used for the whole dataset visualization.} +} +\value{ +a list of two data.frames +} +\description{ +mergeMeltScoreCV returns a list of data frames that contain the performance of each digest in the list with their sparsity. 
+} +\details{ +Merge a list of cross validation scores form digest results +} diff --git a/man/mergeMeltScoreEmpirical.Rd b/man/mergeMeltScoreEmpirical.Rd new file mode 100644 index 0000000..87d9e64 --- /dev/null +++ b/man/mergeMeltScoreEmpirical.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.lib.R +\name{mergeMeltScoreEmpirical} +\alias{mergeMeltScoreEmpirical} +\title{mergeMeltScoreEmpirical} +\usage{ +mergeMeltScoreEmpirical(list.results.digest, k_catalogue, score = "fit_") +} +\arguments{ +\item{list.results.digest:}{a list of digest objects one for each learner used. For example, list(res.terda.digest, res.terga.digest, res.terbeam.digest)} + +\item{k_catalogue:}{the k_catalogue that will serve to build the result matrix} + +\item{score:}{which score is to be used for value (default: fit_)} +} +\value{ +a data.frame +} +\description{ +mergeMeltScoreEmpirical returns a data frames that contain the performance of each digest in the list with their sparsity. +} +\details{ +Merge a list of empirical scores form digest results +} diff --git a/man/mergeResults.Rd b/man/mergeResults.Rd new file mode 100644 index 0000000..c9ef842 --- /dev/null +++ b/man/mergeResults.Rd @@ -0,0 +1,35 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.lib.R +\name{mergeResults} +\alias{mergeResults} +\title{mergeResults} +\usage{ +mergeResults( + list.results, + sparsity = NULL, + penalty = 0.001, + best.k = NULL, + colors = NULL, + pch = NULL +) +} +\arguments{ +\item{list.results:}{a list of Experiment objects one for each learner used. For example, list(res.terda, res.terga, res.terbeam)} + +\item{sparsity:}{Sometimes a given method will have results with somehow different sparsity. 
This param will allow to set the catalogue of sparsity} + +\item{best.k:}{a vector defining wether a given k should be used to set the best model selection (default:NULL).} + +\item{colors:}{a vector defining the colors to be used in the graphics. If not specified they will be set by default. (default:NULL).} + +\item{pch:}{a vector defining the shape of the points to be used in the graphics. If not specified they will be set by default. (default:NULL).} +} +\value{ +list of data.frames and lists +} +\description{ +mergeResults returns a list of data frames that contain the performance of each digest in the list with their sparsity. +} +\details{ +Merge a list of Scores form a digest results +} diff --git a/man/metal.Rd b/man/metal.Rd new file mode 100644 index 0000000..62d89f1 --- /dev/null +++ b/man/metal.Rd @@ -0,0 +1,98 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/metal.R +\name{metal} +\alias{metal} +\title{metal: metal searching algorithm} +\usage{ +metal( + sparsity = 1:10, + max.nb.features = 1000, + popSaveFile = "NULL", + saveFiles = FALSE, + pathSave = "NULL", + language = "mix", + scoreFormula = scoreRatio, + epsilon = "NULL", + objective = "auc", + k_penalty = 0, + evalToFit = "accuracy_", + estimate_coefs = FALSE, + intercept = "NULL", + testAllSigns = FALSE, + plot = FALSE, + verbose = TRUE, + warnings = FALSE, + debug = FALSE, + print_ind_method = "short", + parallelize.folds = TRUE, + nCores = 10, + seed = "NULL", + experiment.id = "NULL", + experiment.description = "NULL", + experiment.save = "nothing", + list.clfs = "NULL", + unificator.method = "terga2", + unificator.evolver = "v2m_new" +) +} +\arguments{ +\item{language}{is the language that is used by the different algorithms {bin, bininter, ter, terinter, ratio}, (default:"terinter")} + +\item{sparsity:}{number of features in a given model. 
This is a vector containing multiple model sizes.} + +\item{max.nb.features:}{focuses only on the subset of top most significant features (default:1000)} + +\item{popSaveFile:}{(??)} + +\item{saveFiles:}{??} + +\item{scoreFormula:}{a Function that contains the ratio Formula or other specific ones} + +\item{epsilon:}{a small value to be used with the ratio language (useCustomLanguage) (default: NULL). When null it is going to be calculated by the minimum value of X divided by 10.} + +\item{objective:}{this can be auc, cor or aic. Terga can also predict regression, other than class prediction. (default:auc)} + +\item{estimate_coefs:}{non ternary solution for the aic objective (default:FALSE)} + +\item{evalToFit:}{The model performance attribute to use as fitting score (default:"accuracy_"). Possible choices are c("fit_","auc_","accuracy_","precision_","recall_","f_score_")} + +\item{k_penalty:}{Penalization of the fit by the k_sparsity (default: 0)} + +\item{intercept:}{(??) (default:NULL)} + +\item{testAllSigns:}{??} + +\item{plot:}{plot graphics indicating the evolution of the simulation (default:FALSE)} + +\item{verbose:}{print out information on the progress of the algorithm (default:TRUE)} + +\item{warnings:}{Print out warnings when running (default:FALSE).} + +\item{debug:}{print debug information (default:FALSE)} + +\item{print_ind_method:}{One of c("short","graphical") indicates how to print a model and subsequently a population during the run (default:"short").} + +\item{parallelize.folds:}{parallelize folds when cross-validating (default:TRUE)} + +\item{nCores:}{the number of cores to execute the program. If nCores=1 then the program runs in a non parallel mode} + +\item{seed:}{the seed to be used for reproducibility. 
If seed=NULL then it is not taken into account (default:NULL).} + +\item{experiment.id:}{The id of the experiment that is to be used in the plots and comparative analyses (default is the learner's name, when not specified)} + +\item{experiment.description:}{A longer description of the experiment. This is important when many experiments are run and can also be printed by the printExperiment function.} + +\item{experiment.save:}{Data from an experiment can be saved with different levels of completeness, with options to be selected from c("nothing", "minimal", "full"), default is "nothing"} + +\item{list.clfs:}{list of Generator and Unificator} + +\item{unificator.method:}{the default unificator is a terga2. Other one specified will yield a stop of the program.} + +\item{unificator.evolver:}{the evolve method used by the unificator, which is by default a terga2 (default:"v2m_new").} +} +\value{ +an object containing a list of parameters for this classifier +} +\description{ +metal is a model search algorithm on a list of beam search approach and get the populations into GA. 
+} diff --git a/man/modelCollectionToPopulation.Rd b/man/modelCollectionToPopulation.Rd new file mode 100644 index 0000000..9e1d287 --- /dev/null +++ b/man/modelCollectionToPopulation.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.lib.R +\name{modelCollectionToPopulation} +\alias{modelCollectionToPopulation} +\title{Transform a model collection to a population (or list of model objects)} +\usage{ +modelCollectionToPopulation(mod.collection) +} +\arguments{ +\item{mod.collection:}{a modelCollection object organized by k_sparsity} +} +\description{ +Transform a model collection to a population (or list of model objects) +} diff --git a/man/modelToDenseVec.Rd b/man/modelToDenseVec.Rd new file mode 100644 index 0000000..9063150 --- /dev/null +++ b/man/modelToDenseVec.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.lib.R +\name{modelToDenseVec} +\alias{modelToDenseVec} +\title{Transform the model object onto dense format (long) one} +\usage{ +modelToDenseVec(natts, mod) +} +\arguments{ +\item{natts:}{the number of attributes} + +\item{mod:}{a predomics model object} +} +\value{ +a dense (long) format model +} +\description{ +Builds a model object based on model that is in the dense (long) format. 
+} diff --git a/man/multipleRR.Rd b/man/multipleRR.Rd new file mode 100644 index 0000000..4e2c839 --- /dev/null +++ b/man/multipleRR.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/terda.lib.R +\name{multipleRR} +\alias{multipleRR} +\title{multipleRR} +\usage{ +multipleRR(clf, X, y, w, n, remove.zero.vec = TRUE) +} +\arguments{ +\item{clf:}{the classifier parameter object} + +\item{X:}{the data matrix with variables in the rows and observations in the columns} + +\item{y:}{the response vector} + +\item{w:}{a vector of wi coefficients} + +\item{n:}{number of round roundings to compute} + +\item{remove.zero.vec:}{whether to remove the zero vectors} +} +\value{ +an population of dense vectors +} +\description{ +computes multiple randomized rounding for a given vector of wi +} diff --git a/man/multipleRR_par.Rd b/man/multipleRR_par.Rd new file mode 100644 index 0000000..94e86c0 --- /dev/null +++ b/man/multipleRR_par.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/terda.lib.R +\name{multipleRR_par} +\alias{multipleRR_par} +\title{multipleRR_par} +\usage{ +multipleRR_par(clf, X, y, w, n, remove.zero.vec = TRUE) +} +\arguments{ +\item{clf:}{the classifier parameter object} + +\item{X:}{the data matrix with variables in the rows and observations in the columns} + +\item{y:}{the response vector} + +\item{w:}{a vector of wi coefficients} + +\item{n:}{number of round roundings to compute} + +\item{remove.zero.vec:}{whether to remove the zero vectors} +} +\value{ +an population of dense vectors +} +\description{ +computes in parallel multiple randomized rounding for a given vector of wi +} diff --git a/man/mutate.Rd b/man/mutate.Rd new file mode 100644 index 0000000..db4c838 --- /dev/null +++ b/man/mutate.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/terga1.lib.R +\name{mutate} +\alias{mutate} +\title{Changes feature 
indexes in a given percentage of models.} +\usage{ +mutate(clf, pop, selection, seed = NULL) +} +\arguments{ +\item{clf:}{the classifier parameter object} + +\item{pop:}{A population (i.e. list) of index vectors} + +\item{selection:}{Indexes of the population pointing to the subset of the models to be changed} + +\item{seed:}{For reproductibility purpose to fix the random generator number.} +} +\value{ +a population of models among which the mutated ones +} +\description{ +This function is used in terga1 will create new combinations of features based of existing ones from the parents. +} diff --git a/man/myAssert.Rd b/man/myAssert.Rd new file mode 100644 index 0000000..1a12546 --- /dev/null +++ b/man/myAssert.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/minitools.R +\name{myAssert} +\alias{myAssert} +\title{Asserts a condition and prints a message or stops the block} +\usage{ +myAssert(condition, message, stop = TRUE) +} +\arguments{ +\item{condition:}{condition to be tested} + +\item{message:}{message to be printed} + +\item{stop:}{if TRUE stop the block} +} +\description{ +Asserts a condition and prints a message or stops the block +} diff --git a/man/myAssertNotNullNorNa.Rd b/man/myAssertNotNullNorNa.Rd new file mode 100644 index 0000000..e9fd7f3 --- /dev/null +++ b/man/myAssertNotNullNorNa.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/minitools.R +\name{myAssertNotNullNorNa} +\alias{myAssertNotNullNorNa} +\title{Asserts the existance of an object and prints a message or stops the block} +\usage{ +myAssertNotNullNorNa(obj, message = "", stop = FALSE) +} +\arguments{ +\item{obj:}{condition to be tested} + +\item{message:}{message to be printed} + +\item{stop:}{if TRUE stop the block} +} +\description{ +Asserts the existance of an object and prints a message or stops + the block +} diff --git a/man/names2index.Rd b/man/names2index.Rd new file mode 100644 
index 0000000..bac133d --- /dev/null +++ b/man/names2index.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.lib.R +\name{names2index} +\alias{names2index} +\title{names2index} +\usage{ +names2index(X, var.names) +} +\arguments{ +\item{X:}{the dataset} + +\item{var.names:}{the feature names vector} +} +\value{ +the index of the features +} +\description{ +Transforms feature names feature indexes +} diff --git a/man/normModelCoeffs.Rd b/man/normModelCoeffs.Rd new file mode 100644 index 0000000..bd52e36 --- /dev/null +++ b/man/normModelCoeffs.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.visu.R +\name{normModelCoeffs} +\alias{normModelCoeffs} +\title{Normalize the model coefficients needed for the plot} +\usage{ +normModelCoeffs(mod, X, y, sort.features = FALSE, sort.ind = NULL) +} +\arguments{ +\item{mod:}{a model to plot} + +\item{X:}{the data matrix with variables in the rows and observations in the columns} + +\item{y:}{the class vector} + +\item{sort.features:}{wether the features need to be sorted by correlation with 'y' or not (default:FALSE)} + +\item{sort.ind:}{computing sorting can take time if computed for every model and can be computed outside the function and passed as a parameter} +} +\value{ +the normalized coefficients +} +\description{ +Normalize the model coefficients needed for the plot +} diff --git a/man/obesity.Rd b/man/obesity.Rd new file mode 100644 index 0000000..801ec8b --- /dev/null +++ b/man/obesity.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{obesity} +\alias{obesity} +\title{Obesity (frequencies) from the MetaHIT study} +\description{ +This dataset consists of frequency abundance files as downloaded from http://waldronlab.io/curatedMetagenomicData/ +This is a list containing two elements: (i) the X data matrix with 1045 species and 292 
observations and (ii) patient class = -1 (n=167) and healthy controls (n=96). +Caution, this dataset also has a class 0 with overweight patients, which needs to be omitted from both X and y +} +\author{ +Le Chatelier, Emmanuelle, Trine Nielsen, Junjie Qin, Edi Prifti, Falk Hildebrand, Gwen Falony, Mathieu Almeida, et al “Richness of human gut microbiome correlates with metabolic markers.” Nature 500, no. 7464 (April 9, 2014): 541–546. +} +\keyword{microbiome} +\keyword{obesity} +\keyword{species} diff --git a/man/plotAUC.Rd b/man/plotAUC.Rd new file mode 100644 index 0000000..b7bfcad --- /dev/null +++ b/man/plotAUC.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.visu.R +\name{plotAUC} +\alias{plotAUC} +\title{Analyze the results from a given classifier} +\usage{ +plotAUC(score, y, main = "", ci = TRUE, percent = TRUE) +} +\arguments{ +\item{score:}{this is the y^ of a given model} + +\item{y:}{the class to be predicted} + +\item{main:}{title of the graph} + +\item{ci:}{the point shape for the graph} + +\item{percent:}{color for the graph} +} +\value{ +a roc object +} +\description{ +Analyze the results from a given classifier. 
+} diff --git a/man/plotAUCg.Rd b/man/plotAUCg.Rd new file mode 100644 index 0000000..983ce2b --- /dev/null +++ b/man/plotAUCg.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.visu.R +\name{plotAUCg} +\alias{plotAUCg} +\title{Plot the AUC of a given classifier} +\usage{ +plotAUCg(mod = NULL, score, y, main = "", ci = TRUE, show.intercept = TRUE) +} +\arguments{ +\item{mod:}{a predomics model object (default = NULL)} + +\item{score:}{this is the y^ of a given model} + +\item{y:}{the class to be predicted} + +\item{main:}{title of the graph} + +\item{ci:}{the point shape for the graph} + +\item{show.intercept:}{plot or not the intercept on the graph (default:TRUE)} +} +\value{ +a ggplot object +} +\description{ +Analyze the results from a given classifier. +} diff --git a/man/plotAbundanceByClass.Rd b/man/plotAbundanceByClass.Rd new file mode 100644 index 0000000..8566d7c --- /dev/null +++ b/man/plotAbundanceByClass.Rd @@ -0,0 +1,40 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.visu.R +\name{plotAbundanceByClass} +\alias{plotAbundanceByClass} +\title{Plots the prevalence of a list of features in the whole dataset and per each class} +\usage{ +plotAbundanceByClass( + features, + X, + y, + topdown = TRUE, + main = "", + plot = TRUE, + col.pt = c("deepskyblue4", "firebrick4"), + col.bg = c("deepskyblue1", "firebrick1") +) +} +\arguments{ +\item{features:}{a list of features or features indexes for which we wish to compute prevalence} + +\item{X:}{dataset where to compute the prevalence} + +\item{y:}{if provided it will also compute hte prevalence per each class (default:NULL)} + +\item{topdown:}{showing features from top-down or the other way around (default:TRUE)} + +\item{main:}{main title (default:none)} + +\item{plot:}{if TRUE this provides a plot, otherwise will return different metrics such as prevalence and enrichment statistics} + +\item{col.pt:}{colors for the 
point border (-1:deepskyblue4, 1:firebrick4)} + +\item{col.bg:}{colors for the point fill (-1:deepskyblue1, 1:firebrick1)} +} +\value{ +a ggplot object +} +\description{ +Plots the abundance of a given number of features for each class and tests significance +} diff --git a/man/plotComparativeBestCV.Rd b/man/plotComparativeBestCV.Rd new file mode 100644 index 0000000..a4138fc --- /dev/null +++ b/man/plotComparativeBestCV.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.visu.R +\name{plotComparativeBestCV} +\alias{plotComparativeBestCV} +\title{Plots a graph for a given score} +\usage{ +plotComparativeBestCV( + digested.results, + ylim = c(0.5, 1), + generalization = TRUE, + score = "auc_", + ci = TRUE, + main = "" +) +} +\arguments{ +\item{digested.results:}{a list of data.frames containing performance results from a lists of learners. This data object is returned by the function merge_digestScores()} + +\item{ylim:}{y-axis zoom in the plot} + +\item{score:}{default (auc_) score for the cross-validation representation} + +\item{main:}{name of the graphic} +} +\value{ +A ggplot graphs +} +\description{ +plotComparativeCV plots a digested.results data object for a given score. +} diff --git a/man/plotComparativeCV.Rd b/man/plotComparativeCV.Rd new file mode 100644 index 0000000..c144fd5 --- /dev/null +++ b/man/plotComparativeCV.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.visu.R +\name{plotComparativeCV} +\alias{plotComparativeCV} +\title{Plots a graph for a given score} +\usage{ +plotComparativeCV( + digested.results, + ylim = c(0.5, 1), + generalization = TRUE, + score = "auc_", + ci = TRUE, + main = "" +) +} +\arguments{ +\item{digested.results:}{a list of data.frames containing performance results from a lists of learners. 
This data object is returned by the function merge_digestScores()} + +\item{ylim:}{y-axis zoom in the plot} + +\item{generalization:}{when (default:TRUE) then the generalization score will be used} + +\item{score:}{default (auc_) score for the cross-validation representation} + +\item{ci:}{should the confidence interval be plotted (default:TRUE)} + +\item{main:}{name of the graphic} +} +\value{ +A ggplot graph +} +\description{ +plotComparativeCV plots a digested.results data object for a given score. +} diff --git a/man/plotComparativeEmpiricalScore.Rd b/man/plotComparativeEmpiricalScore.Rd new file mode 100644 index 0000000..a58b9db --- /dev/null +++ b/man/plotComparativeEmpiricalScore.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.visu.R +\name{plotComparativeEmpiricalScore} +\alias{plotComparativeEmpiricalScore} +\title{Plots a graph for a given score} +\usage{ +plotComparativeEmpiricalScore( + digested.results, + ylim = c(0.5, 1), + score = "auc_", + main = "" +) +} +\arguments{ +\item{digested.results:}{a list of data.frames containing performance results from a list of learners. This data object is returned by the function merge_digestScores()} + +\item{ylim:}{y-axis zoom in the plot} + +\item{score:}{default (auc_) score} + +\item{main:}{name of the graphic} +} +\value{ +A ggplot graph +} +\description{ +plotComparativeEmpiricalScore plots a digested.results data object for a given score. 
+} diff --git a/man/plotComparativeResults.Rd b/man/plotComparativeResults.Rd new file mode 100644 index 0000000..ae42619 --- /dev/null +++ b/man/plotComparativeResults.Rd @@ -0,0 +1,33 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.visu.R +\name{plotComparativeResults} +\alias{plotComparativeResults} +\title{Plot performance scores for multiple learners} +\usage{ +plotComparativeResults( + digested.results, + plot = TRUE, + ylim = c(0.5, 1), + best = FALSE, + ci = FALSE, + main = "", + mode = "classification" +) +} +\arguments{ +\item{digested.results:}{a list of data.frames containing performance results from a list of learners. This data object is returned by the function merge_digestScores()} + +\item{ylim:}{y-axis zoom in the plot} + +\item{best:}{a switch to plot the best values instead of declining by k_sparsity} + +\item{main:}{name of the graphic} + +\item{mode:}{either classification or regression (default:classification)} +} +\value{ +A list of ggplot graphs if plot is set to FALSE and a panel organized graph otherwise. +} +\description{ +plotComparativeResults plots a digested.results data object to compare performance results between different learners. +} diff --git a/man/plotComparativeResultsBest.Rd b/man/plotComparativeResultsBest.Rd new file mode 100644 index 0000000..8e1fcf5 --- /dev/null +++ b/man/plotComparativeResultsBest.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.visu.R +\name{plotComparativeResultsBest} +\alias{plotComparativeResultsBest} +\title{Plot performance scores for multiple learners} +\usage{ +plotComparativeResultsBest(digested.results, plot = TRUE, ylim = c(0.5, 1)) +} +\arguments{ +\item{digested.results:}{a list of data.frames containing performance results from a list of learners. 
This data object is returned by the function merge_digestScores()} + +\item{ylim:}{y-axis zoom in the plot} +} +\value{ +A list of ggplot graphs if plot is set to FALSE and a pannel organized graph otherwise. +} +\description{ +plotComparativeResultsBest plots a digested.results data object to compare performance results between different learners focusing at the best model. +} diff --git a/man/plotFeatureModelCoeffs.Rd b/man/plotFeatureModelCoeffs.Rd new file mode 100644 index 0000000..b02cdd1 --- /dev/null +++ b/man/plotFeatureModelCoeffs.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.visu.R +\name{plotFeatureModelCoeffs} +\alias{plotFeatureModelCoeffs} +\title{Plots the prevalence of a list of features in the whole dataset and per each class} +\usage{ +plotFeatureModelCoeffs( + feat.model.coeffs, + topdown = TRUE, + main = "", + col = c("deepskyblue1", "white", "firebrick1"), + vertical.label = TRUE +) +} +\arguments{ +\item{feat.model.coeffs:}{feature vs. 
model coefficient table} + +\item{topdown:}{showing features from top-down or the other way around (default:TRUE)} + +\item{main:}{main title (default:none)} + +\item{col:}{colors to be used for the coefficients (default: -1 = deepskyblue1, 0 = white, 1 = firebrick1)} + +\item{vertical.label:}{whether the x-axis labels should be vertical or not (default:TRUE)} +} +\value{ +a ggplot object +} +\description{ +Plots the coefficients of a subset of features in the models where they are found +} diff --git a/man/plotImportanceFeaturesFBMobjects.Rd b/man/plotImportanceFeaturesFBMobjects.Rd new file mode 100644 index 0000000..70a0e27 --- /dev/null +++ b/man/plotImportanceFeaturesFBMobjects.Rd @@ -0,0 +1,37 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/analyzeImportantFeaturesFBM.R +\name{plotImportanceFeaturesFBMobjects} +\alias{plotImportanceFeaturesFBMobjects} +\title{Visualize a list containing outputs of getImportanceFeaturesFBMobjects} +\usage{ +plotImportanceFeaturesFBMobjects( + FBMobjList, + verbose = TRUE, + nb.top.features = 100, + makeplot = TRUE +) +} +\arguments{ +\item{FBMobjList}{List of outputs of getImportanceFeaturesFBMobjects +function (1 list per experiment to combine)} + +\item{verbose}{print out information} + +\item{nb.top.features}{features to retain for visualization (top features +with highest mean feature importance across datasets)} + +\item{makeplot}{make a pdf file with the resulting plots (default:TRUE)} +} +\value{ +Combined visualization of feature prevalence in FBM + feature +importance + feature effect size across groups + feature prevalence across +groups in different predomics prediction tasks +} +\description{ +Here we combine the 4 datasets generated by +getImportanceFeaturesFBMobjects function from different prediction experiments +(clf object + X + y) ; designed to combine predomics results with different +X, y source data for unified visualization (feature prevalence in FBM, +feature importance, 
feature effect size across groups, feature prevalence +across groups) +} diff --git a/man/plotModel.Rd b/man/plotModel.Rd new file mode 100644 index 0000000..180cb8a --- /dev/null +++ b/man/plotModel.Rd @@ -0,0 +1,46 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.visu.R +\name{plotModel} +\alias{plotModel} +\title{Plots a model or a population of model objects as barplots of scaled coefficients.} +\usage{ +plotModel( + mod, + X, + y, + sort.features = FALSE, + sort.ind = NULL, + feature.name = FALSE, + col.sign = c("deepskyblue1", "firebrick1"), + main = "", + slim = FALSE, + importance = FALSE, + res_clf = NULL +) +} +\arguments{ +\item{mod:}{a model to plot} + +\item{X:}{the data matrix with variables in the rows and observations in the columns} + +\item{y:}{the class vector} + +\item{sort.features:}{whether the features need to be sorted by correlation with 'y' or not (default: FALSE)} + +\item{sort.ind:}{computing sorting can take time if computed for every model and can be computed outside the function and passed as a parameter} + +\item{feature.name:}{show the name of the features (default:FALSE)} + +\item{col.sign:}{the colors of the coefficients based on the sign of the coefficients (default: -1=deepskyblue1, 1:firebrick1)} + +\item{main:}{possibility to change the title of the function (default:"")} + +\item{slim:}{plot without axis information (default:FALSE)} + +\item{importance:}{the importance (mda) of the features in crossval} + +\item{res_clf:}{the result of the learning process (default:NULL). 
If provided, information on MDA will be extracted for the importance graphic.} +} +\description{ +Plots a model or a population of models as barplots, representing each feature, the length being the coefficient +} diff --git a/man/plotModelScore.Rd b/man/plotModelScore.Rd new file mode 100644 index 0000000..c38a09c --- /dev/null +++ b/man/plotModelScore.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.visu.R +\name{plotModelScore} +\alias{plotModelScore} +\title{Plots a model or a population of model objects as barplots of scaled coefficients.} +\usage{ +plotModelScore( + mod = NULL, + y = NULL, + col.sign = c("deepskyblue1", "firebrick1"), + main = "" +) +} +\arguments{ +\item{mod:}{a model to plot} + +\item{y:}{the class to predict} + +\item{col.sign:}{the colors of the coefficients based on the sign of the coefficients (default: -1=deepskyblue1, 1:firebrick1)} + +\item{main:}{possibility to change the title of the function (default:"")} +} +\description{ +Plots a model score or a population of models as barplots, representing each feature, the length being the coefficient +} diff --git a/man/plotPopulation.Rd b/man/plotPopulation.Rd new file mode 100644 index 0000000..2c4c2c5 --- /dev/null +++ b/man/plotPopulation.Rd @@ -0,0 +1,40 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.visu.R +\name{plotPopulation} +\alias{plotPopulation} +\title{Plots a population of models (or a single model) objects as barplots of scaled coefficients.} +\usage{ +plotPopulation( + pop, + X, + y, + sort.features = FALSE, + sort.ind = NULL, + col.sign = c("deepskyblue1", "firebrick1"), + ncol = 10, + slim = FALSE, + importance = FALSE +) +} +\arguments{ +\item{pop:}{a population of models to plot} + +\item{X:}{the data matrix with variables in the rows and observations in the columns} + +\item{y:}{the class vector} + +\item{sort.features:}{whether the features need to be sorted by 
correlation with 'y' or not} + +\item{sort.ind:}{computing sorting can take time if computed for every model and can be computed outside the function and passed as a parameter} + +\item{col.sign:}{the colors of the coefficients based on the sign of the coefficients (default: -1=deepskyblue1,1:firebrick1)} + +\item{ncol:}{number of graphics for each line (default: 10)} + +\item{slim:}{plot without axis information (default:FALSE)} + +\item{importance:}{the importance (mda) of the features in crossval} +} +\description{ +Plots a model or a population of models as barplots, representing each feature, the length being the coefficient +} diff --git a/man/plotPrevalence.Rd b/man/plotPrevalence.Rd new file mode 100644 index 0000000..afd2f20 --- /dev/null +++ b/man/plotPrevalence.Rd @@ -0,0 +1,43 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.visu.R +\name{plotPrevalence} +\alias{plotPrevalence} +\title{Plots the prevalence of a list of features in the whole dataset and per each class} +\usage{ +plotPrevalence( + features, + X, + y, + topdown = TRUE, + main = "", + plot = TRUE, + col.pt = c("deepskyblue4", "firebrick4"), + col.bg = c("deepskyblue1", "firebrick1"), + zero.value = 0 +) +} +\arguments{ +\item{features:}{a list of features or features indexes for which we wish to compute prevalence} + +\item{X:}{dataset where to compute the prevalence} + +\item{y:}{if provided it will also compute the prevalence per each class (default:NULL)} + +\item{topdown:}{showing features from top-down or the other way around (default:TRUE)} + +\item{main:}{main title (default:none)} + +\item{plot:}{if TRUE this provides a plot, otherwise will return different metrics such as prevalence and enrichment statistics} + +\item{col.pt:}{colors for the point border (-1:deepskyblue4, 1:firebrick4)} + +\item{col.bg:}{colors for the point fill (-1:deepskyblue1, 1:firebrick1)} + +\item{zero.value:}{the value that specifies what is zero. 
This can be a different than 0 in log transformed data for instance (default = 0)} +} +\value{ +a ggplot object +} +\description{ +Plots the prevalence of a given number of features +} diff --git a/man/plotScoreBarcode.Rd b/man/plotScoreBarcode.Rd new file mode 100644 index 0000000..2db596a --- /dev/null +++ b/man/plotScoreBarcode.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.visu.R +\name{plotScoreBarcode} +\alias{plotScoreBarcode} +\title{Plots the barcode of the total score as well as positive and negative components} +\usage{ +plotScoreBarcode(dscore, y, nb.col.levels = 30, main = "") +} +\arguments{ +\item{dscore:}{an object containing different statistics on a model} + +\item{y:}{the class vector} + +\item{clf:}{an object containing the different parameters of the classifier} + +\item{nb.col.levels:}{number of distinct colors from the viridis palette (default:30)} + +\item{main:}{a title for the graphic} +} +\value{ +nothing +} +\description{ +Plots the barcode of the total score as well as positive and negative components +} diff --git a/man/population.Rd b/man/population.Rd new file mode 100644 index 0000000..86ad64c --- /dev/null +++ b/man/population.Rd @@ -0,0 +1,32 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/terga1.lib.R +\name{population} +\alias{population} +\title{Creates a population of index models.} +\usage{ +population( + clf, + size_ind, + size_world, + best_ancestor = NULL, + size_pop = NULL, + seed = NULL +) +} +\arguments{ +\item{clf:}{the classifier parameter object} + +\item{size_ind:}{The sparsity of the models. All the models of this population will have the same number of features.} + +\item{size_world:}{The number of features from which we can choose the indices. This is needed to compute the combinatory space search.} + +\item{best_ancestor:}{We can supply to the popolution an individual (vector with indeces) of a lower sparsity. 
This will ensure to seed part of the population with at least those genes. We added this feature after an observation that a local optimum of lower sparsity was lost in higher sparsities.} + +\item{size_pop:}{the number of models to produce (default=NULL). This information is stored in clf$params$size_pop, but this parameter allows to override it.} +} +\value{ +a population of index models +} +\description{ +This function is used in terga1 and generates a list of index vectors in the variable space. These vectors can be unique or not. NB that if clf$params$unique_vars is set to TRUE it can take a long time to come out of the while loop which ensures the uniqueness of the individuals. +} diff --git a/man/populationGet_X.Rd b/man/populationGet_X.Rd new file mode 100644 index 0000000..f4a870b --- /dev/null +++ b/man/populationGet_X.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.lib.R +\name{populationGet_X} +\alias{populationGet_X} +\title{Get the best model from a classifier result} +\usage{ +populationGet_X(element2get, toVec = TRUE, na.rm = TRUE) +} +\arguments{ +\item{element2get:}{the name of the attribute to get} + +\item{toVec:}{should the results be unlisted (default:TRUE)} + +\item{na.rm:}{delete the elements that are NA (default) when returning tovec} +} +\value{ +a vector of attributes +} +\description{ +Gets a given attribute from a population of predomics objects +} diff --git a/man/populationSet_X.Rd b/man/populationSet_X.Rd new file mode 100644 index 0000000..5e9b9c8 --- /dev/null +++ b/man/populationSet_X.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.lib.R +\name{populationSet_X} +\alias{populationSet_X} +\title{Set models with a given list of objects} +\usage{ +populationSet_X(pop, element2set = NULL, listwithelements = NULL) +} +\arguments{ +\item{element2set:}{the name of the attribute to set} + +\item{listwithelements:}{the 
list containing the elements to add} +} +\value{ +an updated population +} +\description{ +Sets a given attribute to the objects of a given population +} diff --git a/man/populationToDataFrame.Rd b/man/populationToDataFrame.Rd new file mode 100644 index 0000000..fb4eb80 --- /dev/null +++ b/man/populationToDataFrame.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.lib.R +\name{populationToDataFrame} +\alias{populationToDataFrame} +\title{populationToDataFrame} +\usage{ +populationToDataFrame( + pop, + attributes = c("learner", "language", "fit_", "unpenalized_fit_", "auc_", "accuracy_", + "cor_", "aic_", "intercept_", "eval.sparsity", "sign_", "precision_", "recall_", + "f1_") +) +} +\arguments{ +\item{pop:}{a list of model objects, (i.e. a population of models)} + +\item{attributes:}{the list of attributes that we wish to have in the data.frame (default:"learner","language","fit_", "unpenalized_fit_", "auc_", "accuracy_", "cor_", "aic_", "intercept_", "eval.sparsity", "sign_","precision_", "recall_","f1_")} +} +\value{ +a data frame with attributes for each model +} +\description{ +For each model in the list of models it will extract each attribute and create a dataframe needed for further exploration +} diff --git a/man/printClassifier.Rd b/man/printClassifier.Rd new file mode 100644 index 0000000..9eea90c --- /dev/null +++ b/man/printClassifier.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.visu.R +\name{printClassifier} +\alias{printClassifier} +\title{Prints as text the detail on a given Classifier object} +\usage{ +printClassifier(obj, indent = "\\t--- ") +} +\arguments{ +\item{obj:}{a Classifier object} + +\item{indent:}{a string (default:'tab---') that will precede each element of the object.} +} +\value{ +NULL if the object is not a valid Classifier +} +\description{ +This function prints a summary of a Classifier object. 
+} diff --git a/man/printExperiment.Rd b/man/printExperiment.Rd new file mode 100644 index 0000000..0cacd57 --- /dev/null +++ b/man/printExperiment.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.visu.R +\name{printExperiment} +\alias{printExperiment} +\title{Prints as text the detail on a given Experiment object} +\usage{ +printExperiment(obj, indent = "\\t--- ") +} +\arguments{ +\item{obj:}{an Experiment object} + +\item{indent:}{a string (default:'tab---') that will precede each element of the object.} +} +\value{ +NULL if the object is not a valid Experiment +} +\description{ +This function prints a summary of an Experiment object. +} diff --git a/man/printModel.Rd b/man/printModel.Rd new file mode 100644 index 0000000..3827db8 --- /dev/null +++ b/man/printModel.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.visu.R +\name{printModel} +\alias{printModel} +\title{Prints a model object as text.} +\usage{ +printModel(mod, method = "short", score = "fit_") +} +\arguments{ +\item{mod:}{a model to plot} + +\item{method:}{an object containing the different parameters of the classifier} + +\item{score:}{which score to show in the fit (default:fit_)} +} +\description{ +Prints a model object as text +} diff --git a/man/printModelCollection.Rd b/man/printModelCollection.Rd new file mode 100644 index 0000000..ebaa8a9 --- /dev/null +++ b/man/printModelCollection.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.visu.R +\name{printModelCollection} +\alias{printModelCollection} +\title{Prints as text the detail on a given ModelCollection object} +\usage{ +printModelCollection(obj, indent = "\\t--- ", method = 
"long") +} +\arguments{ +\item{obj:}{a ModelCollection object} + +\item{indent:}{a string (default:'tab---') that will precede each element of the object for the "long" method.} + +\item{method:}{the output method (default:long) will print for each k_sparsity a short information of the population of models, +while the short method will output the number of models for each k_sparsity} +} +\value{ +NULL if the object is not a valid ModelCollection. +} +\description{ +This function prints a ModelCollection object. For each k_sparsity it will show some detail of +the maximum first models +} diff --git a/man/printPopulation.Rd b/man/printPopulation.Rd new file mode 100644 index 0000000..cbb7469 --- /dev/null +++ b/man/printPopulation.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.visu.R +\name{printPopulation} +\alias{printPopulation} +\title{Prints a population of model objects as text.} +\usage{ +printPopulation(obj, method = "short", score = "fit_", indent = "") +} +\arguments{ +\item{obj:}{a population of models to plot} + +\item{method:}{if "digested" a short sumary (one line) will be printed, otherwise the method will contain the +specific way to print a model through the printModel() routine} + +\item{score:}{which score to show in the fit (default:fit_)} + +\item{indent:}{a string (default:'tab---') that will precede each element of the object.} +} +\description{ +Prints a population of model objects as text +} diff --git a/man/printy.Rd b/man/printy.Rd new file mode 100644 index 0000000..bf6e322 --- /dev/null +++ b/man/printy.Rd @@ -0,0 +1,15 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.visu.R +\name{printy} +\alias{printy} +\title{Prints as text the detail on a given object from the predomics package.} +\usage{ +printy(obj) +} +\arguments{ +\item{obj:}{an object from the predomics object} +} +\description{ +This function will summarize any of the predomics 
package objects such as can be an Experiment, +a Model, a Population of models or a ModelCollection +} diff --git a/man/resetTags.Rd b/man/resetTags.Rd new file mode 100644 index 0000000..fc9fd67 --- /dev/null +++ b/man/resetTags.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/terga2.lib.R +\name{resetTags} +\alias{resetTags} +\title{Resets selection, mutation and mate tags to inactive} +\usage{ +resetTags(pop, selected = FALSE, toBeMutated = FALSE, mate = -1) +} +\arguments{ +\item{pop:}{The population to be evolved} + +\item{selected:}{set to (default:FALSE) the selected attribute, if not null.} + +\item{toBeMutated:}{set to (default:FALSE) the selected attribute, if not null.} + +\item{mate:}{set to (default:-1) the selected attribute, if not null.} +} +\value{ +A modified population +} +\description{ +Resets selection, mutation and mate tags to inactive +} diff --git a/man/runClassifier.Rd b/man/runClassifier.Rd new file mode 100644 index 0000000..f1f8f54 --- /dev/null +++ b/man/runClassifier.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/predomics.R +\name{runClassifier} +\alias{runClassifier} +\title{Runs the learning on a dataset} +\usage{ +runClassifier(X, y, clf, x_test = NULL, y_test = NULL) +} +\arguments{ +\item{X:}{The dataset to classify} + +\item{y:}{The variable to predict} + +\item{clf:}{The classifier object containing the different settings of the classifier.} + +\item{x_test:}{if not NULL (default) this dataset will be used to evaluate the models in a subset for the feature importance} + +\item{y_test:}{if not NULL (default) this dataset will be used to evaluate the models in a subset for the feature importance} +} +\value{ +the classifier along with the classification results as a sub-element +} +\description{ +This function runs a classifier in a given dataset +} diff --git a/man/runCrossval.Rd b/man/runCrossval.Rd new file mode 100644 
index 0000000..927e91d --- /dev/null +++ b/man/runCrossval.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/predomics.R +\name{runCrossval} +\alias{runCrossval} +\title{Compute the cross-validation empirical and generalization scores} +\usage{ +runCrossval(X, y, clf, lfolds = NULL, nfolds = 10, return.all = FALSE) +} +\arguments{ +\item{X:}{the data matrix with variables in the rows and observations in the columns} + +\item{y:}{the response vector} + +\item{clf:}{the classifier parameter object} + +\item{nfolds:}{the number of folds for the cross-validation} + +\item{return.all:}{return all results from the crossvalidation for feature stability testing} +} +\value{ +a list containing empirical, generalisation scores for each fold as well as a matrix with the mean values. +} +\description{ +Compute the cross-validation empirical and generalization scores. +} diff --git a/man/savePopulation.Rd b/man/savePopulation.Rd new file mode 100644 index 0000000..4ddd252 --- /dev/null +++ b/man/savePopulation.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.lib.R +\name{savePopulation} +\alias{savePopulation} +\title{Save a population to a file} +\usage{ +savePopulation(pop, fileName, compress = TRUE) +} +\arguments{ +\item{pop:}{The population to be saved} + +\item{fileName:}{The name of the file where you want to save the population} +} +\description{ +You can use this function to save a population to a file on your disk (it will be in your working directory) +} diff --git a/man/saveResults.Rd b/man/saveResults.Rd new file mode 100644 index 0000000..495c369 --- /dev/null +++ b/man/saveResults.Rd @@ -0,0 +1,11 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.lib.R +\name{saveResults} +\alias{saveResults} +\title{Save the results of the fit function} +\usage{ +saveResults(fitResults, fileName, compress = TRUE) +} +\description{ 
+Save the results of the fit function +} diff --git a/man/scoreRatio.Rd b/man/scoreRatio.Rd new file mode 100644 index 0000000..457bc3f --- /dev/null +++ b/man/scoreRatio.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.lib.R +\name{scoreRatio} +\alias{scoreRatio} +\title{Computes the ^y score of the model as a ratio} +\usage{ +scoreRatio(class_1_score, class_2_score, epsilon = NULL) +} +\arguments{ +\item{class_1_score:}{the sum score for the features of class 1} + +\item{class_2_score:}{the sum score for the features of class 2} + +\item{epsilon:}{is a very small value that will would avoid Inf values in the ratio. This can be either specified in the when setting the classifier and if not specified will be set as the minimum number of the machine (e.g. 2.23e-308). Caution this should be adapted when working with other types of data.} +} +\value{ +a vector containing the predicted ^y score for each observation +} +\description{ +Computes the ^y score of the model as a ratio +} diff --git a/man/selectBestPopulation.Rd b/man/selectBestPopulation.Rd new file mode 100644 index 0000000..cfb3878 --- /dev/null +++ b/man/selectBestPopulation.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.lib.R +\name{selectBestPopulation} +\alias{selectBestPopulation} +\title{Select the top significant best part of the population} +\usage{ +selectBestPopulation(pop, score = "fit_", p = 0.05, k_penalty = 0, k_max = 0) +} +\arguments{ +\item{pop:}{a list of model objects} + +\item{score:}{the attribute of the model to be used for the evaluation} + +\item{p:}{the p-value threshold} + +\item{k_penalty:}{the penalty to apply to the score based on the k_sparsity (default:0)} + +\item{k_max:}{select the best population below a given threshold. 
If (default:0) no selection is performed.} +} +\value{ +a sub part of the population +} +\description{ +This function allows to select the best part of a population that is significantly not different from the best model +} diff --git a/man/selector_v1.Rd b/man/selector_v1.Rd new file mode 100644 index 0000000..6badd06 --- /dev/null +++ b/man/selector_v1.Rd @@ -0,0 +1,11 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/terga2.lib.R +\name{selector_v1} +\alias{selector_v1} +\title{Does an elite selection on a population} +\usage{ +selector_v1(pop, number, clf) +} +\description{ +This function is a template for other selectors, it takes a population and a number of individuals to select. The result is the \code{number} bests element of the population. +} diff --git a/man/sim_inter.Rd b/man/sim_inter.Rd new file mode 100644 index 0000000..907049a --- /dev/null +++ b/man/sim_inter.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/stability.lib.R +\name{sim_inter} +\alias{sim_inter} +\title{compare stability of different modeles (inter k)} +\usage{ +sim_inter(tmp, X) +} +\arguments{ +\item{X:}{dataset to classify} + +\item{tmp:}{the digested result from digest} +} +\value{ +a num +} +\description{ +This function compares stability of different modeles (inter k) +} diff --git a/man/sim_intra.Rd b/man/sim_intra.Rd new file mode 100644 index 0000000..f908343 --- /dev/null +++ b/man/sim_intra.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/stability.lib.R +\name{sim_intra} +\alias{sim_intra} +\title{compare stability of different modeles (intra k)} +\usage{ +sim_intra(tmp, X) +} +\arguments{ +\item{X:}{dataset to classify} + +\item{tmp:}{the digested result from digest} +} +\value{ +a num +} +\description{ +This function compares stability of different modeles (intra k) +} diff --git a/man/sortPopulation.Rd b/man/sortPopulation.Rd new file 
mode 100644 index 0000000..2fd0c0e --- /dev/null +++ b/man/sortPopulation.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.lib.R +\name{sortPopulation} +\alias{sortPopulation} +\title{sortPopulation} +\usage{ +sortPopulation(pop, evalToOrder = "fit_", decreasing = TRUE) +} +\arguments{ +\item{pop:}{a population (list) of evaluated predomics objects} + +\item{evalToOrder:}{the attribute to be used in the sorting (default:fit_)} + +\item{decreasing:}{whether the sorting should be decreasing or not (default:TRUE)} +} +\value{ +a sorted population of predomics objects +} +\description{ +Sort a population according to a given attribute (evalToOrder) +} diff --git a/man/sota.glmnet.Rd b/man/sota.glmnet.Rd new file mode 100644 index 0000000..0d584cf --- /dev/null +++ b/man/sota.glmnet.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sota.glmnet.R +\name{sota.glmnet} +\alias{sota.glmnet} +\title{sota.glmnet} +\usage{ +sota.glmnet(...) 
+} +\value{ +an object containing a list of parameters for this classifier +} +\description{ +sota.glmnet inherits from terda and does not use the randomized rounding, thus using only the glmnet component +} +\details{ +sota.glmnet: sota.glmnet classifier parameter function +} diff --git a/man/sota.rf.Rd b/man/sota.rf.Rd new file mode 100644 index 0000000..91651ab --- /dev/null +++ b/man/sota.rf.Rd @@ -0,0 +1,118 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sota.rf.R +\name{sota.rf} +\alias{sota.rf} +\title{sota.rf} +\usage{ +sota.rf( + sparsity = c(1:30), + objective = "auc", + max.nb.features = 1000, + intercept = "NULL", + language = "rf", + evalToFit = "auc_", + k_penalty = 0, + ntree = 500, + mtry = NULL, + replace = TRUE, + classwt = NULL, + sampsize = NULL, + nodesize = NULL, + maxnodes = NULL, + importance = FALSE, + localImp = FALSE, + nPerm = 1, + norm.votes = TRUE, + do.trace = FALSE, + keep.forest = TRUE, + corr.bias = FALSE, + keep.inbag = FALSE, + popSaveFile = "NULL", + seed = "NULL", + nCores = 4, + verbose = TRUE, + plot = FALSE, + warnings = FALSE, + debug = FALSE, + print_ind_method = "short", + experiment.id = NULL, + experiment.description = NULL, + experiment.save = "nothing" +) +} +\arguments{ +\item{language}{is the language that is used by the different algorithms {bin, bininter, ter, terinter, ratio}, (default:"rf")} + +\item{sparsity:}{number of features in a given model. 
This is a vector with multiple lengths.} + +\item{objective:}{prediction mode (default: auc)} + +\item{max.nb.features:}{create the glmnet object using only the top most significant features (default:1000)} + +\item{intercept:}{(Intercept for a given model) (default:NULL)} + +\item{evalToFit:}{Which model property will be used to select the best model among different k_sparsities (default: auc_)} + +\item{k_penalty:}{Penalization of the fit by the k_sparsity (default: 0)} + +\item{ntree:}{??} + +\item{mtry:}{Number of variables randomly sampled as candidates at each split. Note that the default values are different for classification (sqrt(p) where p is number of variables in x) and regression (p/3)} + +\item{replace:}{Should sampling of cases be done with or without replacement?} + +\item{classwt:}{Priors of the classes. Need not add up to one. Ignored for regression.} + +\item{sampsize:}{Size(s) of sample to draw. For classification, if sampsize is a vector of the length the number of strata, then sampling is stratified by strata, and the elements of sampsize indicate the numbers to be drawn from the strata.} + +\item{nodesize:}{Minimum size of terminal nodes. Setting this number larger causes smaller trees to be grown (and thus take less time). Note that the default values are different for classification (1) and regression (5).} + +\item{maxnodes:}{Maximum number of terminal nodes trees in the forest can have. If not given, trees are grown to the maximum possible (subject to limits by nodesize). If set larger than maximum possible, a warning is issued.} + +\item{importance:}{??} + +\item{localImp:}{??} + +\item{nPerm:}{??} + +\item{norm.votes:}{(??)} + +\item{do.trace:}{??} + +\item{keep.forest:}{??} + +\item{corr.bias:}{??} + +\item{keep.inbag:}{??} + +\item{popSaveFile:}{(??)} + +\item{seed:}{the seed to be used for reproducibility. 
If seed=NULL then it is not taken into account (default:NULL).} + +\item{nCores:}{the number of CPUs to run the program in parallel} + +\item{plot:}{Plot graphics indicating the evolution of the simulation (default:FALSE)} + +\item{verbose:}{print out information on the progress of the algorithm (default:TRUE)} + +\item{warnings:}{Print out warnings when running (default:FALSE).} + +\item{debug:}{print out information on the progress of the algorithm (default:FALSE)} + +\item{print_ind_method:}{One of c("short","graphical") indicates how to print a model and subsequently a population during the run (default:"short").} + +\item{experiment.id:}{The id of the experiment that is to be used in the plots and comparative analyses (default is the learner's name, when not specified)} + +\item{experiment.description:}{A longer description of the experiment. This is important when many experiments are run and can also be printed by the printExperiment function.} + +\item{experiment.save:}{Data from an experiment can be saved with different levels of completeness, with options to be selected from c("nothing", "minimal", "full"), default is "nothing"} +} +\value{ +an object containing a list of parameters for this classifier +} +\description{ +sota.rf is a wrapper that executes random forest using the same framework as for the predomics package. 
+} +\details{ +sota.rf: launching Random Forest classifier +} diff --git a/man/sota.svm.Rd b/man/sota.svm.Rd new file mode 100644 index 0000000..b006744 --- /dev/null +++ b/man/sota.svm.Rd @@ -0,0 +1,115 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sota.svm.R +\name{sota.svm} +\alias{sota.svm} +\title{sota.svm} +\usage{ +sota.svm( + sparsity = c(1:30), + objective = "auc", + max.nb.features = 1000, + intercept = 0, + language = "svm", + evalToFit = "auc_", + k_penalty = 0, + scaled = TRUE, + type = NULL, + kernel = "rbfdot", + kpar = "automatic", + C = c(1e-04, 0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000), + nu = 0.2, + epsilon.hp = 0.1, + prob.model = FALSE, + class.weights = NULL, + fit = TRUE, + cache = 40, + tol = 0.001, + shrinking = TRUE, + na.action = na.omit, + popSaveFile = "NULL", + seed = "NULL", + nCores = 4, + verbose = TRUE, + plot = FALSE, + warnings = FALSE, + debug = FALSE, + print_ind_method = "short", + experiment.id = NULL, + experiment.description = NULL, + experiment.save = "nothing" +) +} +\arguments{ +\item{language}{is the language that is used by the different algorithms {bin, bininter, ter, terinter, ratio}, (default:"svm")} + +\item{sparsity:}{number of features in a given model. This is a vector with multiple lengths.} + +\item{objective:}{prediction mode (default: auc)} + +\item{max.nb.features:}{create the glmnet object using only the top most significant features (default:1000)} + +\item{intercept:}{(Intercept for a given model) (default:0)} + +\item{evalToFit:}{Which model property will be used to select the best model among different k_sparsities (default: auc_)} + +\item{k_penalty:}{Penalization of the fit by the k_sparsity (default: 0)} + +\item{scaled:}{??} + +\item{type:}{??} + +\item{kernel:}{??} + +\item{kpar:}{??} + +\item{C:}{(??)} + +\item{nu:}{??} + +\item{epsilon.hp:}{(??) 
(for the SVM)} + +\item{prob.model:}{??} + +\item{class.weights:}{??} + +\item{fit:}{??} + +\item{cache:}{(??)} + +\item{tol:}{??} + +\item{shrinking:}{??} + +\item{na.action:}{??} + +\item{popSaveFile:}{(??)} + +\item{seed:}{the seed to be used for reproducibility. If seed=NULL then it is not taken into account (default:NULL).} + +\item{nCores:}{the number of CPUs to run the program in parallel} + +\item{plot:}{Plot graphics indicating the evolution of the simulation (default:FALSE)} + +\item{verbose:}{print out information on the progress of the algorithm (default:TRUE)} + +\item{warnings:}{Print out warnings when running (default:FALSE).} + +\item{debug:}{print out information on the progress of the algorithm (default:FALSE)} + +\item{print_ind_method:}{One of c("short","graphical") indicates how to print a model and subsequently a population during the run (default:"short").} + +\item{experiment.id:}{The id of the experiment that is to be used in the plots and comparative analyses (default is the learner's name, when not specified)} + +\item{experiment.description:}{A longer description of the experiment. This is important when many experiments are run and can also be printed by the printExperiment function.} + +\item{experiment.save:}{Data from an experiment can be saved with different levels of completeness, with options to be selected from c("nothing", "minimal", "full"), default is "nothing"} +} +\value{ +an object containing a list of parameters for this classifier +} +\description{ +sota.svm is a wrapper that executes svm using the same framework as for the predomics package. 
+} +\details{ +sota.svm: launching svm classifier +} diff --git a/man/sparseVecToModel.Rd b/man/sparseVecToModel.Rd new file mode 100644 index 0000000..1715d05 --- /dev/null +++ b/man/sparseVecToModel.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.lib.R +\name{sparseVecToModel} +\alias{sparseVecToModel} +\title{sparseVecToModel} +\usage{ +sparseVecToModel(X, y, v, clf, eval.all = FALSE, obj = NULL) +} +\arguments{ +\item{X:}{dataset} + +\item{y:}{labels} + +\item{v:}{A vector of indexes (example v=c(1,11))} + +\item{clf:}{classifier information} + +\item{eval.all:}{Should the model be evaluated (default:FALSE)} + +\item{obj:}{an object model to add to the model (default:NULL)} +} +\value{ +a model object +} +\description{ +Builds a model object based on a model that is in the sparse (short) format. +} diff --git a/man/summarySE.Rd b/man/summarySE.Rd new file mode 100644 index 0000000..29ca14a --- /dev/null +++ b/man/summarySE.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.lib.R +\name{summarySE} +\alias{summarySE} +\title{Plot performance scores for multiple learners.} +\usage{ +summarySE( + data = NULL, + measurevar, + groupvars = NULL, + na.rm = FALSE, + conf.interval = 0.95, + .drop = TRUE +) +} +\arguments{ +\item{data:}{a data frame} + +\item{groupvars:}{a vector containing names of columns that contain grouping variables} + +\item{na.rm:}{a boolean that indicates whether to ignore NA's} + +\item{conf.interval:}{the percent range of the confidence interval (default is 95\%)} +} +\value{ +A transformed data frame with information on the different errors and confidence. +} +\description{ +summarySE gives count, mean, standard deviation, standard error of the mean, and confidence interval (default 95\%). 
+} diff --git a/man/t2d.Rd b/man/t2d.Rd new file mode 100644 index 0000000..14ece6f --- /dev/null +++ b/man/t2d.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{t2d} +\alias{t2d} +\title{Type 2 diabetes (frequencies) BGI} +\description{ +This dataset consists of frequency abundance files as downloaded from http://waldronlab.io/curatedMetagenomicData/ +This is a list containing two elements: (i) the X data matrix with 1045 species and 344 observations and (ii) patient class = -1 (n=170) and healthy controls (n=174) +} +\author{ +Qin, Junjie, Yingrui Li, Zhiming Cai, Shenghui Li, Jianfeng Zhu, Fan Zhang, Suisha Liang, et al. “A metagenome-wide association study of gut microbiota in type 2 diabetes.” Nature (September 26, 2012). +} +\keyword{2} +\keyword{diabetes,} +\keyword{microbiome,} +\keyword{species} +\keyword{type} diff --git a/man/t2dw.Rd b/man/t2dw.Rd new file mode 100644 index 0000000..f7bf04f --- /dev/null +++ b/man/t2dw.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{t2dw} +\alias{t2dw} +\title{Type 2 diabetes (frequencies) Women Sweden} +\description{ +This dataset consists of frequency abundance files as downloaded from http://waldronlab.io/curatedMetagenomicData/ +This is a list containing two elements: (i) the X data matrix with 1045 species and 145 observations and (ii) patient class = -1 (n=53) and healthy controls (n=43) +Caution, this dataset also has a class 0 with IG patients, which needs to be omitted from both X and y +} +\author{ +Karlsson, Fredrik H, Valentina Tremaroli, Intawat Nookaew, Göran Bergström, Carl Johan Behre, Björn Fagerberg, Jens Nielsen, and Fredrik Bäckhed. “Gut metagenome in European women with normal, impaired and diabetic glucose control.” Nature (May 29, 2013): 1–7. 
+} +\keyword{2} +\keyword{diabetes,} +\keyword{microbiome,} +\keyword{species} +\keyword{type} diff --git a/man/tag_Couples.Rd b/man/tag_Couples.Rd new file mode 100644 index 0000000..66b02c2 --- /dev/null +++ b/man/tag_Couples.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/terga2.lib.R +\name{tag_Couples} +\alias{tag_Couples} +\title{Tag the couples} +\usage{ +tag_Couples(pop, parents) +} +\arguments{ +\item{pop:}{The population in which the couples are being constituted;} + +\item{parents:}{The parent candidate individuals from which the couples will +be selected.} +} +\value{ +The parent population with the couple tags set. +} +\description{ +This function constitutes the couples that will give the +next generation individuals by adding the couple id on the mate attribute. +} diff --git a/man/tag_SelectElite.Rd b/man/tag_SelectElite.Rd new file mode 100644 index 0000000..575312e --- /dev/null +++ b/man/tag_SelectElite.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/terga2.lib.R +\name{tag_SelectElite} +\alias{tag_SelectElite} +\title{Tag individuals for parenting} +\usage{ +tag_SelectElite(clf, pop, nbToSelect) +} +\arguments{ +\item{clf:}{the classifier object} + +\item{pop:}{the population on which we want to add the tag} + +\item{nbToSelect:}{the number of individuals we are going to select in the population} +} +\value{ +the population given as an input with the `nbToSelect` best individuals with `$selected = TRUE` +} +\description{ +Function to add the tag "selected" to the best individuals of the population +} diff --git a/man/tag_SelectRandom.Rd b/man/tag_SelectRandom.Rd new file mode 100644 index 0000000..a8393b3 --- /dev/null +++ b/man/tag_SelectRandom.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/terga2.lib.R +\name{tag_SelectRandom} +\alias{tag_SelectRandom} +\title{Randomly tag selected 
individuals parenting} +\usage{ +tag_SelectRandom(clf, pop, nbToSelect) +} +\arguments{ +\item{clf:}{The classifier object} + +\item{pop:}{The population on which the selection process will be performed.} + +\item{nbToSelect:}{the number of individuals we are going to select in the population} +} +\value{ +the population given as an input with `nbToSelect` individuals with `selected = TRUE` +} +\description{ +This function turns the selected switch on when an individual is +selected to survive the generation and be among the pool of parents for the +next generation. +} diff --git a/man/tag_ToBeMutated.Rd b/man/tag_ToBeMutated.Rd new file mode 100644 index 0000000..6136cbc --- /dev/null +++ b/man/tag_ToBeMutated.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/terga2.lib.R +\name{tag_ToBeMutated} +\alias{tag_ToBeMutated} +\title{Tag individuals for mutation} +\usage{ +tag_ToBeMutated(pop, mutate_size, protected = NULL) +} +\arguments{ +\item{pop:}{The population on which the individuals to be mutated will be selected} + +\item{mutate_size:}{The number of individuals to mutate} + +\item{protected:}{The index of individuals which should not be mutated.} +} +\value{ +The population given as an input with `mutate_size` individuals with `toBeMutated = TRUE` +} +\description{ +Function to add the tag "toBeMutated" to a randomly sampled part of +the population. Some individuals (the best ones) will be protected from the mutation +so that genetic decline does not happen. 
+} diff --git a/man/tag_select.Rd b/man/tag_select.Rd new file mode 100644 index 0000000..a10fae7 --- /dev/null +++ b/man/tag_select.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/terga2.lib.R +\name{tag_select} +\alias{tag_select} +\title{Add `selected` tag using elite and random selection} +\usage{ +tag_select(X, y, clf, pop) +} +\arguments{ +\item{X:}{Unused but still here for compatibility} + +\item{y:}{same as `X`} + +\item{clf:}{the classifier object where parameters are defined} + +\item{pop:}{the population on which we want to apply the selection} +} +\value{ +the population with the tag `selected` on some of the individuals +} +\description{ +This function combines \link[predomics]{tag_SelectElite} and \link[predomics]{tag_SelectRandom} +to tag the desired individuals in a population following the proportion given in the clf +} diff --git a/man/terBeam.Rd b/man/terBeam.Rd new file mode 100644 index 0000000..87c017b --- /dev/null +++ b/man/terBeam.Rd @@ -0,0 +1,105 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/terbeam.R +\name{terBeam} +\alias{terBeam} +\title{terbeam} +\usage{ +terBeam( + sparsity = 1:5, + max.nb.features = 1000, + maxNbOfModels = 10000, + nbBest = round(maxNbOfModels/10), + nbVeryBest = round(maxNbOfModels/100), + final.pop.perc = 100, + popSaveFile = "NULL", + saveFiles = FALSE, + language = "terinter", + scoreFormula = scoreRatio, + epsilon = "NULL", + objective = "auc", + k_penalty = 0, + evalToFit = "auc_", + estimate_coefs = FALSE, + intercept = "NULL", + testAllSigns = FALSE, + plot = FALSE, + verbose = TRUE, + warnings = FALSE, + debug = FALSE, + print_ind_method = "short", + parallelize.folds = TRUE, + nCores = 4, + seed = "NULL", + experiment.id = "NULL", + experiment.description = "NULL", + experiment.save = "nothing" +) +} +\arguments{ +\item{language}{is the language that is used by the different algorithms {bin, bininter, ter, terinter, 
ratio}, (default:"terinter")} + +\item{sparsity:}{number of features in a given model. This is a vector with multiple lengths.} + +\item{maxNbOfModels:}{number of models to be explored for a given k_sparsity. This is equivalent to a population size in terga.} + +\item{nbVeryBest:}{is the number of features to be kept that appear in the very best models. They will be kept even if they are not frequent in the best models (default: 1 percent of maxNbOfModels).} + +\item{nbBest:}{is the number of features that will be used to build the k+1 sparsity combinations (default: 10 percent of maxNbOfModels).} + +\item{final.pop.perc:}{a percentage of nbVeryBest translates in a number of models to be kept for k_sparsity.} + +\item{popSaveFile:}{(??)} + +\item{saveFiles:}{??} + +\item{scoreFormula:}{a Function that contains the ratio Formula or other specific ones} + +\item{epsilon:}{a small value to be used with the ratio language (useCustomLanguage) (default: NULL). When null it is going to be calculated by the minimum value of X divided by 10.} + +\item{objective:}{this can be auc, cor or aic. Terga can also predict regression, other than class prediction. (default:auc)} + +\item{max.nb.features:}{focuses only on the subset of top most significant features (default:1000)} + +\item{estimate_coefs:}{non ternary solution for the aic objective (default:FALSE)} + +\item{evalToFit:}{The model performance attribute to use as fitting score (default:"fit_"). Other choices are c("auc_","accuracy_","precision_","recall_","f_score_")} + +\item{k_penalty:}{Penalization of the fit by the k_sparsity (default: 0)} + +\item{intercept:}{(??) 
(default:NULL)} + +\item{testAllSigns:}{??} + +\item{plot:}{Plot different graphics (default:FALSE).} + +\item{verbose:}{print out information on the progress of the algorithm (default:TRUE)} + +\item{warnings:}{Print out warnings when running (default:FALSE).} + +\item{debug:}{print debug information (default:FALSE)} + +\item{print_ind_method:}{One of c("short","graphical") indicates how to print a model and subsequently a population during the run (default:"short").} + +\item{nCores:}{the number of cores to execute the program. If nCores=1 then the program runs in a non parallel mode} + +\item{parallelize.folds:}{parallelize folds when cross-validating (default:TRUE)} + +\item{seed:}{the seed to be used for reproducibility. If seed=NULL then it is not taken into account (default:NULL).} + +\item{experiment.id:}{The id of the experiment that is to be used in the plots and comparative analyses (default is the learner's name, when not specified)} + +\item{experiment.description:}{A longer description of the experiment. This is important when many experiments are run and can also be printed by the printExperiment function.} + +\item{experiment.save:}{Data from an experiment can be saved with different levels of completeness, with options to be selected from c("nothing", "minimal", "full"), default is "nothing"} + +\item{parallel:}{parallel} +} +\value{ +an object containing a list of parameters for this classifier +} +\description{ +terbeam is a model search algorithm based on a beam search approach. 
+} +\details{ +terbeam: ternary beam searching algorithm +} diff --git a/man/terda.Rd b/man/terda.Rd new file mode 100644 index 0000000..2c0cfb6 --- /dev/null +++ b/man/terda.Rd @@ -0,0 +1,114 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/terda.R +\name{terda} +\alias{terda} +\title{terda} +\usage{ +terda( + sparsity = 5, + nIterations = 5, + max.nb.features = 1000, + kBest = "NULL", + method = "glmnetRR", + kStep = "NULL", + vartype = "real", + gamma = 0.7, + nRR = 1, + lb = -1, + ub = 1, + language = "terinter", + scoreFormula = scoreRatio, + epsilon = "NULL", + nblambdas = 1000, + objective = "auc", + evalToFit = "auc_", + k_penalty = 0, + intercept = "NULL", + popSaveFile = "NULL", + final.pop.perc = 100, + alpha = 0.5, + plot = FALSE, + verbose = TRUE, + warnings = FALSE, + debug = FALSE, + print_ind_method = "short", + parallelize.folds = TRUE, + nCores = 4, + seed = "NULL", + experiment.id = "NULL", + experiment.description = "NULL", + experiment.save = "nothing" +) +} +\arguments{ +\item{language}{is the language that is used by the different algorithms {bin, bininter, ter, terinter, ratio}, (default:"terinter")} + +\item{sparsity:}{number of features in a given model. This is a vector with multiple lengths.} + +\item{nIterations:}{??} + +\item{max.nb.features:}{create the glmnet object using only the top most significant features (default:1000)} + +\item{kBest:}{??} + +\item{method:}{??} + +\item{kStep:}{??} + +\item{vartype:}{(??)} + +\item{gamma:}{??} + +\item{nRR:}{(??) (default:FALSE)} + +\item{lb:}{??} + +\item{ub:}{??} + +\item{scoreFormula:}{a Function that contains the ratio Formula or other specific ones} + +\item{epsilon:}{a small value to be used with the ratio language (useCustomLanguage) (default: NULL). When null it is going to be calculated by the minimum value of X divided by 10.} + +\item{objective:}{this can be auc, cor or aic. Terga can also predict regression, other than class prediction. 
(default:auc)} + +\item{evalToFit:}{The model performance attribute to use as fitting score (default:"fit_"). Other choices are c("auc_","accuracy_","precision_","recall_","f_score_")} + +\item{k_penalty:}{Penalization of the fit by the k_sparsity (default: 0)} + +\item{intercept:}{(??) (default:NULL)} + +\item{popSaveFile:}{(??)} + +\item{final.pop.perc:}{??} + +\item{plot:}{Plot different graphics (default:FALSE).} + +\item{verbose:}{print out information on the progress of the algorithm (default:TRUE)} + +\item{warnings:}{Print out warnings when running (default:FALSE).} + +\item{debug:}{print out debug information when activated (default: FALSE)} + +\item{print_ind_method:}{One of c("short","graphical") indicates how to print a model and subsequently a population during the run (default:"short").} + +\item{parallelize.folds:}{parallelize folds when cross-validating (default:TRUE)} + +\item{nCores:}{the number of cores to execute the program. If nCores=1 then the program runs in a non parallel mode} + +\item{seed:}{the seed to be used for reproducibility. If seed=NULL then it is not taken into account (default:NULL).} + +\item{experiment.id:}{The id of the experiment that is to be used in the plots and comparative analyses (default is the learner's name, when not specified)} + +\item{experiment.description:}{A longer description of the experiment. This is important when many experiments are run and can also be printed by the printExperiment function.} + +\item{experiment.save:}{Data from an experiment can be saved with different levels of completeness, with options to be selected from c("nothing", "minimal", "full"), default is "nothing"} +} +\value{ +an object containing a list of parameters for this classifier +} +\description{ +terda is a model search algorithm. 
+} +\details{ +terda: terda classifier parameter function +} diff --git a/man/terga1.Rd b/man/terga1.Rd new file mode 100644 index 0000000..d87a584 --- /dev/null +++ b/man/terga1.Rd @@ -0,0 +1,127 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/terga1.R +\name{terga1} +\alias{terga1} +\title{terga1} +\usage{ +terga1( + sparsity = c(1:10), + size_pop = 100, + size_world = "NULL", + max.nb.features = 1000, + popSourceFile = "NULL", + popSaveFile = "NULL", + language = "terinter", + scoreFormula = scoreRatio, + epsilon = "NULL", + unique_vars = FALSE, + objective = "auc", + k_penalty = 0, + evalToFit = "fit_", + estimate_coefs = FALSE, + intercept = "NULL", + select_type = "mixed", + select_perc1 = 20, + select_perc2 = 30, + perc_best_ancestor = 10, + mutate_size = 70, + mutate_rate = 50, + nb_generations = 100, + convergence = TRUE, + convergence_steps = 10, + evolve_k1 = TRUE, + plot = FALSE, + verbose = TRUE, + warnings = FALSE, + debug = FALSE, + print_ind_method = "short", + parallelize.folds = TRUE, + nCores = 4, + seed = "NULL", + experiment.id = "NULL", + experiment.description = "NULL", + experiment.save = "nothing" +) +} +\arguments{ +\item{language}{is the language that is used by the different algorithms {bin, bininter, ter, terinter, ratio}, (default:"terinter")} + +\item{sparsity:}{number of features in a given model. This is a vector with multiple lengths.} + +\item{size_pop:}{the number of individuals in a population to be evolved.} + +\item{size_world:}{this is the number of features in the dataset.} + +\item{max.nb.features:}{focuses only on the subset of top most significant features (default:1000)} + +\item{popSourceFile:}{A population of models that can start as a first generation to be evolved (default:NULL).} + +\item{popSaveFile:}{(??)} + +\item{scoreFormula:}{a Function that contains the ratio Formula or other specific ones} + +\item{epsilon:}{a small value to be used with the ratio language (default: NULL). 
When null it is going to be calculated by the minimum value of X divided by 10.} + +\item{unique_vars:}{logical (default: FALSE) indicates whether unique variables can be used in a model or population.} + +\item{objective:}{this can be auc, cor or aic. Terga can also predict regression, other than class prediction. (default:auc)} + +\item{estimate_coefs:}{non ternary solution for the aic objective (default:FALSE)} + +\item{intercept:}{(Intercept for a given model) (default:NULL)} + +\item{evalToFit:}{The model performance attribute to use as fitting score (default:"fit_"). Other choices are c("auc_","accuracy_","precision_","recall_","f_score_")} + +\item{k_penalty:}{Penalization of the fit by the k_sparsity (default: 0)} + +\item{select_type:}{the selection operator type. can be mixed, elite or tournoi (default: mixed)} + +\item{select_perc1:}{percentage of individuals to be selected with elite} + +\item{select_perc2:}{percentage of individuals to be selected with tournoi} + +\item{perc_best_ancestor:}{percentage of best ancestors as seeding in the new population} + +\item{mutate_size:}{percentage of individuals in the population to be mutated} + +\item{mutate_rate:}{percentage of features in an individual to be mutated} + +\item{plot:}{plot graphics indicating the evolution of the simulation (default:FALSE)} + +\item{convergence:}{should the algorithm converge when the best individual is not improving (default:TRUE).} + +\item{convergence_steps:}{the number of generations after which we consider convergence (default:10).} + +\item{evolve_k1:}{whether or not to evaluate exhaustively the features for k_sparse=1. This will take a lot of time if the dataset is large, thus the possibility to evolve this using the GA. 
(default:TRUE)} + +\item{verbose:}{print out information on the progress of the algorithm (default:TRUE)} + +\item{warnings:}{Print out warnings when running (default:FALSE).} + +\item{debug:}{print debug information (default:FALSE)} + +\item{print_ind_method:}{One of c("short","graphical") indicates how to print a model and subsequently a population during the run (default:"short").} + +\item{parallelize.folds:}{parallelize folds when cross-validating (default:TRUE)} + +\item{nb_generations:}{maximum number of generations to evolve the population.} + +\item{nCores:}{the number of cores to execute the program. If nCores=1 then the program runs in a non parallel mode} + +\item{seed:}{the seed to be used for reproducibility. If seed=NULL then it is not taken into account (default:NULL).} + +\item{experiment.id:}{The id of the experiment that is to be used in the plots and comparative analyses (default is the learner's name, when not specified)} + +\item{experiment.description:}{A longer description of the experiment. This is important when many experiments are run and can also be printed by the printExperiment function.} + +\item{experiment.save:}{Data from an experiment can be saved with different levels of completeness, with options to be selected from c("nothing", "minimal", "full"), default is "nothing"} +} +\value{ +an object containing a list of parameters for this classifier +} +\description{ +terga1 is a model search algorithm based on genetic algorithms (GA). A “genome” or “individual” in this context is a combination of features that will be associated together to compute a score that will be the prediction model. Depending on the type of fitting function that is maximized the features are weighed by specific coefficients. In short the algorithm is based on different operations such as crossing, mutating and evolving different “individuals” and evaluating their fitness to the “environment” which is represented by the variable to be predicted. 
+} +\details{ +terga1: Model search algorithm based on genetic algorithms (GA) +} diff --git a/man/terga2.Rd b/man/terga2.Rd new file mode 100644 index 0000000..6f26711 --- /dev/null +++ b/man/terga2.Rd @@ -0,0 +1,196 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/terga2.R +\name{terga2} +\alias{terga2} +\title{Model search algorithm based on genetic algorithms (GA).} +\usage{ +terga2( + sparsity = c(1:10), + max.nb.features = 1000, + language = "terinter", + objective = "auc", + evalToFit = "accuracy_", + k_penalty = 0, + estimate_coefs = FALSE, + scoreFormula = scoreRatio, + epsilon = "NULL", + size_pop = 100, + size_pop_random = size_pop, + final.pop.perc = 100, + in_pop = "NULL", + popSourceFile = "NULL", + popSaveFile = "NULL", + individual_vec = individual_vec_v2, + randomSigns = FALSE, + unique_vars = FALSE, + select_perc = 25, + selector = list(selector_v1, selector_v2), + select_percByMethod = list(50, 50), + cross = TRUE, + crosser = crossingIndividual_v3, + mutate = TRUE, + mutate_size = 75, + mutate_rate = 50, + mutator = mutator_v2, + evolver = "v2m", + nb_generations = 100, + convergence = TRUE, + convergence_steps = 10, + evolve_k1 = TRUE, + plot = FALSE, + verbose = FALSE, + warnings = FALSE, + debug = FALSE, + print_ind_method = "short", + parallelize.folds = TRUE, + nCores = 4, + seed = "NULL", + maxTime = Inf, + experiment.id = "NULL", + experiment.description = "NULL", + experiment.save = "nothing" +) +} +\arguments{ +\item{language}{is the language that is used by the different algorithms +{bin, bininter, ter, terinter}, (default:"terinter")} + +\item{size_pop_random}{the number of individuals initialized randomly. This is used +by the metal algorithm (i.e. aggregator method).} + +\item{sparsity:}{number of features in a given model (default:1:10). 
+This is a vector with the model-size range (number of features used by a model).} + +\item{objective:}{This is the task that is to be learned and can be either classification +(auc) or can be a regression (cor) (default:auc).} + +\item{evalToFit:}{The model performance attribute to use as fitting score (default:"accuracy_"). +Other choices are c("accuracy_", "auc_", "precision_","recall_","f_score_") for the +classification task. It can be either rho, rho-squared or minimizing the +standard error of the regression for the regression task.} + +\item{k_penalty:}{Model-size penalization effect applied on the fit score (default: 0).} + +\item{estimate_coefs:}{_deprecated_ A particular option for the regression mode +with the aic objective (default:FALSE)} + +\item{max.nb.features:}{If this number is smaller than the number of variables in the +dataset, the max.nb.features most significant features will be selected and the +dataset will be restricted (default:1000).} + +\item{size_pop:}{the number of individuals in a population to be evolved (default:100)} + +\item{final.pop.perc:}{What percentage of the final population should be returned (default:100)} + +\item{in_pop:}{a specific population of models that can be evolved. This is particularly +useful for the metal algorithm} + +\item{popSourceFile:}{It is possible to load a population of models that has been +already learned before. With this option we can specify such a file (default:NULL).} + +\item{popSaveFile:}{Once the population of models has evolved, we can store it in +another file (default:NULL).} + +\item{scoreFormula:}{a Function that contains the ratio Formula or other specific ones} + +\item{epsilon:}{a very small value to be used with the ratio language +(useCustomLanguage) (default: NULL).
When null it is going to be calculated as the +minimum value of X divided by 10.} + +\item{individual_vec:}{The function that is used to generate an individual +(default:individual_vec_v2).} + +\item{randomSigns:}{When generating an individual composed of a set of features, we +can set the coefficients of the variables to -1 or 1 randomly (default:FALSE).} + +\item{unique_vars:}{When performing operations on multiple individuals it can be +that in an individual we have the same feature multiple times. If set to TRUE this +individual will be destroyed (default:FALSE)} + +\item{select_perc:}{The percentage of the population to be selected for crossing/mutation +(default:25)} + +\item{selector:}{During the selection process, the parent population can be +selected using different strategies. For instance the default process is performed +using both elite and random selection (default:list(selector_v1, selector_v2)).} + +\item{select_percByMethod:}{A list containing the percentage of individuals that +each of the methods specified in selector should get.} + +\item{cross:}{A switch, which activates the crossing operator (default:TRUE).} + +\item{crosser:}{The method that should be applied to cross individuals +together (default:crossingIndividual_v3).} + +\item{mutate:}{A switch, which activates the mutation operator (default:TRUE).} + +\item{mutate_size:}{The percentage of individuals in the population to be mutated (default:75).} + +\item{mutate_rate:}{The percentage of features in an individual to be mutated (default:50).} + +\item{mutator:}{The method that should be applied to mutate individuals (default:mutator_v2). +The operations can be deletion, insertion or changing the coefficient (from -1 to 1 +and vice-versa).} + +\item{evolver:}{The method that will be used to evolve the individuals together.
+This is the core of the algorithm and can be one of different implementations +c("v1", "v2", "v3","v4") where the default one is "v2m".} + +\item{nb_generations:}{The maximum number of generations to evolve the population (default:100).} + +\item{convergence:}{A switch which activates the automatic convergence of the algorithm +when the best individual is not improving (default:TRUE).} + +\item{convergence_steps:}{The number of generations after which we consider +convergence (default:10).} + +\item{evolve_k1:}{Whether or not to evaluate exhaustively the features for +model size = 1. This will take a lot of time if the dataset is large, thus the +possibility to evolve this using the GA is interesting. (default:TRUE)} + +\item{plot:}{Plot graphics indicating the evolution of the simulation (default:FALSE)} + +\item{verbose:}{Print out information on the progress of the algorithm (default:FALSE).} + +\item{warnings:}{Print out warnings when running (default:FALSE).} + +\item{debug:}{Print out detailed information on the progress of the algorithm +(default:FALSE)} + +\item{print_ind_method:}{One of c("short","graphical") indicates how to print +a model and subsequently a population during the run (default:"short").} + +\item{parallelize.folds:}{parallelize folds when cross-validating (default:TRUE).} + +\item{nCores:}{The number of cores to execute the program. If nCores = 1 then +the program runs in a non-parallel mode} + +\item{seed:}{The seed to be used for reproducibility. If seed=NULL then it is +not taken into account (default:NULL).} + +\item{maxTime:}{We can use a time limit to evolve a population (default:Inf).} + +\item{experiment.id:}{The id of the experiment that is to be used in the plots +and comparative analyses (default is the learner's name, when not specified)} + +\item{experiment.description:}{A longer description of the experiment.
This is +important when many experiments are run and can also be printed by the +printExperiment function.} + +\item{experiment.save:}{Data from an experiment can be saved with different +levels of completeness, with options to be selected from +c("nothing", "minimal", "full"), default is "nothing"} +} +\value{ +an object of the classifier class, containing a list of parameters +} +\description{ +TerGA is a model search algorithm based on genetic algorithms (GA). +An “individual” (i.e. genome) in this context is a combination of features that +will be associated together using a selected "language" to compute a score that +will constitute the prediction model. Depending on the type of fitting (i.e. evaluation) +function that is maximized, the features are weighted by specific coefficients. +In short the algorithm is based on different operations such as crossing, mutating +and evolving different “individuals” and evaluating their fitness to the “environment” +which is represented by the variable to be predicted. +} diff --git a/man/updateModelIndex.Rd b/man/updateModelIndex.Rd new file mode 100644 index 0000000..236766d --- /dev/null +++ b/man/updateModelIndex.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.lib.R +\name{updateModelIndex} +\alias{updateModelIndex} +\title{updateModelIndex} +\usage{ +updateModelIndex(obj, features = NULL) +} +\arguments{ +\item{obj:}{the object is a model} + +\item{features:}{the list of features which overrides the clf$data$features if this exists.} +} +\value{ +the same object type as input, but updated +} +\description{ +Update the index of a model object.
+} diff --git a/man/updateObjectIndex.Rd b/man/updateObjectIndex.Rd new file mode 100644 index 0000000..619b314 --- /dev/null +++ b/man/updateObjectIndex.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/global.lib.R +\name{updateObjectIndex} +\alias{updateObjectIndex} +\title{updateObjectIndex} +\usage{ +updateObjectIndex(obj, features = NULL) +} +\arguments{ +\item{obj:}{the object can be a model, population, or modelCollection} + +\item{features:}{the list of features which overrides the clf$data$features if this exists.} +} +\value{ +the same object type as input, but updated +} +\description{ +Update the index of a model, population, or modelCollection. +}