From 8ad0a55a2ed0a6820f3bcee382f6f614d0bce238 Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Thu, 1 Jul 2021 08:46:47 +1200 Subject: [PATCH] add to ScientificTypes name change --- ORGANIZATION.md | 8 ++++---- Project.toml | 4 ++-- README.md | 4 ++-- ROADMAP.md | 4 ++-- docs/make.jl | 8 ++++---- docs/src/adding_models_for_general_use.md | 4 ++-- docs/src/getting_started.md | 12 ++++++------ docs/src/preparing_data.md | 4 ++-- docs/src/quick_start_guide_to_adding_models.md | 2 +- docs/src/working_with_categorical_data.md | 6 +++--- paper/paper.md | 2 +- src/MLJ.jl | 6 +++--- src/scitypes.jl | 12 ++++++------ 13 files changed, 38 insertions(+), 38 deletions(-) diff --git a/ORGANIZATION.md b/ORGANIZATION.md index c72794980..7532a6218 100644 --- a/ORGANIZATION.md +++ b/ORGANIZATION.md @@ -13,14 +13,14 @@ its conventional use, are marked with a ⟂ symbol: evaluating and tuning machine learning models. It pulls in most code from other repositories described below. MLJ also hosts the [MLJ manual](src/docs) which documents functionality across the - repositories, with the exception of ScientificTypes, and + repositories, with the exception of ScientificTypesBase, and MLJScientific types which host their own documentation. (The MLJ manual and MLJTutorials do provide overviews of scientific types.) * [MLJModelInterface.jl](https://github.com/alan-turing-institute/MLJModelInterface.jl) is a lightweight package imported by packages implementing MLJ's interface for their machine learning models. It's *sole* - dependency is ScientificTypes, which is a tiny package with *no* + dependency is ScientificTypesBase, which is a tiny package with *no* dependencies. * (⟂) @@ -77,7 +77,7 @@ its conventional use, are marked with a ⟂ symbol: [Flux.jl](https://github.com/FluxML/Flux.jl), in MLJ. 
* (⟂) - [ScientificTypes.jl](https://github.com/alan-turing-institute/ScientificTypes.jl) + [ScientificTypesBase.jl](https://github.com/alan-turing-institute/ScientificTypesBase.jl) is an ultra lightweight package providing "scientific" types, such as `Continuous`, `OrderedFactor`, `Image` and `Table`. It's purpose is to formalize conventions around the scientific @@ -85,7 +85,7 @@ its conventional use, are marked with a ⟂ symbol: `DataFrame`. * (⟂) - [MLJScientificTypes.jl](https://github.com/alan-turing-institute/MLJScientificTypes.jl) + [ScientificTypes.jl](https://github.com/alan-turing-institute/ScientificTypes.jl) articulates MLJ's own convention for the scientific interpretation of data. diff --git a/Project.toml b/Project.toml index ac10761c0..c4172195a 100644 --- a/Project.toml +++ b/Project.toml @@ -14,12 +14,12 @@ MLJEnsembles = "50ed68f4-41fd-4504-931a-ed422449fee0" MLJIteration = "614be32b-d00c-4edb-bd02-1eb411ab5e55" MLJModels = "d491faf4-2d78-11e9-2867-c94bc002c0b7" MLJOpenML = "cbea4545-8c96-4583-ad3a-44078d60d369" -MLJScientificTypes = "2e2323e0-db8b-457b-ae0d-bdfb3bc63afd" MLJSerialization = "17bed46d-0ab5-4cd4-b792-a5c4b8547c6d" MLJTuning = "03970b2e-30c4-11ea-3135-d1576263f10f" Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +ScientificTypes = "321657f4-b219-11e9-178b-2701a2544e81" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" @@ -33,10 +33,10 @@ MLJEnsembles = "0.1" MLJIteration = "0.3" MLJModels = "0.14" MLJOpenML = "1" -MLJScientificTypes = "0.4.1" MLJSerialization = "1.1" MLJTuning = "0.6" ProgressMeter = "1.1" +ScientificTypes = "2" StatsBase = "0.32,0.33" Tables = "0.2,1.0" julia = "1.3" diff --git a/README.md b/README.md index 384317ee0..f34ca41e2 100644 --- a/README.md +++ b/README.md @@ -87,8 +87,8 @@ illustrated in the dependency 
chart below. MLJSerialization

- MLJScientificTypes  •  - ScientificTypes + ScientificTypes  •  + ScientificTypesBase


diff --git a/ROADMAP.md b/ROADMAP.md index 3a93c384d..de8877939 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -152,7 +152,7 @@ list](https://github.com/alan-turing-institute/MLJ.jl/issues/673). - [x] POC for implementation of time series models classification [#303](https://github.com/alan-turing-institute/MLJ.jl/issues/303), - [ScientificTypes #14](https://github.com/alan-turing-institute/ScientificTypes.jl/issues/14) POC is [here](https://github.com/alan-turing-institute/TimeSeriesClassification.jl) + [ScientificTypesBase #14](https://github.com/alan-turing-institute/ScientificTypesBase.jl/issues/14) POC is [here](https://github.com/alan-turing-institute/TimeSeriesClassification.jl) - [ ] POC for time series forecasting, along lines of sktime; probably needs [MLJBase #502](https://github.com/alan-turing-institute/MLJBase.jl/issues/502) @@ -178,7 +178,7 @@ list](https://github.com/alan-turing-institute/MLJ.jl/issues/673). - [x] missing value imputation using Gaussina Mixture Model. Done, via addition of BetaML model, `MissingImputator`. - - [ ] improve `autotype` method (from MLJScientificTypes), perhaps by + - [ ] improve `autotype` method (from ScientificTypes), perhaps by training on large collection of datasets with manually labelled scitype schema. 
diff --git a/docs/make.jl b/docs/make.jl index a24b8247e..15e2bf3b0 100755 --- a/docs/make.jl +++ b/docs/make.jl @@ -14,9 +14,9 @@ import MLJTuning import MLJModels import MLJEnsembles import MLJOpenML -import MLJScientificTypes -import MLJModelInterface import ScientificTypes +import MLJModelInterface +import ScientificTypesBase import Distributions using CategoricalArrays # avoid types like CategoricalArrays.Categorica using LossFunctions @@ -74,9 +74,9 @@ makedocs( MLJTuning, MLJModels, MLJEnsembles, - MLJScientificTypes, - MLJModelInterface, ScientificTypes, + MLJModelInterface, + ScientificTypesBase, MLJIteration, MLJSerialization, MLJOpenML, diff --git a/docs/src/adding_models_for_general_use.md b/docs/src/adding_models_for_general_use.md index a46011e2a..a77f915ce 100755 --- a/docs/src/adding_models_for_general_use.md +++ b/docs/src/adding_models_for_general_use.md @@ -39,7 +39,7 @@ It is assumed the reader has read [Getting Started](index.md). To implement the API described here, some familiarity with the following packages is also helpful: -- [MLJScientificTypes.jl](https://github.com/alan-turing-institute/MLJScientificTypes.jl) +- [ScientificTypes.jl](https://github.com/alan-turing-institute/ScientificTypes.jl) (for specifying model requirements of data) - [Distributions.jl](https://github.com/JuliaStats/Distributions.jl) @@ -672,7 +672,7 @@ attempt to use your model with inappropriately typed data. The trait functions `input_scitype` and `target_scitype` take scientific data types as values. We assume here familiarity with -[MLJScientificTypes.jl](https://github.com/alan-turing-institute/MLJScientificTypes.jl) +[ScientificTypes.jl](https://github.com/alan-turing-institute/ScientificTypes.jl) (see [Getting Started](index.md) for the basics). 
For example, to ensure that the `X` presented to the diff --git a/docs/src/getting_started.md b/docs/src/getting_started.md index 65010b335..c1df09cff 100644 --- a/docs/src/getting_started.md +++ b/docs/src/getting_started.md @@ -274,9 +274,9 @@ as `Array{Float32, 2}`). Similar remarks apply to the input `X` of an unsupervised model. Scientific types are julia types defined in the package -[ScientificTypes.jl](https://github.com/alan-turing-institute/ScientificTypes.jl); +[ScientificTypesBase.jl](https://github.com/alan-turing-institute/ScientificTypesBase.jl); the package -[MLJScientificTypes.jl](https://alan-turing-institute.github.io/MLJScientificTypes.jl/dev/) +[ScientificTypes.jl](https://alan-turing-institute.github.io/ScientificTypes.jl/dev/) implements the particular convention used in the MLJ universe for assigning a specific scientific type (interpretation) to each julia object (see the `scitype` examples below). @@ -286,7 +286,7 @@ The basic "scalar" scientific types are `Continuous`, `Multiclass{N}`, scientific types](@ref) below to guarantee your scalar data is interpreted correctly. Tools exist to coerce the data to have the appropriate scientfic type; see -[MLJScientificTypes.jl](https://alan-turing-institute.github.io/MLJScientificTypes.jl/dev/) +[ScientificTypes.jl](https://alan-turing-institute.github.io/ScientificTypes.jl/dev/) or run `?coerce` for details. Additionally, most data containers - such as tuples, vectors, matrices @@ -296,7 +296,7 @@ and tables - have a scientific type. ![](img/scitypes.png) *Figure 1. Part of the scientific type hierarchy in* -[ScientificTypes.jl](https://alan-turing-institute.github.io/MLJScientificTypes.jl/dev/). +[ScientificTypesBase.jl](https://github.com/alan-turing-institute/ScientificTypesBase.jl). ```@repl doda scitype(4.6) @@ -389,7 +389,7 @@ But see also [Model Search](@ref). 
### Scalar scientific types Models in MLJ will always apply the `MLJ` convention described in -[MLJScientificTypes.jl](https://alan-turing-institute.github.io/MLJScientificTypes.jl/dev/) +[ScientificTypes.jl](https://alan-turing-institute.github.io/ScientificTypes.jl/dev/) to decide how to interpret the elements of your container types. Here are the key features of that convention: @@ -417,7 +417,7 @@ appropriate `Finite` (categorical) scitype. See [Working with Categorical Data] For more on scitype coercion of arrays and tables, see [`coerce`](@ref), [`autotype`](@ref) and [`unpack`](@ref) below and the examples at -[MLJScientificTypes.jl](https://alan-turing-institute.github.io/MLJScientificTypes.jl/dev/). +[ScientificTypes.jl](https://alan-turing-institute.github.io/ScientificTypes.jl/dev/). diff --git a/docs/src/preparing_data.md b/docs/src/preparing_data.md index c60c4bbdc..104f7bcdf 100644 --- a/docs/src/preparing_data.md +++ b/docs/src/preparing_data.md @@ -1,7 +1,7 @@ # Preparing Data As outlined in [Getting Started](@ref), it is important that the -[scientific type](https://github.com/JuliaAI/ScientificTypes.jl) of +[scientific type](https://github.com/JuliaAI/ScientificTypesBase.jl) of data matches the requirements of the model of interest. For example, while the majority of supervised learning models require input features to be `Continuous`, newcomers to MLJ are sometimes @@ -93,7 +93,7 @@ Pipelines](@ref). ## Scientific type coercion Scientific type coercion is documented in detail at -[ScientificTypes.jl](https://github.com/JuliaAI/ScientificTypes.jl). See +[ScientificTypesBase.jl](https://github.com/JuliaAI/ScientificTypesBase.jl). 
See also the tutorial at the [this MLJ Workshop](https://github.com/ablaom/MachineLearningInJulia2020) (specifically, diff --git a/docs/src/quick_start_guide_to_adding_models.md b/docs/src/quick_start_guide_to_adding_models.md index 8e3e4fe91..c58dcb9cb 100644 --- a/docs/src/quick_start_guide_to_adding_models.md +++ b/docs/src/quick_start_guide_to_adding_models.md @@ -8,7 +8,7 @@ learning models; (ii) that you would like to interface and register these models with MLJ; and (iii) that you have a rough understanding of how things work with MLJ. In particular you are familiar with: -- what [scientific types](https://github.com/alan-turing-institute/MLJScientificTypes.jl) are +- what [scientific types](https://github.com/alan-turing-institute/ScientificTypes.jl) are - what `Probabilistic`, `Deterministic` and `Unsupervised` models are diff --git a/docs/src/working_with_categorical_data.md b/docs/src/working_with_categorical_data.md index cb5cdb6a4..3974ca4cb 100644 --- a/docs/src/working_with_categorical_data.md +++ b/docs/src/working_with_categorical_data.md @@ -3,8 +3,8 @@ ## Scientific types for discrete data Recall that models articulate their data requirements using scientific -types (see [Getting Started](@ref) or the [MLJScientificTypes.jl -documentation](https://alan-turing-institute.github.io/MLJScientificTypes.jl/dev/)). There +types (see [Getting Started](@ref) or the [ScientificTypes.jl +documentation](https://alan-turing-institute.github.io/ScientificTypes.jl/dev/)). There are three scientific types discrete data can have: `Count`, `OrderedFactor` and `Multiclass`. @@ -62,7 +62,7 @@ above. To inspect all column scientific types in a table simultaneously, use `schema`. (The `scitype(X)` of a table `X` contains a condensed form of this information used in type dispatch; see -[here](https://github.com/alan-turing-institute/ScientificTypes.jl#more-on-the-table-type).) 
+[here](https://github.com/alan-turing-institute/ScientificTypesBase.jl#more-on-the-table-type).) ```@example hut import DataFrames.DataFrame diff --git a/paper/paper.md b/paper/paper.md index 6e05a37c5..468c4bde1 100644 --- a/paper/paper.md +++ b/paper/paper.md @@ -130,7 +130,7 @@ representation of probabilities, are avoided. **Scientific types** To help users focus less on data representation (e.g., `Float32`, `DataFrame`) and more on the intended *purpose* or *interpretation* of data, MLJ articulates model data requirements -using *scientific types* [@ScientificTypes], such as "continuous", +using *scientific types* [@ScientificTypesBase], such as "continuous", "ordered factor" or "table". **Connecting models directly to arbitrary data containers**. A diff --git a/src/MLJ.jl b/src/MLJ.jl index 2ae80f8cf..c8b1522f5 100644 --- a/src/MLJ.jl +++ b/src/MLJ.jl @@ -33,7 +33,7 @@ using ComputationalResources: CPUProcesses import MLJBase: fit, update, clean!, fit!, predict, fitted_params, show_as_constructed, == import MLJModels: models -import MLJScientificTypes +import ScientificTypes ## METHOD EXPORT @@ -47,7 +47,7 @@ export MLJ_VERSION export pdf, logpdf, mode, median, mean, shuffle!, categorical, shuffle, levels, levels!, std, support, sampler -# re-exports from (MLJ)ScientificTypes via MLJBase +# re-exports from ScientificTypes(Base) via MLJBase export Scientific, Found, Unknown, Known, Finite, Infinite, OrderedFactor, Multiclass, Count, Continuous, Textual, Binary, ColorImage, GrayImage, Image, Table @@ -222,6 +222,6 @@ const srcdir = dirname(@__FILE__) ## INCLUDE FILES include("version.jl") # defines MLJ_VERSION constant -include("scitypes.jl") # extensions to ScientificTypes.scitype +include("scitypes.jl") # extensions to ScientificTypesBase.scitype end # module diff --git a/src/scitypes.jl b/src/scitypes.jl index 314ce510d..537375d48 100644 --- a/src/scitypes.jl +++ b/src/scitypes.jl @@ -2,21 +2,21 @@ # This implementation of scitype for models and measures 
is highly experimental -const MST = MLJScientificTypes # only used in this file +const ST = ScientificTypes # only used in this file struct SupervisedScitype{input_scitype, target_scitype, prediction_type} end -MST.scitype(model::Deterministic, ::MST.MLJ) = +ST.scitype(model::Deterministic, ::ST.DefaultConvention) = SupervisedScitype{input_scitype(model), target_scitype(model), :deterministic} -MST.scitype(model::Probabilistic, ::MST.MLJ) = +ST.scitype(model::Probabilistic, ::ST.DefaultConvention) = SupervisedScitype{input_scitype(model), target_scitype(model), :probabilistic} -MST.scitype(model::Interval, ::MST.MLJ) = +ST.scitype(model::Interval, ::ST.DefaultConvention) = SupervisedScitype{input_scitype(model), target_scitype(model), :interval} @@ -52,7 +52,7 @@ end struct UnsupervisedScitype{input_scitype, output_scitype} end -MST.scitype(model::Unsupervised, ::MST.MLJ) = +ST.scitype(model::Unsupervised, ::ST.DefaultConvention) = UnsupervisedScitype{input_scitype(model), MLJBase.output_scitype(model)} @@ -91,7 +91,7 @@ struct MeasureScitype{target_scitype, is_feature_dependent, supports_weights} end -MST.scitype(measure, ::MST.MLJ, ::Val{:measure}) = +ST.scitype(measure, ::ST.DefaultConvention, ::Val{:measure}) = MeasureScitype{target_scitype(measure), prediction_type(measure), orientation(measure),