From 782da3ac2836a7763b3b6edfb03407464f52c79a Mon Sep 17 00:00:00 2001 From: Adriane Boyd Date: Thu, 9 Nov 2023 15:54:26 +0100 Subject: [PATCH] Updates for weasel (#200) * Update maintenance scripts for weasel * Update README for weasel * Update project READMEs for weasel * Update spacy requirements across projects for weasel * CI: Add WEASEL_CONFIG_OVERRIDES * CI: Switch to python 3.8 --- .github/update_category_docs.py | 3 +- .github/update_projects_jsonl.py | 3 +- .github/workflows/tests.yml | 5 +- README.md | 46 +++++++++++-------- benchmarks/healthsea_spancat/README.md | 14 +++--- benchmarks/nel/README.md | 12 ++--- benchmarks/nel/project.yml | 1 - benchmarks/nel/requirements.txt | 1 + benchmarks/ner_conll03/README.md | 14 +++--- benchmarks/ner_embeddings/README.md | 14 +++--- benchmarks/parsing_penn_treebank/README.md | 14 +++--- .../pretraining_morphologizer_oscar/README.md | 14 +++--- .../project.yml | 2 - .../requirements.txt | 3 +- benchmarks/span-labeling-datasets/README.md | 14 +++--- benchmarks/span-labeling-datasets/project.yml | 2 - .../span-labeling-datasets/requirements.txt | 2 +- benchmarks/speed/README.md | 14 +++--- benchmarks/textcat_architectures/README.md | 14 +++--- benchmarks/ud_benchmark/README.md | 14 +++--- benchmarks/ud_benchmark/project.yml | 2 - benchmarks/ud_benchmark/requirements.txt | 1 + experimental/coref/README.md | 14 +++--- experimental/coref/project.yml | 2 - experimental/coref/requirements.txt | 1 + experimental/ner_spancat/README.md | 14 +++--- experimental/ner_spancat/project.yml | 1 - experimental/ner_spancat_compare/README.md | 14 +++--- experimental/ner_wikiner_speedster/README.md | 14 +++--- integrations/fastapi/README.md | 14 +++--- integrations/huggingface_hub/README.md | 14 +++--- integrations/prodigy/README.md | 14 +++--- integrations/ray/README.md | 14 +++--- integrations/streamlit/README.md | 10 ++-- integrations/wandb/README.md | 14 +++--- pipelines/edit_tree_lemmatizer/README.md | 14 +++--- 
pipelines/floret_fi_core_demo/README.md | 14 +++--- pipelines/floret_fi_core_demo/project.yml | 1 - .../floret_fi_core_demo/requirements.txt | 1 + pipelines/floret_ko_ud_demo/README.md | 14 +++--- pipelines/floret_ko_ud_demo/project.yml | 1 - pipelines/floret_ko_ud_demo/requirements.txt | 1 + pipelines/floret_vectors_demo/README.md | 12 ++--- pipelines/floret_vectors_demo/project.yml | 1 - .../floret_vectors_demo/requirements.txt | 1 + pipelines/floret_wiki_oscar_vectors/README.md | 14 +++--- .../floret_wiki_oscar_vectors/project.yml | 1 - .../requirements.txt | 2 + pipelines/ner_demo/README.md | 14 +++--- pipelines/ner_demo_replace/README.md | 14 +++--- pipelines/ner_demo_replace/project.yml | 1 - pipelines/ner_demo_replace/requirements.txt | 1 + pipelines/ner_demo_update/README.md | 14 +++--- pipelines/ner_demo_update/project.yml | 1 - pipelines/ner_demo_update/requirements.txt | 1 + pipelines/ner_wikiner/README.md | 14 +++--- pipelines/parser_demo/README.md | 14 +++--- pipelines/parser_intent_demo/README.md | 14 +++--- pipelines/polar_component/README.md | 14 +++--- pipelines/spancat_demo/README.md | 14 +++--- .../README.md | 14 +++--- .../project.yml | 1 - pipelines/tagger_parser_ud/README.md | 14 +++--- pipelines/tagger_parser_ud/project.yml | 2 - pipelines/textcat_demo/README.md | 14 +++--- pipelines/textcat_multilabel_demo/README.md | 14 +++--- tutorials/nel_emerson/README.md | 14 +++--- tutorials/nel_emerson/project.yml | 2 - tutorials/nel_emerson/requirements.txt | 3 +- tutorials/ner_double/README.md | 14 +++--- tutorials/ner_double/project.yml | 2 - tutorials/ner_drugs/README.md | 14 +++--- tutorials/ner_fashion_brands/README.md | 14 +++--- tutorials/ner_food_ingredients/README.md | 10 ++-- tutorials/ner_multiple_trials/README.md | 14 +++--- tutorials/ner_pytorch_medical/README.md | 14 +++--- tutorials/ner_tweets/README.md | 14 +++--- tutorials/ner_tweets/project.yml | 2 - tutorials/ner_tweets/requirements.txt | 2 + tutorials/parser_low_resource/README.md 
| 14 +++--- tutorials/rel_component/README.md | 14 +++--- tutorials/spancat_food_ingredients/README.md | 12 ++--- .../spanruler_restaurant_reviews/README.md | 14 +++--- tutorials/textcat_docs_issues/README.md | 14 +++--- tutorials/textcat_docs_issues/project.yml | 1 - tutorials/textcat_goemotions/README.md | 14 +++--- 86 files changed, 400 insertions(+), 403 deletions(-) diff --git a/.github/update_category_docs.py b/.github/update_category_docs.py index 207caeb4f..ba326b9a1 100644 --- a/.github/update_category_docs.py +++ b/.github/update_category_docs.py @@ -1,5 +1,6 @@ from pathlib import Path -from spacy.cli._util import PROJECT_FILE, load_project_config +from weasel.cli.main import PROJECT_FILE +from weasel.util import load_project_config from wasabi import msg, MarkdownRenderer import typer diff --git a/.github/update_projects_jsonl.py b/.github/update_projects_jsonl.py index 36339a7ba..eca1d1dd1 100644 --- a/.github/update_projects_jsonl.py +++ b/.github/update_projects_jsonl.py @@ -1,5 +1,6 @@ from pathlib import Path -from spacy.cli._util import PROJECT_FILE, load_project_config +from weasel.cli.main import PROJECT_FILE +from weasel.util import load_project_config from wasabi import msg import json import typer diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 7a98d3beb..8b2d846e7 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -12,6 +12,7 @@ on: env: # Make sure we're exiting training as early as possible SPACY_CONFIG_OVERRIDES: '--training.max_epochs=1 --training.max_steps=1' + WEASEL_CONFIG_OVERRIDES: '--training.max_epochs=1 --training.max_steps=1' WASABI_LOG_FRIENDLY: 1 jobs: @@ -23,9 +24,9 @@ jobs: matrix: include: - os: windows-2019 - python_version: "3.7" + python_version: "3.8" - os: ubuntu-20.04 - python_version: "3.7" + python_version: "3.8" runs-on: ${{ matrix.os }} steps: diff --git a/README.md b/README.md index d08ae2d49..0fd9d0888 100644 --- a/README.md +++ b/README.md @@ -2,17 +2,19 
@@ # 🪐 Project Templates -[spaCy projects](https://spacy.io/usage/projects) let you manage and share -**end-to-end spaCy workflows** for different **use cases and domains**, and +[Weasel](https://github.com/explosion/weasel), previously +[spaCy projects](https://spacy.io/usage/projects), lets you manage and share +**end-to-end workflows** for different **use cases and domains**, and orchestrate training, packaging and serving your custom pipelines. You can start off by cloning a pre-defined project template, adjust it to fit your needs, load in your data, train a pipeline, export it as a Python package, upload your outputs to a remote storage and share your results with your team. -> ⚠️ spaCy project templates require [**spaCy v3**](https://spacy.io). You can -> install it from pip with `pip install spacy` or conda with -> `conda install spacy -c conda-forge`. Make sure to use a fresh virtual -> environment. +> ⚠️ Weasel project templates require +> [**Weasel**](https://github.com/explosion/weasel), which is also included by +> default with spaCy v3.7+. You can install it from pip with +> `pip install weasel` or conda with `conda install weasel -c conda-forge`. Make +> sure to use a fresh virtual environment. > > See the [`master` branch](https://github.com/explosion/projects/tree/master) > for the previous version of this repo. @@ -32,31 +34,35 @@ outputs to a remote storage and share your results with your team. ## 🚀 Quickstart -Projects can be used via the new -[`spacy project`](https://spacy.io/api/cli#project) CLI. To find out more about -a command, add `--help`. For detailed instructions, see the -[usage guide](https://spacy.io/usage/projects). - - +Projects can be used via the +[`weasel`](https://github.com/explosion/weasel/blob/main/docs/cli.md) CLI, or +through the [`spacy project`](https://spacy.io/api/cli#project) alias. To find +out more about a command, add `--help`. 
For detailed instructions, see the +[Weasel documentation](https://github.com/explosion/weasel/tree/main#-documentation) +or [spaCy projects usage guide](https://spacy.io/usage/projects). 1. **Clone** the project template you want to use. ```bash - python -m spacy project clone tutorials/ner_fashion_brands + python -m weasel clone tutorials/ner_fashion_brands ``` -2. **Fetch assets** (data, weights) defined in the `project.yml`. +2. **Install** any project requirements. ```bash cd ner_fashion_brands - python -m spacy project assets + python -m pip install -r requirements.txt + ``` +3. **Fetch assets** (data, weights) defined in the `project.yml`. + ```bash + python -m weasel assets ``` -3. **Run a command** defined in the `project.yml`. +4. **Run a command** defined in the `project.yml`. ```bash - python -m spacy project run preprocess + python -m weasel run preprocess ``` -4. **Run a workflow** of multiple steps in order. +5. **Run a workflow** of multiple steps in order. ```bash - python -m spacy project run all + python -m weasel run all ``` -5. **Adjust** the template for **your specific use case**, load in your own +6. **Adjust** the template for **your specific use case**, load in your own data, adjust the settings and model and share the result with your team. ## ๐Ÿ‘ทโ€โ™€๏ธRepository maintanance diff --git a/benchmarks/healthsea_spancat/README.md b/benchmarks/healthsea_spancat/README.md index 49ada9741..2f0241fa1 100644 --- a/benchmarks/healthsea_spancat/README.md +++ b/benchmarks/healthsea_spancat/README.md @@ -1,6 +1,6 @@ - + -# ๐Ÿช spaCy Project: Healthsea-Spancat +# ๐Ÿช Weasel Project: Healthsea-Spancat This spaCy project uses the Healthsea dataset to compare the performance between the Spancat and NER architecture. @@ -8,12 +8,12 @@ This spaCy project uses the Healthsea dataset to compare the performance between The [`project.yml`](project.yml) defines the data assets required by the project, as well as the available commands and workflows. 
For details, see the -[spaCy projects documentation](https://spacy.io/usage/projects). +[Weasel documentation](https://github.com/explosion/weasel). ### โฏ Commands The following commands are defined by the project. They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run). +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run). Commands are only re-run if their inputs have changed. | Command | Description | @@ -29,7 +29,7 @@ Commands are only re-run if their inputs have changed. ### โญ Workflows The following workflows are defined by the project. They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run) +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run) and will run the specified commands in order. Commands are only re-run if their inputs have changed. @@ -42,11 +42,11 @@ inputs have changed. ### ๐Ÿ—‚ Assets The following assets are defined by the project. They can -be fetched by running [`spacy project assets`](https://spacy.io/api/cli#project-assets) +be fetched by running [`weasel assets`](https://github.com/explosion/weasel/tree/main/docs/cli.md#open_file_folder-assets) in the project directory. | File | Source | Description | | --- | --- | --- | | `assets/annotation.jsonl` | URL | NER annotations exported from Prodigy with 5000 examples and 2 labels | - \ No newline at end of file + \ No newline at end of file diff --git a/benchmarks/nel/README.md b/benchmarks/nel/README.md index dcbe043c7..9730315a6 100644 --- a/benchmarks/nel/README.md +++ b/benchmarks/nel/README.md @@ -1,6 +1,6 @@ - + -# ๐Ÿช spaCy Project: NEL Benchmark +# ๐Ÿช Weasel Project: NEL Benchmark Pipeline for benchmarking NEL approaches (incl. candidate generation and entity disambiguation). @@ -8,12 +8,12 @@ Pipeline for benchmarking NEL approaches (incl. 
candidate generation and entity The [`project.yml`](project.yml) defines the data assets required by the project, as well as the available commands and workflows. For details, see the -[spaCy projects documentation](https://spacy.io/usage/projects). +[Weasel documentation](https://github.com/explosion/weasel). ### โฏ Commands The following commands are defined by the project. They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run). +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run). Commands are only re-run if their inputs have changed. | Command | Description | @@ -36,7 +36,7 @@ Commands are only re-run if their inputs have changed. ### โญ Workflows The following workflows are defined by the project. They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run) +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run) and will run the specified commands in order. Commands are only re-run if their inputs have changed. @@ -45,7 +45,7 @@ inputs have changed. | `all` | `download_mewsli9` → `download_model` → `wikid_clone` → `preprocess` → `wikid_download_assets` → `wikid_parse` → `wikid_create_kb` → `parse_corpus` → `compile_corpora` → `train` → `evaluate` → `compare_evaluations` | | `training` | `train` → `evaluate` | - + Notes: > **Warning**: Parts of this project are currently not platform-agnostic and run only on Linux. Making the entire diff --git a/benchmarks/nel/project.yml b/benchmarks/nel/project.yml index c0d2b9b32..0f7e93fee 100644 --- a/benchmarks/nel/project.yml +++ b/benchmarks/nel/project.yml @@ -1,6 +1,5 @@ title: 'NEL Benchmark' description: "Pipeline for benchmarking NEL approaches (incl. candidate generation and entity disambiguation)." 
-spacy_version: ">=3.0.0,<3.6.0" vars: run: "cg-default" language: "en" diff --git a/benchmarks/nel/requirements.txt b/benchmarks/nel/requirements.txt index a1ab4db43..cd618f9d3 100644 --- a/benchmarks/nel/requirements.txt +++ b/benchmarks/nel/requirements.txt @@ -7,3 +7,4 @@ rapidfuzz>=2.0.0 spacyfishing virtualenv pysqlite3-binary +spacy>=3.0.0,<3.6.0 diff --git a/benchmarks/ner_conll03/README.md b/benchmarks/ner_conll03/README.md index 797351622..8c8d449aa 100644 --- a/benchmarks/ner_conll03/README.md +++ b/benchmarks/ner_conll03/README.md @@ -1,17 +1,17 @@ - + -# 🪐 spaCy Project: Named Entity Recognition (CoNLL-2003) +# 🪐 Weasel Project: Named Entity Recognition (CoNLL-2003) ## 📋 project.yml The [`project.yml`](project.yml) defines the data assets required by the project, as well as the available commands and workflows. For details, see the -[spaCy projects documentation](https://spacy.io/usage/projects). +[Weasel documentation](https://github.com/explosion/weasel). ### ⏯ Commands The following commands are defined by the project. They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run). +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run). Commands are only re-run if their inputs have changed. | Command | Description | @@ -25,7 +25,7 @@ Commands are only re-run if their inputs have changed. ### ⏭ Workflows The following workflows are defined by the project. They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run) +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run) and will run the specified commands in order. Commands are only re-run if their inputs have changed. @@ -36,7 +36,7 @@ inputs have changed. ### 🗂 Assets The following assets are defined by the project. 
They can -be fetched by running [`spacy project assets`](https://spacy.io/api/cli#project-assets) +be fetched by running [`weasel assets`](https://github.com/explosion/weasel/tree/main/docs/cli.md#open_file_folder-assets) in the project directory. | File | Source | Description | @@ -47,4 +47,4 @@ in the project directory. | `assets/conll2003/train.iob` | Local | Training data (not available publicly so you have to add the file yourself) | | `assets/orth_variants.json` | URL | A file containing orth variants for data augmentation | - \ No newline at end of file + \ No newline at end of file diff --git a/benchmarks/ner_embeddings/README.md b/benchmarks/ner_embeddings/README.md index a6466ff15..2b6cc6211 100644 --- a/benchmarks/ner_embeddings/README.md +++ b/benchmarks/ner_embeddings/README.md @@ -1,6 +1,6 @@ - + -# ๐Ÿช spaCy Project: Comparing embedding layers in spaCy +# ๐Ÿช Weasel Project: Comparing embedding layers in spaCy This project contains the code to reproduce the results of the [Multi hash embeddings in spaCy](https://arxiv.org/abs/2212.09255) technical report by Explosion. @@ -29,12 +29,12 @@ the hash embedding layers. We apologize for the inconvenience. The [`project.yml`](project.yml) defines the data assets required by the project, as well as the available commands and workflows. For details, see the -[spaCy projects documentation](https://spacy.io/usage/projects). +[Weasel documentation](https://github.com/explosion/weasel). ### โฏ Commands The following commands are defined by the project. They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run). +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run). Commands are only re-run if their inputs have changed. | Command | Description | @@ -54,7 +54,7 @@ Commands are only re-run if their inputs have changed. ### โญ Workflows The following workflows are defined by the project. 
They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run) +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run) and will run the specified commands in order. Commands are only re-run if their inputs have changed. @@ -66,7 +66,7 @@ inputs have changed. ### ๐Ÿ—‚ Assets The following assets are defined by the project. They can -be fetched by running [`spacy project assets`](https://spacy.io/api/cli#project-assets) +be fetched by running [`weasel assets`](https://github.com/explosion/weasel/tree/main/docs/cli.md#open_file_folder-assets) in the project directory. | File | Source | Description | @@ -76,4 +76,4 @@ in the project directory. | `assets/fasttext.nl.gz` | URL | Dutch fastText vectors. | | `span-labeling-datasets` | Git | | - + diff --git a/benchmarks/parsing_penn_treebank/README.md b/benchmarks/parsing_penn_treebank/README.md index bbdc38834..f8ac538a8 100644 --- a/benchmarks/parsing_penn_treebank/README.md +++ b/benchmarks/parsing_penn_treebank/README.md @@ -1,17 +1,17 @@ - + -# ๐Ÿช spaCy Project: Dependency Parsing (Penn Treebank) +# ๐Ÿช Weasel Project: Dependency Parsing (Penn Treebank) ## ๐Ÿ“‹ project.yml The [`project.yml`](project.yml) defines the data assets required by the project, as well as the available commands and workflows. For details, see the -[spaCy projects documentation](https://spacy.io/usage/projects). +[Weasel documentation](https://github.com/explosion/weasel). ### โฏ Commands The following commands are defined by the project. They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run). +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run). Commands are only re-run if their inputs have changed. | Command | Description | @@ -25,7 +25,7 @@ Commands are only re-run if their inputs have changed. 
### โญ Workflows The following workflows are defined by the project. They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run) +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run) and will run the specified commands in order. Commands are only re-run if their inputs have changed. @@ -36,7 +36,7 @@ inputs have changed. ### ๐Ÿ—‚ Assets The following assets are defined by the project. They can -be fetched by running [`spacy project assets`](https://spacy.io/api/cli#project-assets) +be fetched by running [`weasel assets`](https://github.com/explosion/weasel/tree/main/docs/cli.md#open_file_folder-assets) in the project directory. | File | Source | Description | @@ -47,4 +47,4 @@ in the project directory. | `assets/vectors.zip` | URL | GloVe vectors | | `assets/orth_variants.json` | URL | A file containing orth variants for data augmentation | - + diff --git a/benchmarks/pretraining_morphologizer_oscar/README.md b/benchmarks/pretraining_morphologizer_oscar/README.md index bd01348c3..3e634d815 100644 --- a/benchmarks/pretraining_morphologizer_oscar/README.md +++ b/benchmarks/pretraining_morphologizer_oscar/README.md @@ -1,6 +1,6 @@ - + -# ๐Ÿช spaCy Project: Enhancing Morphological Analysis with spaCy Pretraining +# ๐Ÿช Weasel Project: Enhancing Morphological Analysis with spaCy Pretraining This project explores the effectiveness of pretraining techniques on morphological analysis (morphologizer) by conducting experiments on multiple languages. The objective of this project is to demonstrate the benefits of pretraining word vectors using domain-specific data on the performance of the morphological analysis. We leverage the OSCAR dataset to pretrain our vectors for tok2vec and utilize the UD_Treebanks dataset to train a morphologizer component. We evaluate and compare the performance of different pretraining techniques and the performance of models without any pretraining. 
@@ -8,12 +8,12 @@ This project explores the effectiveness of pretraining techniques on morphologic The [`project.yml`](project.yml) defines the data assets required by the project, as well as the available commands and workflows. For details, see the -[spaCy projects documentation](https://spacy.io/usage/projects). +[Weasel documentation](https://github.com/explosion/weasel). ### โฏ Commands The following commands are defined by the project. They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run). +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run). Commands are only re-run if their inputs have changed. | Command | Description | @@ -43,7 +43,7 @@ Commands are only re-run if their inputs have changed. ### โญ Workflows The following workflows are defined by the project. They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run) +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run) and will run the specified commands in order. Commands are only re-run if their inputs have changed. @@ -59,11 +59,11 @@ inputs have changed. ### ๐Ÿ—‚ Assets The following assets are defined by the project. They can -be fetched by running [`spacy project assets`](https://spacy.io/api/cli#project-assets) +be fetched by running [`weasel assets`](https://github.com/explosion/weasel/tree/main/docs/cli.md#open_file_folder-assets) in the project directory. 
| File | Source | Description | | --- | --- | --- | | `assets/ud-treebanks-v2.5.tgz` | URL | | - \ No newline at end of file + \ No newline at end of file diff --git a/benchmarks/pretraining_morphologizer_oscar/project.yml b/benchmarks/pretraining_morphologizer_oscar/project.yml index cc750f51a..0bf263333 100644 --- a/benchmarks/pretraining_morphologizer_oscar/project.yml +++ b/benchmarks/pretraining_morphologizer_oscar/project.yml @@ -17,8 +17,6 @@ vars: # Choose -1 for CPU gpu: -1 -spacy_version: ">=3.5.2,<4.0.0" - # These are the directories that the project needs. The project CLI will make # sure that they always exist. directories: ["assets", "scripts", "data", "training", "pretraining", "metrics"] diff --git a/benchmarks/pretraining_morphologizer_oscar/requirements.txt b/benchmarks/pretraining_morphologizer_oscar/requirements.txt index 1f029ba77..adf32a462 100644 --- a/benchmarks/pretraining_morphologizer_oscar/requirements.txt +++ b/benchmarks/pretraining_morphologizer_oscar/requirements.txt @@ -1,4 +1,5 @@ spacy datasets spacy-transformers -matplotlib \ No newline at end of file +matplotlib +spacy>=3.5.2,<4.0.0 diff --git a/benchmarks/span-labeling-datasets/README.md b/benchmarks/span-labeling-datasets/README.md index 7f9d1e719..19bf07a3e 100644 --- a/benchmarks/span-labeling-datasets/README.md +++ b/benchmarks/span-labeling-datasets/README.md @@ -1,6 +1,6 @@ - + -# ๐Ÿช spaCy Project: Span labeling datasets +# ๐Ÿช Weasel Project: Span labeling datasets This project compiles various NER and more general spancat datasets and their converters into the [spaCy format](https://spacy.io/api/data-formats). @@ -12,12 +12,12 @@ or to potentially pre-train them for your application. The [`project.yml`](project.yml) defines the data assets required by the project, as well as the available commands and workflows. For details, see the -[spaCy projects documentation](https://spacy.io/usage/projects). +[Weasel documentation](https://github.com/explosion/weasel). 
### โฏ Commands The following commands are defined by the project. They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run). +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run). Commands are only re-run if their inputs have changed. | Command | Description | @@ -47,7 +47,7 @@ Commands are only re-run if their inputs have changed. ### โญ Workflows The following workflows are defined by the project. They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run) +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run) and will run the specified commands in order. Commands are only re-run if their inputs have changed. @@ -63,7 +63,7 @@ inputs have changed. ### ๐Ÿ—‚ Assets The following assets are defined by the project. They can -be fetched by running [`spacy project assets`](https://spacy.io/api/cli#project-assets) +be fetched by running [`weasel assets`](https://github.com/explosion/weasel/tree/main/docs/cli.md#open_file_folder-assets) in the project directory. | File | Source | Description | @@ -78,4 +78,4 @@ in the project directory. | `assets/restaurant-train_raw.iob` | URL | Training data from the MIT Restaurants Review dataset | | `assets/restaurant-test_raw.iob` | URL | Test data from the MIT Restaurants Review dataset | - + diff --git a/benchmarks/span-labeling-datasets/project.yml b/benchmarks/span-labeling-datasets/project.yml index bc35bc56f..3f3eb40f8 100644 --- a/benchmarks/span-labeling-datasets/project.yml +++ b/benchmarks/span-labeling-datasets/project.yml @@ -5,8 +5,6 @@ description: | You can use this to try out experiment with `ner` and `spancat` or to potentially pre-train them for your application. 
-spacy_version: ">=3.2.5,<4.0.0" - vars: spans_key: "sc" gpu_id: 0 diff --git a/benchmarks/span-labeling-datasets/requirements.txt b/benchmarks/span-labeling-datasets/requirements.txt index 081118921..0edc76fa7 100644 --- a/benchmarks/span-labeling-datasets/requirements.txt +++ b/benchmarks/span-labeling-datasets/requirements.txt @@ -1,4 +1,4 @@ -spacy +spacy>=3.2.5,<4.0.0 typer wasabi pandas diff --git a/benchmarks/speed/README.md b/benchmarks/speed/README.md index 9c386475f..3db16c565 100644 --- a/benchmarks/speed/README.md +++ b/benchmarks/speed/README.md @@ -1,6 +1,6 @@ - + -# 🪐 spaCy Project: Project for speed benchmarking of various pretrained models of different NLP libraries. +# 🪐 Weasel Project: Project for speed benchmarking of various pretrained models of different NLP libraries. This project runs various models on unannotated text, to measure the average speed in words per second (WPS). Note that a fair comparison should also take into account the type of annotations produced by each model, and the accuracy scores of the various pretrained NLP tasks. This example project only addresses the speed issue, but can be extended to perform more detailed comparisons on any data. @@ -8,12 +8,12 @@ This project runs various models on unannotated text, to measure the average spe The [`project.yml`](project.yml) defines the data assets required by the project, as well as the available commands and workflows. For details, see the -[spaCy projects documentation](https://spacy.io/usage/projects). +[Weasel documentation](https://github.com/explosion/weasel). ### ⏯ Commands The following commands are defined by the project. They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run). +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run). Commands are only re-run if their inputs have changed. 
| Command | Description | @@ -26,7 +26,7 @@ Commands are only re-run if their inputs have changed. ### โญ Workflows The following workflows are defined by the project. They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run) +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run) and will run the specified commands in order. Commands are only re-run if their inputs have changed. @@ -38,11 +38,11 @@ inputs have changed. ### ๐Ÿ—‚ Assets The following assets are defined by the project. They can -be fetched by running [`spacy project assets`](https://spacy.io/api/cli#project-assets) +be fetched by running [`weasel assets`](https://github.com/explosion/weasel/tree/main/docs/cli.md#open_file_folder-assets) in the project directory. | File | Source | Description | | --- | --- | --- | | `texts/reddit-100k.jsonl` | URL | The texts to process | - + diff --git a/benchmarks/textcat_architectures/README.md b/benchmarks/textcat_architectures/README.md index 92bfcf57d..bbc1bcf51 100644 --- a/benchmarks/textcat_architectures/README.md +++ b/benchmarks/textcat_architectures/README.md @@ -1,6 +1,6 @@ - + -# ๐Ÿช spaCy Project: Textcat performance benchmarks +# ๐Ÿช Weasel Project: Textcat performance benchmarks Benchmarking different textcat architectures on different datasets. @@ -8,12 +8,12 @@ Benchmarking different textcat architectures on different datasets. The [`project.yml`](project.yml) defines the data assets required by the project, as well as the available commands and workflows. For details, see the -[spaCy projects documentation](https://spacy.io/usage/projects). +[Weasel documentation](https://github.com/explosion/weasel). ### โฏ Commands The following commands are defined by the project. They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run). 
+can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run). Commands are only re-run if their inputs have changed. | Command | Description | @@ -25,7 +25,7 @@ Commands are only re-run if their inputs have changed. ### โญ Workflows The following workflows are defined by the project. They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run) +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run) and will run the specified commands in order. Commands are only re-run if their inputs have changed. @@ -36,7 +36,7 @@ inputs have changed. ### ๐Ÿ—‚ Assets The following assets are defined by the project. They can -be fetched by running [`spacy project assets`](https://spacy.io/api/cli#project-assets) +be fetched by running [`weasel assets`](https://github.com/explosion/weasel/tree/main/docs/cli.md#open_file_folder-assets) in the project directory. | File | Source | Description | @@ -44,4 +44,4 @@ in the project directory. | `assets/aclImdb_v1.tar.gz` | URL | Movie Review Dataset by Maas et al., ACL 2011. | | `assets/dbpedia_csv.tgz` | URL | DBPedia ontology with 14 nonoverlapping classes by Zhang et al., 2015. | - \ No newline at end of file + \ No newline at end of file diff --git a/benchmarks/ud_benchmark/README.md b/benchmarks/ud_benchmark/README.md index 9afa9eeec..18bc1fb60 100644 --- a/benchmarks/ud_benchmark/README.md +++ b/benchmarks/ud_benchmark/README.md @@ -1,6 +1,6 @@ - + -# ๐Ÿช spaCy Project: Universal Dependencies v2.5 Benchmarks +# ๐Ÿช Weasel Project: Universal Dependencies v2.5 Benchmarks This project template lets you train a spaCy pipeline on any [Universal Dependencies](https://universaldependencies.org/) corpus (v2.5) for benchmarking purposes. 
The pipeline includes an experimental trainable tokenizer, an experimental edit tree lemmatizer, and the standard spaCy tagger, morphologizer and dependency parser components. The CoNLL 2018 evaluation script is used to evaluate the pipeline. The template uses the [`UD_English-EWT`](https://github.com/UniversalDependencies/UD_English-EWT) treebank by default, but you can swap it out for any other available treebank. Just make sure to adjust the `ud_treebank` and `spacy_lang` settings in the config. Use `xx` (multi-language) for `spacy_lang` if a particular language is not supported by spaCy. The tokenizer in particular is only intended for use in this generic benchmarking setup. It is not optimized for speed and it does not perform particularly well for languages without space-separated tokens. In production, custom rules for spaCy's rule-based tokenizer or a language-specific word segmenter such as jieba for Chinese or sudachipy for Japanese would be recommended instead. @@ -8,12 +8,12 @@ This project template lets you train a spaCy pipeline on any [Universal Dependen The [`project.yml`](project.yml) defines the data assets required by the project, as well as the available commands and workflows. For details, see the -[spaCy projects documentation](https://spacy.io/usage/projects). +[Weasel documentation](https://github.com/explosion/weasel). ### โฏ Commands The following commands are defined by the project. They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run). +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run). Commands are only re-run if their inputs have changed. | Command | Description | @@ -31,7 +31,7 @@ Commands are only re-run if their inputs have changed. ### โญ Workflows The following workflows are defined by the project. 
They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run) +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run) and will run the specified commands in order. Commands are only re-run if their inputs have changed. @@ -42,11 +42,11 @@ inputs have changed. ### 🗂 Assets The following assets are defined by the project. They can -be fetched by running [`spacy project assets`](https://spacy.io/api/cli#project-assets) +be fetched by running [`weasel assets`](https://github.com/explosion/weasel/tree/main/docs/cli.md#open_file_folder-assets) in the project directory. | File | Source | Description | | --- | --- | --- | | `assets/ud-treebanks-v2.5.tgz` | URL | | - + diff --git a/benchmarks/ud_benchmark/project.yml b/benchmarks/ud_benchmark/project.yml index e85ade4dc..ff1c6b680 100644 --- a/benchmarks/ud_benchmark/project.yml +++ b/benchmarks/ud_benchmark/project.yml @@ -1,8 +1,6 @@ title: "Universal Dependencies v2.5 Benchmarks" description: "This project template lets you train a spaCy pipeline on any [Universal Dependencies](https://universaldependencies.org/) corpus (v2.5) for benchmarking purposes. The pipeline includes an experimental trainable tokenizer, an experimental edit tree lemmatizer, and the standard spaCy tagger, morphologizer and dependency parser components. The CoNLL 2018 evaluation script is used to evaluate the pipeline. The template uses the [`UD_English-EWT`](https://github.com/UniversalDependencies/UD_English-EWT) treebank by default, but you can swap it out for any other available treebank. Just make sure to adjust the `ud_treebank` and `spacy_lang` settings in the config. Use `xx` (multi-language) for `spacy_lang` if a particular language is not supported by spaCy. The tokenizer in particular is only intended for use in this generic benchmarking setup.
It is not optimized for speed and it does not perform particularly well for languages without space-separated tokens. In production, custom rules for spaCy's rule-based tokenizer or a language-specific word segmenter such as jieba for Chinese or sudachipy for Japanese would be recommended instead." -spacy_version: ">=3.2.1,<4.0.0" - # Variables can be referenced across the project.yml using ${vars.var_name} vars: ud_treebank: "UD_English-EWT" diff --git a/benchmarks/ud_benchmark/requirements.txt b/benchmarks/ud_benchmark/requirements.txt index 38a9b68b9..26d90ca38 100644 --- a/benchmarks/ud_benchmark/requirements.txt +++ b/benchmarks/ud_benchmark/requirements.txt @@ -1,2 +1,3 @@ spacy-transformers>=1.1.3,<1.2.0 spacy-experimental>=0.2.0,<0.7.0 +spacy>=3.2.1,<4.0.0 diff --git a/experimental/coref/README.md b/experimental/coref/README.md index 64d9525bd..4bcb5a671 100644 --- a/experimental/coref/README.md +++ b/experimental/coref/README.md @@ -1,6 +1,6 @@ - + -# 🪐 spaCy Project: Training a spaCy Coref Model +# 🪐 Weasel Project: Training a spaCy Coref Model This project trains a coreference model for spaCy using OntoNotes. @@ -9,12 +9,12 @@ This project trains a coreference model for spaCy using OntoNotes. The [`project.yml`](project.yml) defines the data assets required by the project, as well as the available commands and workflows. For details, see the -[spaCy projects documentation](https://spacy.io/usage/projects). +[Weasel documentation](https://github.com/explosion/weasel). ### ⏯ Commands The following commands are defined by the project. They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run). +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run). Commands are only re-run if their inputs have changed. | Command | Description | @@ -31,7 +31,7 @@ Commands are only re-run if their inputs have changed.
### โญ Workflows The following workflows are defined by the project. They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run) +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run) and will run the specified commands in order. Commands are only re-run if their inputs have changed. @@ -44,7 +44,7 @@ inputs have changed. ### ๐Ÿ—‚ Assets The following assets are defined by the project. They can -be fetched by running [`spacy project assets`](https://spacy.io/api/cli#project-assets) +be fetched by running [`weasel assets`](https://github.com/explosion/weasel/tree/main/docs/cli.md#open_file_folder-assets) in the project directory. | File | Source | Description | @@ -52,7 +52,7 @@ in the project directory. | `assets/` | Git | CoNLL-2012 scripts and dehydrated data, used for preprocessing OntoNotes. | | `assets/litbank` | Git | LitBank dataset. Only used for building data for quick unit tests. | - + # Getting Started diff --git a/experimental/coref/project.yml b/experimental/coref/project.yml index 4f637195b..a11479609 100644 --- a/experimental/coref/project.yml +++ b/experimental/coref/project.yml @@ -2,8 +2,6 @@ title: "Training a spaCy Coref Model" description: | This project trains a coreference model for spaCy using OntoNotes. 
-spacy_version: ">=3.3.0,<4.0.0" - vars: # XXX Change to your actual GPU ID gpu_id: 0 diff --git a/experimental/coref/requirements.txt b/experimental/coref/requirements.txt index ddde2ac3a..a754f3a9c 100644 --- a/experimental/coref/requirements.txt +++ b/experimental/coref/requirements.txt @@ -1,3 +1,4 @@ spacy-transformers>=1.1.6,<1.2.0 en_core_web_lg @ https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.4.0/en_core_web_lg-3.4.0-py3-none-any.whl spacy-experimental>=0.6.0,<0.7.0 +spacy>=3.3.0,<4.0.0 diff --git a/experimental/ner_spancat/README.md b/experimental/ner_spancat/README.md index e03989008..3cf82b8d4 100644 --- a/experimental/ner_spancat/README.md +++ b/experimental/ner_spancat/README.md @@ -1,6 +1,6 @@ - + -# 🪐 spaCy Project: Example SpanCategorizer project using Indonesian NER +# 🪐 Weasel Project: Example SpanCategorizer project using Indonesian NER The SpanCategorizer is a component in **spaCy v3.1+** for assigning labels to contiguous spans of text proposed by a customizable suggester function. Unlike spaCy's EntityRecognizer component, the SpanCategorizer can recognize nested or overlapping spans. It also doesn't rely as heavily on consistent starting and ending words, so it may be a better fit for non-NER span labelling tasks. You do have to write a function that proposes your candidate spans, however. If your spans are often short, you could propose all spans under a certain size. You could also use syntactic constituents such as noun phrases or noun chunks, or matcher rules. @@ -8,12 +8,12 @@ The SpanCategorizer is a component in **spaCy v3.1+** for assigning labels to co The [`project.yml`](project.yml) defines the data assets required by the project, as well as the available commands and workflows. For details, see the -[spaCy projects documentation](https://spacy.io/usage/projects). +[Weasel documentation](https://github.com/explosion/weasel). ### ⏯ Commands The following commands are defined by the project.
They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run). +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run). Commands are only re-run if their inputs have changed. | Command | Description | @@ -26,7 +26,7 @@ Commands are only re-run if their inputs have changed. ### โญ Workflows The following workflows are defined by the project. They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run) +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run) and will run the specified commands in order. Commands are only re-run if their inputs have changed. @@ -37,11 +37,11 @@ inputs have changed. ### ๐Ÿ—‚ Assets The following assets are defined by the project. They can -be fetched by running [`spacy project assets`](https://spacy.io/api/cli#project-assets) +be fetched by running [`weasel assets`](https://github.com/explosion/weasel/tree/main/docs/cli.md#open_file_folder-assets) in the project directory. | File | Source | Description | | --- | --- | --- | | `assets/nergrit_ner-grit` | Git | | - + diff --git a/experimental/ner_spancat/project.yml b/experimental/ner_spancat/project.yml index d2c9374ac..dc77d134b 100644 --- a/experimental/ner_spancat/project.yml +++ b/experimental/ner_spancat/project.yml @@ -1,6 +1,5 @@ title: "Example SpanCategorizer project using Indonesian NER" description: "The SpanCategorizer is a component in **spaCy v3.1+** for assigning labels to contiguous spans of text proposed by a customizable suggester function. Unlike spaCy's EntityRecognizer component, the SpanCategorizer can recognize nested or overlapping spans. It also doesn't rely as heavily on consistent starting and ending words, so it may be a better fit for non-NER span labelling tasks. You do have to write a function that proposes your candidate spans, however. 
If your spans are often short, you could propose all spans under a certain size. You could also use syntactic constituents such as noun phrases or noun chunks, or matcher rules." -spacy_version: ">=3.1.0,<4.0.0" vars: config: "spancat" # "ner" diff --git a/experimental/ner_spancat_compare/README.md b/experimental/ner_spancat_compare/README.md index f140041d2..0ed7c2557 100644 --- a/experimental/ner_spancat_compare/README.md +++ b/experimental/ner_spancat_compare/README.md @@ -1,6 +1,6 @@ - + -# ๐Ÿช spaCy Project: Comparing SpanCat and NER using a corpus of biomedical literature (GENIA) +# ๐Ÿช Weasel Project: Comparing SpanCat and NER using a corpus of biomedical literature (GENIA) This project demonstrates how spaCy's Span Categorization (SpanCat) and Named-Entity Recognition (NER) perform on different types of entities. Here, we used @@ -11,12 +11,12 @@ a dataset of biomedical literature containing both overlapping and non-overlappi The [`project.yml`](project.yml) defines the data assets required by the project, as well as the available commands and workflows. For details, see the -[spaCy projects documentation](https://spacy.io/usage/projects). +[Weasel documentation](https://github.com/explosion/weasel). ### โฏ Commands The following commands are defined by the project. They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run). +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run). Commands are only re-run if their inputs have changed. | Command | Description | @@ -33,7 +33,7 @@ Commands are only re-run if their inputs have changed. ### โญ Workflows The following workflows are defined by the project. They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run) +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run) and will run the specified commands in order. 
Commands are only re-run if their inputs have changed. @@ -46,7 +46,7 @@ inputs have changed. ### ๐Ÿ—‚ Assets The following assets are defined by the project. They can -be fetched by running [`spacy project assets`](https://spacy.io/api/cli#project-assets) +be fetched by running [`weasel assets`](https://github.com/explosion/weasel/tree/main/docs/cli.md#open_file_folder-assets) in the project directory. | File | Source | Description | @@ -55,7 +55,7 @@ in the project directory. | `assets/dev.iob2` | URL | The evaluation dataset for GENIA in IOB format. | | `assets/test.iob2` | URL | The test dataset for GENIA in IOB format. | - + ### About the dataset diff --git a/experimental/ner_wikiner_speedster/README.md b/experimental/ner_wikiner_speedster/README.md index 74c6d693f..39ef919c8 100644 --- a/experimental/ner_wikiner_speedster/README.md +++ b/experimental/ner_wikiner_speedster/README.md @@ -1,6 +1,6 @@ - + -# ๐Ÿช spaCy Project: Named Entity Recognition (WikiNER) accelerated using speedster +# ๐Ÿช Weasel Project: Named Entity Recognition (WikiNER) accelerated using speedster This project shows how `speedster` can accelerate spaCy's WikiNER pipeline. @@ -14,12 +14,12 @@ Further info on the WikiNER pipeline can be found in [this section](https://gith The [`project.yml`](project.yml) defines the data assets required by the project, as well as the available commands and workflows. For details, see the -[spaCy projects documentation](https://spacy.io/usage/projects). +[Weasel documentation](https://github.com/explosion/weasel). ### โฏ Commands The following commands are defined by the project. They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run). +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run). Commands are only re-run if their inputs have changed. | Command | Description | @@ -32,7 +32,7 @@ Commands are only re-run if their inputs have changed. 
### โญ Workflows The following workflows are defined by the project. They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run) +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run) and will run the specified commands in order. Commands are only re-run if their inputs have changed. @@ -43,14 +43,14 @@ inputs have changed. ### ๐Ÿ—‚ Assets The following assets are defined by the project. They can -be fetched by running [`spacy project assets`](https://spacy.io/api/cli#project-assets) +be fetched by running [`weasel assets`](https://github.com/explosion/weasel/tree/main/docs/cli.md#open_file_folder-assets) in the project directory. | File | Source | Description | | --- | --- | --- | | `assets/aij-wikiner-en-wp2.bz2` | URL | | - + ## ๐Ÿš€ install speedster diff --git a/integrations/fastapi/README.md b/integrations/fastapi/README.md index e4211ee46..c04b043c0 100644 --- a/integrations/fastapi/README.md +++ b/integrations/fastapi/README.md @@ -1,6 +1,6 @@ - + -# ๐Ÿช spaCy Project: FastAPI integration +# ๐Ÿช Weasel Project: FastAPI integration Use [FastAPI](https://fastapi.tiangolo.com/) to serve your spaCy models and host modern REST APIs. To start the server, you can run `spacy project run start`. To explore the REST API interactively, navigate to `http://127.0.0.1:5000/docs` in your browser. See the examples for how to query the API using Python or JavaScript. @@ -8,12 +8,12 @@ Use [FastAPI](https://fastapi.tiangolo.com/) to serve your spaCy models and host The [`project.yml`](project.yml) defines the data assets required by the project, as well as the available commands and workflows. For details, see the -[spaCy projects documentation](https://spacy.io/usage/projects). +[Weasel documentation](https://github.com/explosion/weasel). ### โฏ Commands The following commands are defined by the project. 
They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run). +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run). Commands are only re-run if their inputs have changed. | Command | Description | @@ -24,7 +24,7 @@ Commands are only re-run if their inputs have changed. ### โญ Workflows The following workflows are defined by the project. They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run) +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run) and will run the specified commands in order. Commands are only re-run if their inputs have changed. @@ -35,14 +35,14 @@ inputs have changed. ### ๐Ÿ—‚ Assets The following assets are defined by the project. They can -be fetched by running [`spacy project assets`](https://spacy.io/api/cli#project-assets) +be fetched by running [`weasel assets`](https://github.com/explosion/weasel/tree/main/docs/cli.md#open_file_folder-assets) in the project directory. | File | Source | Description | | --- | --- | --- | | `assets/data.jsonl` | URL | Selected sentences from the CMU Movie Summary Corpus used for testing | - + ## ๐Ÿš€ Examples diff --git a/integrations/huggingface_hub/README.md b/integrations/huggingface_hub/README.md index 9ca2dd9bb..6e92cd20a 100644 --- a/integrations/huggingface_hub/README.md +++ b/integrations/huggingface_hub/README.md @@ -1,6 +1,6 @@ - + -# ๐Ÿช spaCy Project: Hugging Face Hub integration +# ๐Ÿช Weasel Project: Hugging Face Hub integration With [Hugging Face Hub](https://https://huggingface.co/), you can easily share any trained pipeline with the community. The Hugging Face Hub offers: @@ -15,12 +15,12 @@ This uses [`spacy-huggingface-hub`](https://github.com/explosion/spacy-huggingfa The [`project.yml`](project.yml) defines the data assets required by the project, as well as the available commands and workflows. 
For details, see the -[spaCy projects documentation](https://spacy.io/usage/projects). +[Weasel documentation](https://github.com/explosion/weasel). ### โฏ Commands The following commands are defined by the project. They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run). +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run). Commands are only re-run if their inputs have changed. | Command | Description | @@ -35,7 +35,7 @@ Commands are only re-run if their inputs have changed. ### โญ Workflows The following workflows are defined by the project. They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run) +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run) and will run the specified commands in order. Commands are only re-run if their inputs have changed. @@ -46,7 +46,7 @@ inputs have changed. ### ๐Ÿ—‚ Assets The following assets are defined by the project. They can -be fetched by running [`spacy project assets`](https://spacy.io/api/cli#project-assets) +be fetched by running [`weasel assets`](https://github.com/explosion/weasel/tree/main/docs/cli.md#open_file_folder-assets) in the project directory. | File | Source | Description | @@ -54,4 +54,4 @@ in the project directory. 
| [`assets/fashion_brands_training.jsonl`](assets/fashion_brands_training.jsonl) | Local | JSONL-formatted training data exported from Prodigy, annotated with `FASHION_BRAND` entities (1235 examples) | | [`assets/fashion_brands_eval.jsonl`](assets/fashion_brands_eval.jsonl) | Local | JSONL-formatted development data exported from Prodigy, annotated with `FASHION_BRAND` entities (500 examples) | - \ No newline at end of file + \ No newline at end of file diff --git a/integrations/prodigy/README.md b/integrations/prodigy/README.md index 02e1516f7..8678cc0cc 100644 --- a/integrations/prodigy/README.md +++ b/integrations/prodigy/README.md @@ -1,6 +1,6 @@ - + -# ๐Ÿช spaCy Project: Prodigy annotation tool integration +# ๐Ÿช Weasel Project: Prodigy annotation tool integration This project shows how to integrate the [Prodigy](https://prodi.gy) annotation tool (requires **v1.11+**) into your spaCy project template to automatically **export annotations** you've created and **train your model** on the collected data. Note that in order to run this template, you'll need to install Prodigy separately into your environment. For details on how the data was created, check out this [project template](https://github.com/explosion/projects/tree/v3/tutorials/ner_fashion_brands) and [blog post](https://explosion.ai/blog/sense2vec-reloaded#annotation). > โš ๏ธ **Important note:** The example in this project uses a separate step `db-in` to export the example annotations into your database, so you can easily run it end-to-end. In your own workflows, you can leave this out and access the given dataset you've annotated directly. @@ -9,12 +9,12 @@ This project shows how to integrate the [Prodigy](https://prodi.gy) annotation t The [`project.yml`](project.yml) defines the data assets required by the project, as well as the available commands and workflows. For details, see the -[spaCy projects documentation](https://spacy.io/usage/projects). 
+[Weasel documentation](https://github.com/explosion/weasel). ### โฏ Commands The following commands are defined by the project. They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run). +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run). Commands are only re-run if their inputs have changed. | Command | Description | @@ -29,7 +29,7 @@ Commands are only re-run if their inputs have changed. ### โญ Workflows The following workflows are defined by the project. They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run) +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run) and will run the specified commands in order. Commands are only re-run if their inputs have changed. @@ -41,7 +41,7 @@ inputs have changed. ### ๐Ÿ—‚ Assets The following assets are defined by the project. They can -be fetched by running [`spacy project assets`](https://spacy.io/api/cli#project-assets) +be fetched by running [`weasel assets`](https://github.com/explosion/weasel/tree/main/docs/cli.md#open_file_folder-assets) in the project directory. | File | Source | Description | @@ -49,4 +49,4 @@ in the project directory. 
| `assets/fashion_brands_training.jsonl.jsonl` | Local | JSONL-formatted training data exported from Prodigy, annotated with `FASHION_BRAND` entities (1235 examples) | | `assets/fashion_brands_eval.jsonl.jsonl` | Local | JSONL-formatted development data exported from Prodigy, annotated with `FASHION_BRAND` entities (500 examples) | - + diff --git a/integrations/ray/README.md b/integrations/ray/README.md index f8296447f..306a5595f 100644 --- a/integrations/ray/README.md +++ b/integrations/ray/README.md @@ -1,6 +1,6 @@ - + -# ๐Ÿช spaCy Project: Ray integration +# ๐Ÿช Weasel Project: Ray integration Use [Ray](https://ray.io) and the [`spacy-ray`](https://github.com/explosion/spacy-ray) extension package for parallel and distributed training. To configure the number of workers, you can change the `n_workers` variable in the `project.yml`. @@ -8,12 +8,12 @@ Use [Ray](https://ray.io) and the [`spacy-ray`](https://github.com/explosion/spa The [`project.yml`](project.yml) defines the data assets required by the project, as well as the available commands and workflows. For details, see the -[spaCy projects documentation](https://spacy.io/usage/projects). +[Weasel documentation](https://github.com/explosion/weasel). ### โฏ Commands The following commands are defined by the project. They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run). +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run). Commands are only re-run if their inputs have changed. | Command | Description | @@ -24,7 +24,7 @@ Commands are only re-run if their inputs have changed. ### โญ Workflows The following workflows are defined by the project. They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run) +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run) and will run the specified commands in order. 
Commands are only re-run if their inputs have changed. @@ -35,11 +35,11 @@ inputs have changed. ### ๐Ÿ—‚ Assets The following assets are defined by the project. They can -be fetched by running [`spacy project assets`](https://spacy.io/api/cli#project-assets) +be fetched by running [`weasel assets`](https://github.com/explosion/weasel/tree/main/docs/cli.md#open_file_folder-assets) in the project directory. | File | Source | Description | | --- | --- | --- | | `assets/UD_English-EWT` | Git | | - + diff --git a/integrations/streamlit/README.md b/integrations/streamlit/README.md index 32182301f..49042b73b 100644 --- a/integrations/streamlit/README.md +++ b/integrations/streamlit/README.md @@ -1,8 +1,8 @@ - + -# ๐Ÿช spaCy Project: Streamlit integration +# ๐Ÿช Weasel Project: Streamlit integration [Streamlit](https://streamlit.io) is a Python framework for building interactive data apps. The [`spacy-streamlit`](https://github.com/explosion/spacy-streamlit) package helps you integrate spaCy visualizations into your Streamlit apps and quickly spin up demos to explore your pipelines interactively. It includes a full embedded visualizer, as well as individual components. If you're training your own pipelines, you can integrate the `visualize` command into your `project.yml` and pass in the path to your exported pipeline to visualize it. See the tutorial project templates for examples. @@ -10,12 +10,12 @@ The [`project.yml`](project.yml) defines the data assets required by the project, as well as the available commands and workflows. For details, see the -[spaCy projects documentation](https://spacy.io/usage/projects). +[Weasel documentation](https://github.com/explosion/weasel). ### โฏ Commands The following commands are defined by the project. They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run). +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run). 
Commands are only re-run if their inputs have changed. | Command | Description | @@ -23,7 +23,7 @@ Commands are only re-run if their inputs have changed. | `download` | Download models | | `visualize` | Visualize a pipeline interactively using Streamlit | - + ## Example diff --git a/integrations/wandb/README.md b/integrations/wandb/README.md index ec5cec593..644a18def 100644 --- a/integrations/wandb/README.md +++ b/integrations/wandb/README.md @@ -1,6 +1,6 @@ - + -# ๐Ÿช spaCy Project: Weights & Biases integration +# ๐Ÿช Weasel Project: Weights & Biases integration Use [Weights & Biases](https://www.wandb.com/) for logging of training experiments. This project template uses the IMDB Movie Review Dataset and includes two workflows: `log` for training a simple text classification model and logging the results to Weights & Biases (works out-of-the-box and only requires the `[training.logger]` to be set in the config) and `parameter-search` for running a hyperparameter search using [Weights & Biases Sweeps](https://docs.wandb.ai/guides/sweeps), running the experiments and logging the results. @@ -8,12 +8,12 @@ Use [Weights & Biases](https://www.wandb.com/) for logging of training experimen The [`project.yml`](project.yml) defines the data assets required by the project, as well as the available commands and workflows. For details, see the -[spaCy projects documentation](https://spacy.io/usage/projects). +[Weasel documentation](https://github.com/explosion/weasel). ### โฏ Commands The following commands are defined by the project. They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run). +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run). Commands are only re-run if their inputs have changed. | Command | Description | @@ -27,7 +27,7 @@ Commands are only re-run if their inputs have changed. ### โญ Workflows The following workflows are defined by the project. 
They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run) +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run) and will run the specified commands in order. Commands are only re-run if their inputs have changed. @@ -39,11 +39,11 @@ inputs have changed. ### ๐Ÿ—‚ Assets The following assets are defined by the project. They can -be fetched by running [`spacy project assets`](https://spacy.io/api/cli#project-assets) +be fetched by running [`weasel assets`](https://github.com/explosion/weasel/tree/main/docs/cli.md#open_file_folder-assets) in the project directory. | File | Source | Description | | --- | --- | --- | | `assets/aclImdb_v1.tar.gz` | URL | Movie Review Dataset for sentiment analysis by Maas et al., ACL 2011. | - + diff --git a/pipelines/edit_tree_lemmatizer/README.md b/pipelines/edit_tree_lemmatizer/README.md index 93585cb0e..369545c31 100644 --- a/pipelines/edit_tree_lemmatizer/README.md +++ b/pipelines/edit_tree_lemmatizer/README.md @@ -1,17 +1,17 @@ - + -# ๐Ÿช spaCy Project: Demo the trainable edit-tree lemmatizer +# ๐Ÿช Weasel Project: Demo the trainable edit-tree lemmatizer ## ๐Ÿ“‹ project.yml The [`project.yml`](project.yml) defines the data assets required by the project, as well as the available commands and workflows. For details, see the -[spaCy projects documentation](https://spacy.io/usage/projects). +[Weasel documentation](https://github.com/explosion/weasel). ### โฏ Commands The following commands are defined by the project. They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run). +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run). Commands are only re-run if their inputs have changed. | Command | Description | @@ -24,7 +24,7 @@ Commands are only re-run if their inputs have changed. 
### โญ Workflows The following workflows are defined by the project. They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run) +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run) and will run the specified commands in order. Commands are only re-run if their inputs have changed. @@ -35,11 +35,11 @@ inputs have changed. ### ๐Ÿ—‚ Assets The following assets are defined by the project. They can -be fetched by running [`spacy project assets`](https://spacy.io/api/cli#project-assets) +be fetched by running [`weasel assets`](https://github.com/explosion/weasel/tree/main/docs/cli.md#open_file_folder-assets) in the project directory. | File | Source | Description | | --- | --- | --- | | `assets/UD_Dutch-Alpino` | Git | | - \ No newline at end of file + \ No newline at end of file diff --git a/pipelines/floret_fi_core_demo/README.md b/pipelines/floret_fi_core_demo/README.md index 3e24cdca8..843057d72 100644 --- a/pipelines/floret_fi_core_demo/README.md +++ b/pipelines/floret_fi_core_demo/README.md @@ -1,6 +1,6 @@ - + -# ๐Ÿช spaCy Project: Demo floret vectors for Finnish +# ๐Ÿช Weasel Project: Demo floret vectors for Finnish Train floret vectors on OSCAR and compare standard vectors vs. floret vectors on UD Finnish TDT and turku-ner-corpus. @@ -8,12 +8,12 @@ Train floret vectors on OSCAR and compare standard vectors vs. floret vectors on The [`project.yml`](project.yml) defines the data assets required by the project, as well as the available commands and workflows. For details, see the -[spaCy projects documentation](https://spacy.io/usage/projects). +[Weasel documentation](https://github.com/explosion/weasel). ### โฏ Commands The following commands are defined by the project. They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run). 
+can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run). Commands are only re-run if their inputs have changed. | Command | Description | @@ -40,7 +40,7 @@ Commands are only re-run if their inputs have changed. ### โญ Workflows The following workflows are defined by the project. They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run) +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run) and will run the specified commands in order. Commands are only re-run if their inputs have changed. @@ -51,7 +51,7 @@ inputs have changed. ### ๐Ÿ—‚ Assets The following assets are defined by the project. They can -be fetched by running [`spacy project assets`](https://spacy.io/api/cli#project-assets) +be fetched by running [`weasel assets`](https://github.com/explosion/weasel/tree/main/docs/cli.md#open_file_folder-assets) in the project directory. | File | Source | Description | @@ -59,4 +59,4 @@ in the project directory. | `assets/UD_Finnish-TDT` | Git | | | `assets/turku-ner-corpus` | Git | | - + diff --git a/pipelines/floret_fi_core_demo/project.yml b/pipelines/floret_fi_core_demo/project.yml index 05f3fc0de..ca6a7556a 100644 --- a/pipelines/floret_fi_core_demo/project.yml +++ b/pipelines/floret_fi_core_demo/project.yml @@ -1,6 +1,5 @@ title: "Demo floret vectors for Finnish" description: "Train floret vectors on OSCAR and compare standard vectors vs. floret vectors on UD Finnish TDT and turku-ner-corpus." 
-spacy_version: ">=3.2.0,<4.0.0" # Variables can be referenced across the project.yml using ${vars.var_name} vars: name: "floret_fi_ud_demo" diff --git a/pipelines/floret_fi_core_demo/requirements.txt b/pipelines/floret_fi_core_demo/requirements.txt index 059ea1e40..d7d78c243 100644 --- a/pipelines/floret_fi_core_demo/requirements.txt +++ b/pipelines/floret_fi_core_demo/requirements.txt @@ -1,2 +1,3 @@ floret>=0.10.1,<0.11.0 datasets[streaming] +spacy>=3.2.0,<4.0.0 diff --git a/pipelines/floret_ko_ud_demo/README.md b/pipelines/floret_ko_ud_demo/README.md index acab2cf4e..ea84b33db 100644 --- a/pipelines/floret_ko_ud_demo/README.md +++ b/pipelines/floret_ko_ud_demo/README.md @@ -1,6 +1,6 @@ - + -# 🪐 spaCy Project: Demo floret vectors for UD Korean Kaist +# 🪐 Weasel Project: Demo floret vectors for UD Korean Kaist Train floret vectors on OSCAR and compare no vectors, standard vectors, and floret vectors on UD Korean Kaist. @@ -8,12 +8,12 @@ Train floret vectors on OSCAR and compare no vectors, standard vectors, and flor The [`project.yml`](project.yml) defines the data assets required by the project, as well as the available commands and workflows. For details, see the -[spaCy projects documentation](https://spacy.io/usage/projects). +[Weasel documentation](https://github.com/explosion/weasel). ### ⏯ Commands The following commands are defined by the project. They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run). +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run). Commands are only re-run if their inputs have changed. | Command | Description | @@ -34,7 +34,7 @@ Commands are only re-run if their inputs have changed. ### ⏭ Workflows The following workflows are defined by the project.
They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run) +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run) and will run the specified commands in order. Commands are only re-run if their inputs have changed. @@ -45,11 +45,11 @@ inputs have changed. ### ๐Ÿ—‚ Assets The following assets are defined by the project. They can -be fetched by running [`spacy project assets`](https://spacy.io/api/cli#project-assets) +be fetched by running [`weasel assets`](https://github.com/explosion/weasel/tree/main/docs/cli.md#open_file_folder-assets) in the project directory. | File | Source | Description | | --- | --- | --- | | `assets/UD_Korean-Kaist` | Git | | - + diff --git a/pipelines/floret_ko_ud_demo/project.yml b/pipelines/floret_ko_ud_demo/project.yml index 57834e9c4..e7c9d3198 100644 --- a/pipelines/floret_ko_ud_demo/project.yml +++ b/pipelines/floret_ko_ud_demo/project.yml @@ -1,6 +1,5 @@ title: "Demo floret vectors for UD Korean Kaist" description: "Train floret vectors on OSCAR and compare no vectors, standard vectors, and floret vectors on UD Korean Kaist." 
-spacy_version: ">=3.2.0,<4.0.0" # Variables can be referenced across the project.yml using ${vars.var_name} vars: name: "ftb_ko_ud_demo" diff --git a/pipelines/floret_ko_ud_demo/requirements.txt b/pipelines/floret_ko_ud_demo/requirements.txt index 087dceb2d..9a306e569 100644 --- a/pipelines/floret_ko_ud_demo/requirements.txt +++ b/pipelines/floret_ko_ud_demo/requirements.txt @@ -7,3 +7,4 @@ datasets[streaming] # - mecab-ko-dic: https://bitbucket.org/eunjeon/mecab-ko-dic # - natto-py: https://github.com/buruzaemon/natto-py natto-py +spacy>=3.2.0,<4.0.0 diff --git a/pipelines/floret_vectors_demo/README.md b/pipelines/floret_vectors_demo/README.md index 7e1f71d7f..5f7f2e5b4 100644 --- a/pipelines/floret_vectors_demo/README.md +++ b/pipelines/floret_vectors_demo/README.md @@ -1,6 +1,6 @@ - + -# 🪐 spaCy Project: Demo floret vectors +# 🪐 Weasel Project: Demo floret vectors Train floret vectors and load them into a spaCy vectors model. @@ -8,12 +8,12 @@ Train floret vectors and load them into a spaCy vectors model. The [`project.yml`](project.yml) defines the data assets required by the project, as well as the available commands and workflows. For details, see the -[spaCy projects documentation](https://spacy.io/usage/projects). +[Weasel documentation](https://github.com/explosion/weasel). ### ⏯ Commands The following commands are defined by the project. They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run). +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run). Commands are only re-run if their inputs have changed. | Command | Description | @@ -26,7 +26,7 @@ Commands are only re-run if their inputs have changed. ### ⏭ Workflows The following workflows are defined by the project.
They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run) +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run) and will run the specified commands in order. Commands are only re-run if their inputs have changed. @@ -34,4 +34,4 @@ inputs have changed. | --- | --- | | `all` | `tokenize-oscar` → `train-floret` → `init-floret-vectors` → `floret-nn` | - + diff --git a/pipelines/floret_vectors_demo/project.yml b/pipelines/floret_vectors_demo/project.yml index 8dc84fae8..88815d86c 100644 --- a/pipelines/floret_vectors_demo/project.yml +++ b/pipelines/floret_vectors_demo/project.yml @@ -1,6 +1,5 @@ title: "Demo floret vectors" description: "Train floret vectors and load them into a spaCy vectors model." -spacy_version: ">=3.2.0,<4.0.0" # Variables can be referenced across the project.yml using ${vars.var_name} vars: name: "floret_vectors" diff --git a/pipelines/floret_vectors_demo/requirements.txt b/pipelines/floret_vectors_demo/requirements.txt index 059ea1e40..d7d78c243 100644 --- a/pipelines/floret_vectors_demo/requirements.txt +++ b/pipelines/floret_vectors_demo/requirements.txt @@ -1,2 +1,3 @@ floret>=0.10.1,<0.11.0 datasets[streaming] +spacy>=3.2.0,<4.0.0 diff --git a/pipelines/floret_wiki_oscar_vectors/README.md b/pipelines/floret_wiki_oscar_vectors/README.md index 1d613d650..d48672ac9 100644 --- a/pipelines/floret_wiki_oscar_vectors/README.md +++ b/pipelines/floret_wiki_oscar_vectors/README.md @@ -1,7 +1,7 @@ - + -# ๐Ÿช spaCy Project: Train floret vectors from Wikipedia and OSCAR +# ๐Ÿช Weasel Project: Train floret vectors from Wikipedia and OSCAR This project downloads, extracts and preprocesses texts from Wikipedia and OSCAR and trains vectors with [floret](https://github.com/explosion/floret). @@ -13,12 +13,12 @@ By default, the project trains floret vectors for Macedonian. 
The [`project.yml`](project.yml) defines the data assets required by the project, as well as the available commands and workflows. For details, see the -[spaCy projects documentation](https://spacy.io/usage/projects). +[Weasel documentation](https://github.com/explosion/weasel). ### โฏ Commands The following commands are defined by the project. They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run). +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run). Commands are only re-run if their inputs have changed. | Command | Description | @@ -33,7 +33,7 @@ Commands are only re-run if their inputs have changed. ### โญ Workflows The following workflows are defined by the project. They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run) +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run) and will run the specified commands in order. Commands are only re-run if their inputs have changed. @@ -44,14 +44,14 @@ inputs have changed. ### ๐Ÿ—‚ Assets The following assets are defined by the project. They can -be fetched by running [`spacy project assets`](https://spacy.io/api/cli#project-assets) +be fetched by running [`weasel assets`](https://github.com/explosion/weasel/tree/main/docs/cli.md#open_file_folder-assets) in the project directory. | File | Source | Description | | --- | --- | --- | | `/scratch/vectors/downloaded/wikipedia/mkwiki-latest-pages-articles.xml.bz2` | URL | | - + # Usage Notes diff --git a/pipelines/floret_wiki_oscar_vectors/project.yml b/pipelines/floret_wiki_oscar_vectors/project.yml index 1ceac582c..915bd79d0 100644 --- a/pipelines/floret_wiki_oscar_vectors/project.yml +++ b/pipelines/floret_wiki_oscar_vectors/project.yml @@ -5,7 +5,6 @@ description: | By default, the project trains floret vectors for Macedonian. 
-spacy_version: ">=3.2.0,<4.0.0" vars: name: "vectors" lang: "mk" diff --git a/pipelines/floret_wiki_oscar_vectors/requirements.txt b/pipelines/floret_wiki_oscar_vectors/requirements.txt index 3c1160095..74a07fb12 100644 --- a/pipelines/floret_wiki_oscar_vectors/requirements.txt +++ b/pipelines/floret_wiki_oscar_vectors/requirements.txt @@ -9,3 +9,5 @@ more-itertools>=8.8.0 # Tested with v1.18.3, earlier versions may also work. datasets>=1.18.0 + +spacy>=3.2.0,<4.0.0 diff --git a/pipelines/ner_demo/README.md b/pipelines/ner_demo/README.md index e092cd7c6..eb30bde3f 100644 --- a/pipelines/ner_demo/README.md +++ b/pipelines/ner_demo/README.md @@ -1,6 +1,6 @@ - + -# 🪐 spaCy Project: Demo NER in a new pipeline (Named Entity Recognition) +# 🪐 Weasel Project: Demo NER in a new pipeline (Named Entity Recognition) A minimal demo NER project for spaCy v3 adapted from the spaCy v2 [`train_ner.py`](https://github.com/explosion/spaCy/blob/v2.3.x/examples/training/train_ner.py) example script for creating an NER component in a new pipeline. @@ -8,12 +8,12 @@ A minimal demo NER project for spaCy v3 adapted from the spaCy v2 [`train_ner.py The [`project.yml`](project.yml) defines the data assets required by the project, as well as the available commands and workflows. For details, see the -[spaCy projects documentation](https://spacy.io/usage/projects). +[Weasel documentation](https://github.com/explosion/weasel). ### ⏯ Commands The following commands are defined by the project. They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run). +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run). Commands are only re-run if their inputs have changed. | Command | Description | @@ -30,7 +30,7 @@ Commands are only re-run if their inputs have changed. ### ⏭ Workflows The following workflows are defined by the project.
They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run) +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run) and will run the specified commands in order. Commands are only re-run if their inputs have changed. @@ -41,7 +41,7 @@ inputs have changed. ### ๐Ÿ—‚ Assets The following assets are defined by the project. They can -be fetched by running [`spacy project assets`](https://spacy.io/api/cli#project-assets) +be fetched by running [`weasel assets`](https://github.com/explosion/weasel/tree/main/docs/cli.md#open_file_folder-assets) in the project directory. | File | Source | Description | @@ -49,4 +49,4 @@ in the project directory. | [`assets/train.json`](assets/train.json) | Local | Demo training data converted from the v2 `train_ner.py` example with `srsly.write_json("train.json", TRAIN_DATA)` | | [`assets/dev.json`](assets/dev.json) | Local | Demo development data | - + diff --git a/pipelines/ner_demo_replace/README.md b/pipelines/ner_demo_replace/README.md index 9ef9ead80..d5900bea7 100644 --- a/pipelines/ner_demo_replace/README.md +++ b/pipelines/ner_demo_replace/README.md @@ -1,6 +1,6 @@ - + -# ๐Ÿช spaCy Project: Demo replacing an NER component in a pretrained pipeline +# ๐Ÿช Weasel Project: Demo replacing an NER component in a pretrained pipeline A minimal demo NER project that replaces the NER component in an existing pretrained pipeline. All other pipeline components are preserved and frozen during training. @@ -8,12 +8,12 @@ A minimal demo NER project that replaces the NER component in an existing pretra The [`project.yml`](project.yml) defines the data assets required by the project, as well as the available commands and workflows. For details, see the -[spaCy projects documentation](https://spacy.io/usage/projects). +[Weasel documentation](https://github.com/explosion/weasel). ### โฏ Commands The following commands are defined by the project. 
They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run). +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run). Commands are only re-run if their inputs have changed. | Command | Description | @@ -29,7 +29,7 @@ Commands are only re-run if their inputs have changed. ### โญ Workflows The following workflows are defined by the project. They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run) +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run) and will run the specified commands in order. Commands are only re-run if their inputs have changed. @@ -40,7 +40,7 @@ inputs have changed. ### ๐Ÿ—‚ Assets The following assets are defined by the project. They can -be fetched by running [`spacy project assets`](https://spacy.io/api/cli#project-assets) +be fetched by running [`weasel assets`](https://github.com/explosion/weasel/tree/main/docs/cli.md#open_file_folder-assets) in the project directory. | File | Source | Description | @@ -48,4 +48,4 @@ in the project directory. | [`assets/train.json`](assets/train.json) | Local | Demo training data converted from the v2 example scripts with `srsly.write_json("train.json", TRAIN_DATA)` | | [`assets/dev.json`](assets/dev.json) | Local | Demo development data | - + diff --git a/pipelines/ner_demo_replace/project.yml b/pipelines/ner_demo_replace/project.yml index 8df74bfdd..453d7c8d2 100644 --- a/pipelines/ner_demo_replace/project.yml +++ b/pipelines/ner_demo_replace/project.yml @@ -1,6 +1,5 @@ title: "Demo replacing an NER component in a pretrained pipeline" description: "A minimal demo NER project that replaces the NER component in an existing pretrained pipeline. All other pipeline components are preserved and frozen during training." 
-spacy_version: ">=3.0.6,<4.0.0" # Variables can be referenced across the project.yml using ${vars.var_name} vars: name: "ner_demo_replace" diff --git a/pipelines/ner_demo_replace/requirements.txt b/pipelines/ner_demo_replace/requirements.txt index ff2451e0c..b8c4f5467 100644 --- a/pipelines/ner_demo_replace/requirements.txt +++ b/pipelines/ner_demo_replace/requirements.txt @@ -1,2 +1,3 @@ spacy-streamlit>=1.0.0a0 spacy-lookups-data>=1.0.0,<1.1.0 +spacy>=3.0.6,<4.0.0 diff --git a/pipelines/ner_demo_update/README.md b/pipelines/ner_demo_update/README.md index ccfd83352..12e1c3881 100644 --- a/pipelines/ner_demo_update/README.md +++ b/pipelines/ner_demo_update/README.md @@ -1,6 +1,6 @@ - + -# 🪐 spaCy Project: Demo updating an NER component in a pretrained pipeline +# 🪐 Weasel Project: Demo updating an NER component in a pretrained pipeline A demo NER project that updates the NER component in an existing pretrained pipeline. All other pipeline components are preserved and frozen during training. @@ -8,12 +8,12 @@ A demo NER project that updates the NER component in an existing pretrained pipe The [`project.yml`](project.yml) defines the data assets required by the project, as well as the available commands and workflows. For details, see the -[spaCy projects documentation](https://spacy.io/usage/projects). +[Weasel documentation](https://github.com/explosion/weasel). ### ⏯ Commands The following commands are defined by the project. They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run). +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run). Commands are only re-run if their inputs have changed. | Command | Description | @@ -29,7 +29,7 @@ Commands are only re-run if their inputs have changed. ### ⏭ Workflows The following workflows are defined by the project.
They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run) +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run) and will run the specified commands in order. Commands are only re-run if their inputs have changed. @@ -40,7 +40,7 @@ inputs have changed. ### ๐Ÿ—‚ Assets The following assets are defined by the project. They can -be fetched by running [`spacy project assets`](https://spacy.io/api/cli#project-assets) +be fetched by running [`weasel assets`](https://github.com/explosion/weasel/tree/main/docs/cli.md#open_file_folder-assets) in the project directory. | File | Source | Description | @@ -48,4 +48,4 @@ in the project directory. | [`assets/train.json`](assets/train.json) | Local | Demo training data converted from the v2 example scripts with `srsly.write_json("train.json", TRAIN_DATA)` | | [`assets/dev.json`](assets/dev.json) | Local | Demo development data | - + diff --git a/pipelines/ner_demo_update/project.yml b/pipelines/ner_demo_update/project.yml index dc95a47a8..b58e2e887 100644 --- a/pipelines/ner_demo_update/project.yml +++ b/pipelines/ner_demo_update/project.yml @@ -1,6 +1,5 @@ title: "Demo updating an NER component in a pretrained pipeline" description: "A demo NER project that updates the NER component in an existing pretrained pipeline. All other pipeline components are preserved and frozen during training." 
-spacy_version: ">=3.0.6,<4.0.0" # Variables can be referenced across the project.yml using ${vars.var_name} vars: name: "ner_demo_update" diff --git a/pipelines/ner_demo_update/requirements.txt b/pipelines/ner_demo_update/requirements.txt index ff2451e0c..b8c4f5467 100644 --- a/pipelines/ner_demo_update/requirements.txt +++ b/pipelines/ner_demo_update/requirements.txt @@ -1,2 +1,3 @@ spacy-streamlit>=1.0.0a0 spacy-lookups-data>=1.0.0,<1.1.0 +spacy>=3.0.6,<4.0.0 diff --git a/pipelines/ner_wikiner/README.md b/pipelines/ner_wikiner/README.md index ffa29a1ac..456b4743b 100644 --- a/pipelines/ner_wikiner/README.md +++ b/pipelines/ner_wikiner/README.md @@ -1,6 +1,6 @@ - + -# 🪐 spaCy Project: Named Entity Recognition (WikiNER) +# 🪐 Weasel Project: Named Entity Recognition (WikiNER) Simple example of downloading and converting source data and training a named entity recognition model. The example uses the WikiNER corpus, which was constructed semi-automatically. The main advantage of this corpus is that it's freely available, so the data can be downloaded as a project asset. The WikiNER corpus is distributed in IOB format, a fairly common text encoding for sequence data. The `corpus` subcommand splits the corpus into training, development and testing partitions, and uses `spacy convert` to convert them into spaCy's binary format. You can then edit the config to try out different settings, and trigger training with the `train` subcommand. @@ -8,12 +8,12 @@ Simple example of downloading and converting source data and training a named en The [`project.yml`](project.yml) defines the data assets required by the project, as well as the available commands and workflows. For details, see the -[spaCy projects documentation](https://spacy.io/usage/projects). +[Weasel documentation](https://github.com/explosion/weasel). ### ⏯ Commands The following commands are defined by the project.
They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run). +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run). Commands are only re-run if their inputs have changed. | Command | Description | @@ -26,7 +26,7 @@ Commands are only re-run if their inputs have changed. ### โญ Workflows The following workflows are defined by the project. They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run) +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run) and will run the specified commands in order. Commands are only re-run if their inputs have changed. @@ -37,14 +37,14 @@ inputs have changed. ### ๐Ÿ—‚ Assets The following assets are defined by the project. They can -be fetched by running [`spacy project assets`](https://spacy.io/api/cli#project-assets) +be fetched by running [`weasel assets`](https://github.com/explosion/weasel/tree/main/docs/cli.md#open_file_folder-assets) in the project directory. | File | Source | Description | | --- | --- | --- | | `assets/aij-wikiner-en-wp2.bz2` | URL | | - + ## ๐Ÿš€ Accelerate If you are interested in accelerating this pipeline, have a look at [ner_wikiner_speedster](https://github.com/explosion/projects/tree/v3/experimental/ner_wikiner_speedster) pipeline. diff --git a/pipelines/parser_demo/README.md b/pipelines/parser_demo/README.md index 57a9a1106..0f379a54a 100644 --- a/pipelines/parser_demo/README.md +++ b/pipelines/parser_demo/README.md @@ -1,6 +1,6 @@ - + -# ๐Ÿช spaCy Project: Demo Dependency Parser +# ๐Ÿช Weasel Project: Demo Dependency Parser A minimal demo parser project for spaCy v3 adapted from the spaCy v2 [`train_parser.py`](https://github.com/explosion/spaCy/blob/v2.3.x/examples/training/train_parser.py) example script. 
@@ -8,12 +8,12 @@ A minimal demo parser project for spaCy v3 adapted from the spaCy v2 [`train_par The [`project.yml`](project.yml) defines the data assets required by the project, as well as the available commands and workflows. For details, see the -[spaCy projects documentation](https://spacy.io/usage/projects). +[Weasel documentation](https://github.com/explosion/weasel). ### โฏ Commands The following commands are defined by the project. They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run). +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run). Commands are only re-run if their inputs have changed. | Command | Description | @@ -30,7 +30,7 @@ Commands are only re-run if their inputs have changed. ### โญ Workflows The following workflows are defined by the project. They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run) +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run) and will run the specified commands in order. Commands are only re-run if their inputs have changed. @@ -41,7 +41,7 @@ inputs have changed. ### ๐Ÿ—‚ Assets The following assets are defined by the project. They can -be fetched by running [`spacy project assets`](https://spacy.io/api/cli#project-assets) +be fetched by running [`weasel assets`](https://github.com/explosion/weasel/tree/main/docs/cli.md#open_file_folder-assets) in the project directory. | File | Source | Description | @@ -49,4 +49,4 @@ in the project directory. 
| [`assets/train.json`](assets/train.json) | Local | Demo training data converted from the v2 `train_parser.py` example with `srsly.write_json("train.json", TRAIN_DATA)` | | [`assets/dev.json`](assets/dev.json) | Local | Demo development data | - + diff --git a/pipelines/parser_intent_demo/README.md b/pipelines/parser_intent_demo/README.md index 22942f2a3..567722cc1 100644 --- a/pipelines/parser_intent_demo/README.md +++ b/pipelines/parser_intent_demo/README.md @@ -1,6 +1,6 @@ - + -# ๐Ÿช spaCy Project: Demo Intent Parser (Dependency Parser) +# ๐Ÿช Weasel Project: Demo Intent Parser (Dependency Parser) A minimal demo parser project for spaCy v3 adapted from the spaCy v2 [`train_intent_parser.py`](https://github.com/explosion/spaCy/blob/v2.3.x/examples/training/train_intent_parser.py) example script. @@ -8,12 +8,12 @@ A minimal demo parser project for spaCy v3 adapted from the spaCy v2 [`train_int The [`project.yml`](project.yml) defines the data assets required by the project, as well as the available commands and workflows. For details, see the -[spaCy projects documentation](https://spacy.io/usage/projects). +[Weasel documentation](https://github.com/explosion/weasel). ### โฏ Commands The following commands are defined by the project. They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run). +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run). Commands are only re-run if their inputs have changed. | Command | Description | @@ -30,7 +30,7 @@ Commands are only re-run if their inputs have changed. ### โญ Workflows The following workflows are defined by the project. They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run) +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run) and will run the specified commands in order. 
Commands are only re-run if their inputs have changed. @@ -41,7 +41,7 @@ inputs have changed. ### ๐Ÿ—‚ Assets The following assets are defined by the project. They can -be fetched by running [`spacy project assets`](https://spacy.io/api/cli#project-assets) +be fetched by running [`weasel assets`](https://github.com/explosion/weasel/tree/main/docs/cli.md#open_file_folder-assets) in the project directory. | File | Source | Description | @@ -49,4 +49,4 @@ in the project directory. | [`assets/train.json`](assets/train.json) | Local | Demo training data converted from the v2 `train_intent_parser.py` example with `srsly.write_json("train.json", TRAIN_DATA)` | | [`assets/dev.json`](assets/dev.json) | Local | Demo development data | - + diff --git a/pipelines/polar_component/README.md b/pipelines/polar_component/README.md index 71aafff6e..87d1e60a7 100644 --- a/pipelines/polar_component/README.md +++ b/pipelines/polar_component/README.md @@ -1,6 +1,6 @@ - + -# ๐Ÿช spaCy Project: Polar Component +# ๐Ÿช Weasel Project: Polar Component This example project shows how to implement a simple stateful component to score docs on semantic poles. @@ -27,12 +27,12 @@ See [the assets section of this README](#assets) for the link. The [`project.yml`](project.yml) defines the data assets required by the project, as well as the available commands and workflows. For details, see the -[spaCy projects documentation](https://spacy.io/usage/projects). +[Weasel documentation](https://github.com/explosion/weasel). ### โฏ Commands The following commands are defined by the project. They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run). +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run). Commands are only re-run if their inputs have changed. | Command | Description | @@ -42,7 +42,7 @@ Commands are only re-run if their inputs have changed. 
### โญ Workflows The following workflows are defined by the project. They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run) +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run) and will run the specified commands in order. Commands are only re-run if their inputs have changed. @@ -53,11 +53,11 @@ inputs have changed. ### ๐Ÿ—‚ Assets The following assets are defined by the project. They can -be fetched by running [`spacy project assets`](https://spacy.io/api/cli#project-assets) +be fetched by running [`weasel assets`](https://github.com/explosion/weasel/tree/main/docs/cli.md#open_file_folder-assets) in the project directory. | File | Source | Description | | --- | --- | --- | | `assets/IMDB Dataset.csv` | Local | IMDB Review Corpus. Download from [Kaggle](https://www.kaggle.com/lakshmi25npathi/imdb-dataset-of-50k-movie-reviews). | - + diff --git a/pipelines/spancat_demo/README.md b/pipelines/spancat_demo/README.md index 22b711a25..fd8acf043 100644 --- a/pipelines/spancat_demo/README.md +++ b/pipelines/spancat_demo/README.md @@ -1,6 +1,6 @@ - + -# ๐Ÿช spaCy Project: Demo spancat in a new pipeline (Span Categorization) +# ๐Ÿช Weasel Project: Demo spancat in a new pipeline (Span Categorization) A minimal demo spancat project for spaCy v3 @@ -8,12 +8,12 @@ A minimal demo spancat project for spaCy v3 The [`project.yml`](project.yml) defines the data assets required by the project, as well as the available commands and workflows. For details, see the -[spaCy projects documentation](https://spacy.io/usage/projects). +[Weasel documentation](https://github.com/explosion/weasel). ### โฏ Commands The following commands are defined by the project. They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run). +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run). 
Commands are only re-run if their inputs have changed. | Command | Description | @@ -30,7 +30,7 @@ Commands are only re-run if their inputs have changed. ### โญ Workflows The following workflows are defined by the project. They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run) +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run) and will run the specified commands in order. Commands are only re-run if their inputs have changed. @@ -42,7 +42,7 @@ inputs have changed. ### ๐Ÿ—‚ Assets The following assets are defined by the project. They can -be fetched by running [`spacy project assets`](https://spacy.io/api/cli#project-assets) +be fetched by running [`weasel assets`](https://github.com/explosion/weasel/tree/main/docs/cli.md#open_file_folder-assets) in the project directory. | File | Source | Description | @@ -50,4 +50,4 @@ in the project directory. | [`assets/train.json`](assets/train.json) | Local | Demo training data adapted from the `ner_demo` project | | [`assets/dev.json`](assets/dev.json) | Local | Demo development data | - + diff --git a/pipelines/tagger_parser_predicted_annotations/README.md b/pipelines/tagger_parser_predicted_annotations/README.md index cf11d3278..bf61c1e81 100644 --- a/pipelines/tagger_parser_predicted_annotations/README.md +++ b/pipelines/tagger_parser_predicted_annotations/README.md @@ -1,6 +1,6 @@ - + -# ๐Ÿช spaCy Project: Using Predicted Annotations in Subsequent Components +# ๐Ÿช Weasel Project: Using Predicted Annotations in Subsequent Components This project shows how to use the predictions from one pipeline component as features for a subsequent pipeline component in **spaCy v3.1+**. In this demo, which trains a parser and a tagger on [`UD_English-EWT`](https://github.com/UniversalDependencies/UD_English-EWT), the `token.dep` attribute from the parser is used as a feature by the tagger. 
To make the predicted `DEP` available to the tagger during training, `DEP` is added to `[components.tagger.model.tok2vec.embed.attrs]` and `parser` is added to `[training.annotating_components]` in the config. This particular example does not lead to a large difference in performance, but the tagger accuracy improves from 92.67% to 92.97% with the addition of `DEP`. @@ -8,12 +8,12 @@ This project shows how to use the predictions from one pipeline component as fea The [`project.yml`](project.yml) defines the data assets required by the project, as well as the available commands and workflows. For details, see the -[spaCy projects documentation](https://spacy.io/usage/projects). +[Weasel documentation](https://github.com/explosion/weasel). ### ⏯ Commands The following commands are defined by the project. They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run). +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run). Commands are only re-run if their inputs have changed. | Command | Description | @@ -27,7 +27,7 @@ Commands are only re-run if their inputs have changed. ### ⏭ Workflows The following workflows are defined by the project. They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run) +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run) and will run the specified commands in order. Commands are only re-run if their inputs have changed. @@ -38,11 +38,11 @@ inputs have changed. ### 🗂 Assets The following assets are defined by the project. They can -be fetched by running [`spacy project assets`](https://spacy.io/api/cli#project-assets) +be fetched by running [`weasel assets`](https://github.com/explosion/weasel/tree/main/docs/cli.md#open_file_folder-assets) in the project directory.
| File | Source | Description | | --- | --- | --- | | `assets/UD_English-EWT` | Git | | - + diff --git a/pipelines/tagger_parser_predicted_annotations/project.yml b/pipelines/tagger_parser_predicted_annotations/project.yml index da1167eaf..907ceb5a4 100644 --- a/pipelines/tagger_parser_predicted_annotations/project.yml +++ b/pipelines/tagger_parser_predicted_annotations/project.yml @@ -1,6 +1,5 @@ title: "Using Predicted Annotations in Subsequent Components" description: "This project shows how to use the predictions from one pipeline component as features for a subsequent pipeline component in **spaCy v3.1+**. In this demo, which trains a parser and a tagger on [`UD_English-EWT`](https://github.com/UniversalDependencies/UD_English-EWT), the `token.dep` attribute from the parser is used as a feature by the tagger. To make the predicted `DEP` available to the tagger during training, `DEP` is added to `[components.tagger.model.tok2vec.embed.attrs]` and `parser` is added to `[training.annotating_components]` in the config. This particular example does not lead to a large difference in performance, but the tagger accuracy improves from 92.67% to 92.97% with the addition of `DEP`." -spacy_version: ">=3.1.0,<4.0.0" # Variables can be referenced across the project.yml using ${vars.var_name} vars: diff --git a/pipelines/tagger_parser_ud/README.md b/pipelines/tagger_parser_ud/README.md index ce99ccc80..75b44ef44 100644 --- a/pipelines/tagger_parser_ud/README.md +++ b/pipelines/tagger_parser_ud/README.md @@ -1,6 +1,6 @@ - + -# 🪐 spaCy Project: Part-of-speech Tagging & Dependency Parsing (Universal Dependencies) +# 🪐 Weasel Project: Part-of-speech Tagging & Dependency Parsing (Universal Dependencies) This project template lets you train a part-of-speech tagger, morphologizer, lemmatizer and dependency parser from a [Universal Dependencies](https://universaldependencies.org/) corpus.
It takes care of downloading the treebank, converting it to spaCy's format and training and evaluating the model. The template uses the [`UD_English-EWT`](https://github.com/UniversalDependencies/UD_English-EWT) treebank by default, but you can swap it out for any other available treebank. Just make sure to adjust the `lang` and treebank settings in the variables below. Use `xx` for multi-language if no language-specific tokenizer is available in spaCy. Note that multi-word tokens will be merged together when the corpus is converted since spaCy does not support multi-word token expansion. @@ -9,12 +9,12 @@ This project template lets you train a part-of-speech tagger, morphologizer, lem The [`project.yml`](project.yml) defines the data assets required by the project, as well as the available commands and workflows. For details, see the -[spaCy projects documentation](https://spacy.io/usage/projects). +[Weasel documentation](https://github.com/explosion/weasel). ### โฏ Commands The following commands are defined by the project. They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run). +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run). Commands are only re-run if their inputs have changed. | Command | Description | @@ -28,7 +28,7 @@ Commands are only re-run if their inputs have changed. ### โญ Workflows The following workflows are defined by the project. They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run) +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run) and will run the specified commands in order. Commands are only re-run if their inputs have changed. @@ -39,11 +39,11 @@ inputs have changed. ### ๐Ÿ—‚ Assets The following assets are defined by the project. 
They can -be fetched by running [`spacy project assets`](https://spacy.io/api/cli#project-assets) +be fetched by running [`weasel assets`](https://github.com/explosion/weasel/tree/main/docs/cli.md#open_file_folder-assets) in the project directory. | File | Source | Description | | --- | --- | --- | | `assets/UD_English-EWT` | Git | | - + diff --git a/pipelines/tagger_parser_ud/project.yml b/pipelines/tagger_parser_ud/project.yml index 4114eaf61..36de4f00a 100644 --- a/pipelines/tagger_parser_ud/project.yml +++ b/pipelines/tagger_parser_ud/project.yml @@ -25,8 +25,6 @@ vars: package_version: "0.0.0" gpu: -1 -spacy_version: ">=3.3.0,<4.0.0" - # These are the directories that the project needs. The project CLI will make # sure that they always exist. directories: ["assets", "corpus", "training", "metrics", "configs", "packages"] diff --git a/pipelines/textcat_demo/README.md b/pipelines/textcat_demo/README.md index 7bf2dd9d1..45a47fbed 100644 --- a/pipelines/textcat_demo/README.md +++ b/pipelines/textcat_demo/README.md @@ -1,6 +1,6 @@ - + -# ๐Ÿช spaCy Project: Demo Textcat (Text Classification) +# ๐Ÿช Weasel Project: Demo Textcat (Text Classification) A minimal demo textcat project for spaCy v3. The demo data comes from the [tutorials/textcat_docs_issues](https://github.com/explosion/projects/tree/v3/tutorials/textcat_docs_issues) project. @@ -8,12 +8,12 @@ A minimal demo textcat project for spaCy v3. The demo data comes from the [tutor The [`project.yml`](project.yml) defines the data assets required by the project, as well as the available commands and workflows. For details, see the -[spaCy projects documentation](https://spacy.io/usage/projects). +[Weasel documentation](https://github.com/explosion/weasel). ### โฏ Commands The following commands are defined by the project. They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run). 
+can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run). Commands are only re-run if their inputs have changed. | Command | Description | @@ -27,7 +27,7 @@ Commands are only re-run if their inputs have changed. ### โญ Workflows The following workflows are defined by the project. They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run) +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run) and will run the specified commands in order. Commands are only re-run if their inputs have changed. @@ -38,7 +38,7 @@ inputs have changed. ### ๐Ÿ—‚ Assets The following assets are defined by the project. They can -be fetched by running [`spacy project assets`](https://spacy.io/api/cli#project-assets) +be fetched by running [`weasel assets`](https://github.com/explosion/weasel/tree/main/docs/cli.md#open_file_folder-assets) in the project directory. | File | Source | Description | @@ -46,4 +46,4 @@ in the project directory. | [`assets/docs_issues_training.jsonl`](assets/docs_issues_training.jsonl) | Local | Demo training data | | [`assets/docs_issues_eval.jsonl`](assets/docs_issues_eval.jsonl) | Local | Demo development data | - + diff --git a/pipelines/textcat_multilabel_demo/README.md b/pipelines/textcat_multilabel_demo/README.md index eb51070f8..26e5989c9 100644 --- a/pipelines/textcat_multilabel_demo/README.md +++ b/pipelines/textcat_multilabel_demo/README.md @@ -1,6 +1,6 @@ - + -# ๐Ÿช spaCy Project: Demo Multilabel Textcat (Text Classification) +# ๐Ÿช Weasel Project: Demo Multilabel Textcat (Text Classification) A minimal demo textcat_multilabel project for spaCy v3. @@ -8,12 +8,12 @@ A minimal demo textcat_multilabel project for spaCy v3. The [`project.yml`](project.yml) defines the data assets required by the project, as well as the available commands and workflows. 
For details, see the -[spaCy projects documentation](https://spacy.io/usage/projects). +[Weasel documentation](https://github.com/explosion/weasel). ### โฏ Commands The following commands are defined by the project. They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run). +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run). Commands are only re-run if their inputs have changed. | Command | Description | @@ -27,7 +27,7 @@ Commands are only re-run if their inputs have changed. ### โญ Workflows The following workflows are defined by the project. They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run) +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run) and will run the specified commands in order. Commands are only re-run if their inputs have changed. @@ -38,7 +38,7 @@ inputs have changed. ### ๐Ÿ—‚ Assets The following assets are defined by the project. They can -be fetched by running [`spacy project assets`](https://spacy.io/api/cli#project-assets) +be fetched by running [`weasel assets`](https://github.com/explosion/weasel/tree/main/docs/cli.md#open_file_folder-assets) in the project directory. | File | Source | Description | @@ -46,4 +46,4 @@ in the project directory. 
| [`assets/cooking-train.jsonl`](assets/cooking-train.jsonl) | Local | Training data from cooking.stackexchange.com | | [`assets/cooking-dev.jsonl`](assets/cooking-dev.jsonl) | Local | Development data from cooking.stackexchange.com | - + diff --git a/tutorials/nel_emerson/README.md b/tutorials/nel_emerson/README.md index 87819ce9d..f71f44d0e 100644 --- a/tutorials/nel_emerson/README.md +++ b/tutorials/nel_emerson/README.md @@ -1,8 +1,8 @@ - + -# ๐Ÿช spaCy Project: Disambiguation of "Emerson" mentions in sentences (Entity Linking) +# ๐Ÿช Weasel Project: Disambiguation of "Emerson" mentions in sentences (Entity Linking) **This project was created as part of a [step-by-step video tutorial](https://www.youtube.com/watch?v=8u57WSXVpmw).** It uses [spaCy](https://spacy.io)'s entity linking functionality and [Prodigy](https://prodi.gy) to disambiguate "Emerson" mentions in text to unique identifiers from Wikidata. As an example use-case, we consider three different people called Emerson: [an Australian tennis player](https://www.wikidata.org/wiki/Q312545), [an American writer](https://www.wikidata.org/wiki/Q48226), and a [Brazilian footballer](https://www.wikidata.org/wiki/Q215952). [See here](https://github.com/explosion/projects/tree/master/nel-emerson) for the previous scripts for spaCy v2.x. @@ -10,12 +10,12 @@ The [`project.yml`](project.yml) defines the data assets required by the project, as well as the available commands and workflows. For details, see the -[spaCy projects documentation](https://spacy.io/usage/projects). +[Weasel documentation](https://github.com/explosion/weasel). ### โฏ Commands The following commands are defined by the project. They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run). +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run). Commands are only re-run if their inputs have changed. 
| Command | Description | @@ -30,7 +30,7 @@ Commands are only re-run if their inputs have changed. ### โญ Workflows The following workflows are defined by the project. They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run) +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run) and will run the specified commands in order. Commands are only re-run if their inputs have changed. @@ -42,7 +42,7 @@ inputs have changed. ### ๐Ÿ—‚ Assets The following assets are defined by the project. They can -be fetched by running [`spacy project assets`](https://spacy.io/api/cli#project-assets) +be fetched by running [`weasel assets`](https://github.com/explosion/weasel/tree/main/docs/cli.md#open_file_folder-assets) in the project directory. | File | Source | Description | @@ -51,7 +51,7 @@ in the project directory. | [`assets/entities.csv`](assets/entities.csv) | Local | The entities in the knowledge base | | [`assets/emerson_input_text.txt`](assets/emerson_input_text.txt) | Local | The original input text | - + ## Prodigy annotation diff --git a/tutorials/nel_emerson/project.yml b/tutorials/nel_emerson/project.yml index 8f5bb4ac4..ff097c5e5 100644 --- a/tutorials/nel_emerson/project.yml +++ b/tutorials/nel_emerson/project.yml @@ -13,8 +13,6 @@ vars: dev: "dev" version: "0.0.3" -spacy_version: ">=3.5.0,<3.6.0" - # These are the directories that the project needs. The project CLI will make # sure that they always exist. 
directories: ["assets", "training", "configs", "scripts", "corpus", "notebooks", "temp"] diff --git a/tutorials/nel_emerson/requirements.txt b/tutorials/nel_emerson/requirements.txt index a64bf15b5..c3f2eff28 100644 --- a/tutorials/nel_emerson/requirements.txt +++ b/tutorials/nel_emerson/requirements.txt @@ -1 +1,2 @@ -google-cloud-storage \ No newline at end of file +google-cloud-storage +spacy>=3.5.0,<3.6.0 diff --git a/tutorials/ner_double/README.md b/tutorials/ner_double/README.md index fdfcec656..7f0420868 100644 --- a/tutorials/ner_double/README.md +++ b/tutorials/ner_double/README.md @@ -1,6 +1,6 @@ - + -# ๐Ÿช spaCy Project: Combining Multiple Trained NER Components +# ๐Ÿช Weasel Project: Combining Multiple Trained NER Components This project shows you the different ways you can combine multiple trained NER components and their tradeoffs. @@ -9,12 +9,12 @@ This project shows you the different ways you can combine multiple trained NER c The [`project.yml`](project.yml) defines the data assets required by the project, as well as the available commands and workflows. For details, see the -[spaCy projects documentation](https://spacy.io/usage/projects). +[Weasel documentation](https://github.com/explosion/weasel). ### โฏ Commands The following commands are defined by the project. They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run). +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run). Commands are only re-run if their inputs have changed. | Command | Description | @@ -27,7 +27,7 @@ Commands are only re-run if their inputs have changed. ### โญ Workflows The following workflows are defined by the project. They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run) +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run) and will run the specified commands in order. 
Commands are only re-run if their inputs have changed. @@ -38,14 +38,14 @@ inputs have changed. ### ๐Ÿ—‚ Assets The following assets are defined by the project. They can -be fetched by running [`spacy project assets`](https://spacy.io/api/cli#project-assets) +be fetched by running [`weasel assets`](https://github.com/explosion/weasel/tree/main/docs/cli.md#open_file_folder-assets) in the project directory. | File | Source | Description | | --- | --- | --- | | `assets/ner_drugs-0.0.0.tar.gz` | URL | Pretrained drug model | - + ## Notes on Combining Components diff --git a/tutorials/ner_double/project.yml b/tutorials/ner_double/project.yml index 26b80646a..29743cc59 100644 --- a/tutorials/ner_double/project.yml +++ b/tutorials/ner_double/project.yml @@ -8,8 +8,6 @@ vars: version: "0.0.0" drug_model: "ner_drugs-0.0.0" -spacy_version: ">=3.1.0,<4.0.0" - directories: ["assets", "configs", "scripts", "pipelines", "packages"] workflows: diff --git a/tutorials/ner_drugs/README.md b/tutorials/ner_drugs/README.md index 2388babbb..ae144c52a 100644 --- a/tutorials/ner_drugs/README.md +++ b/tutorials/ner_drugs/README.md @@ -1,6 +1,6 @@ - + -# ๐Ÿช spaCy Project: Detecting drug names in online comments (Named Entity Recognition) +# ๐Ÿช Weasel Project: Detecting drug names in online comments (Named Entity Recognition) This project uses [Prodigy](https://prodi.gy) to bootstrap an NER model to detect drug names in [Reddit comments](https://files.pushshift.io/reddit/comments/). @@ -8,12 +8,12 @@ This project uses [Prodigy](https://prodi.gy) to bootstrap an NER model to detec The [`project.yml`](project.yml) defines the data assets required by the project, as well as the available commands and workflows. For details, see the -[spaCy projects documentation](https://spacy.io/usage/projects). +[Weasel documentation](https://github.com/explosion/weasel). ### โฏ Commands The following commands are defined by the project. 
They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run). +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run). Commands are only re-run if their inputs have changed. | Command | Description | @@ -29,7 +29,7 @@ Commands are only re-run if their inputs have changed. ### โญ Workflows The following workflows are defined by the project. They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run) +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run) and will run the specified commands in order. Commands are only re-run if their inputs have changed. @@ -40,7 +40,7 @@ inputs have changed. ### ๐Ÿ—‚ Assets The following assets are defined by the project. They can -be fetched by running [`spacy project assets`](https://spacy.io/api/cli#project-assets) +be fetched by running [`weasel assets`](https://github.com/explosion/weasel/tree/main/docs/cli.md#open_file_folder-assets) in the project directory. | File | Source | Description | @@ -49,7 +49,7 @@ in the project directory. 
| [`assets/drugs_eval.jsonl`](assets/drugs_eval.jsonl) | Local | JSONL-formatted development data exported from Prodigy, annotated with `DRUG` entities (500 examples) | | [`assets/drugs_patterns.jsonl`](assets/drugs_patterns.jsonl) | Local | Patterns file generated with `terms.teach` and used to pre-highlight during annotation (118 patterns) | - + ## ๐Ÿ“š Data diff --git a/tutorials/ner_fashion_brands/README.md b/tutorials/ner_fashion_brands/README.md index a6838ab82..b05d39490 100644 --- a/tutorials/ner_fashion_brands/README.md +++ b/tutorials/ner_fashion_brands/README.md @@ -1,6 +1,6 @@ - + -# ๐Ÿช spaCy Project: Detecting fashion brands in online comments (Named Entity Recognition) +# ๐Ÿช Weasel Project: Detecting fashion brands in online comments (Named Entity Recognition) This project uses [`sense2vec`](https://github.com/explosion/sense2vec) and [Prodigy](https://prodi.gy) to bootstrap an NER model to detect fashion brands in [Reddit comments](https://files.pushshift.io/reddit/comments/). For more details, see [our blog post](https://explosion.ai/blog/sense2vec-reloaded#annotation). @@ -8,12 +8,12 @@ This project uses [`sense2vec`](https://github.com/explosion/sense2vec) and [Pro The [`project.yml`](project.yml) defines the data assets required by the project, as well as the available commands and workflows. For details, see the -[spaCy projects documentation](https://spacy.io/usage/projects). +[Weasel documentation](https://github.com/explosion/weasel). ### โฏ Commands The following commands are defined by the project. They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run). +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run). Commands are only re-run if their inputs have changed. | Command | Description | @@ -28,7 +28,7 @@ Commands are only re-run if their inputs have changed. ### โญ Workflows The following workflows are defined by the project. 
They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run) +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run) and will run the specified commands in order. Commands are only re-run if their inputs have changed. @@ -39,7 +39,7 @@ inputs have changed. ### 🗂 Assets The following assets are defined by the project. They can -be fetched by running [`spacy project assets`](https://spacy.io/api/cli#project-assets) +be fetched by running [`weasel assets`](https://github.com/explosion/weasel/tree/main/docs/cli.md#open_file_folder-assets) in the project directory. | File | Source | Description | @@ -48,7 +48,7 @@ in the project directory. | [`assets/fashion_brands_eval.jsonl`](assets/fashion_brands_eval.jsonl) | Local | JSONL-formatted development data exported from Prodigy, annotated with `FASHION_BRAND` entities (500 examples) | | [`assets/fashion_brands_patterns.jsonl`](assets/fashion_brands_patterns.jsonl) | Local | Patterns file generated with `sense2vec.teach` and used to pre-highlight during annotation (100 patterns) | - + --- diff --git a/tutorials/ner_food_ingredients/README.md b/tutorials/ner_food_ingredients/README.md index a26d8b019..2151c57c6 100644 --- a/tutorials/ner_food_ingredients/README.md +++ b/tutorials/ner_food_ingredients/README.md @@ -1,8 +1,8 @@ - + -# 🪐 spaCy Project: Analyzing how mentions of ingredients change over time (Named Entity Recognition) +# 🪐 Weasel Project: Analyzing how mentions of ingredients change over time (Named Entity Recognition) **This project was created as part of a [step-by-step video tutorial](https://www.youtube.com/watch?v=59BKHO_xBPA).** It uses [`sense2vec`](https://github.com/explosion/sense2vec) and [Prodigy](https://prodi.gy) to bootstrap an NER model to detect ingredients in [Reddit comments](https://files.pushshift.io/reddit/comments/) and to calculate how mentions change over time.
The results were then used to create a [bar chart race visualization](https://public.flourish.studio/visualisation/1532208/) of selected ingredients. @@ -10,12 +10,12 @@ The [`project.yml`](project.yml) defines the data assets required by the project, as well as the available commands and workflows. For details, see the -[spaCy projects documentation](https://spacy.io/usage/projects). +[Weasel documentation](https://github.com/explosion/weasel). ### ๐Ÿ—‚ Assets The following assets are defined by the project. They can -be fetched by running [`spacy project assets`](https://spacy.io/api/cli#project-assets) +be fetched by running [`weasel assets`](https://github.com/explosion/weasel/tree/main/docs/cli.md#open_file_folder-assets) in the project directory. | File | Source | Description | @@ -28,7 +28,7 @@ in the project directory. | `assets/tok2vec_cd8_model289.bin` | URL | Pretrained tok2vec weights to initialize the model | | `assets/s2v_reddit_2015_md.tar.gz` | URL | sense2vec vectors trained on Reddit comments of 2015. Used to bootstrap the terminology list | - + ## ๐Ÿงฎ Results diff --git a/tutorials/ner_multiple_trials/README.md b/tutorials/ner_multiple_trials/README.md index a323d9f8d..3d7bf29d6 100644 --- a/tutorials/ner_multiple_trials/README.md +++ b/tutorials/ner_multiple_trials/README.md @@ -1,6 +1,6 @@ - + -# ๐Ÿช spaCy Project: Training a named-entity recognition (NER) with multiple trials +# ๐Ÿช Weasel Project: Training a named-entity recognition (NER) with multiple trials This project demonstrates how to train a spaCy pipeline with multiple trials. It trains a named-entity recognition (NER) model on the WikiNEuRal English @@ -27,12 +27,12 @@ summarize. The [`project.yml`](project.yml) defines the data assets required by the project, as well as the available commands and workflows. For details, see the -[spaCy projects documentation](https://spacy.io/usage/projects). +[Weasel documentation](https://github.com/explosion/weasel). 
### โฏ Commands The following commands are defined by the project. They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run). +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run). Commands are only re-run if their inputs have changed. | Command | Description | @@ -46,7 +46,7 @@ Commands are only re-run if their inputs have changed. ### โญ Workflows The following workflows are defined by the project. They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run) +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run) and will run the specified commands in order. Commands are only re-run if their inputs have changed. @@ -57,7 +57,7 @@ inputs have changed. ### ๐Ÿ—‚ Assets The following assets are defined by the project. They can -be fetched by running [`spacy project assets`](https://spacy.io/api/cli#project-assets) +be fetched by running [`weasel assets`](https://github.com/explosion/weasel/tree/main/docs/cli.md#open_file_folder-assets) in the project directory. | File | Source | Description | @@ -66,4 +66,4 @@ in the project directory. 
| `assets/raw-en-wikineural-dev.iob` | URL | WikiNEuRal (en) dev dataset | | `assets/raw-en-wikineural-test.iob` | URL | WikiNEuRal (en) test dataset | - \ No newline at end of file + \ No newline at end of file diff --git a/tutorials/ner_pytorch_medical/README.md b/tutorials/ner_pytorch_medical/README.md index f7b906f38..5598cb766 100644 --- a/tutorials/ner_pytorch_medical/README.md +++ b/tutorials/ner_pytorch_medical/README.md @@ -1,6 +1,6 @@ - + -# ๐Ÿช spaCy Project: Detecting entities in Medical Records with PyTorch +# ๐Ÿช Weasel Project: Detecting entities in Medical Records with PyTorch This project uses the [i2b2 (n2c2) 2011 Challenge Dataset](https://portal.dbmi.hms.harvard.edu/projects/n2c2-nlp/) to bootstrap a PyTorch NER model to detect entities in Medical Records. It also demonstrates how to anonymize medical records for annotators in [Prodigy](https://prodi.gy). @@ -8,12 +8,12 @@ This project uses the [i2b2 (n2c2) 2011 Challenge Dataset](https://portal.dbmi.h The [`project.yml`](project.yml) defines the data assets required by the project, as well as the available commands and workflows. For details, see the -[spaCy projects documentation](https://spacy.io/usage/projects). +[Weasel documentation](https://github.com/explosion/weasel). ### โฏ Commands The following commands are defined by the project. They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run). +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run). Commands are only re-run if their inputs have changed. | Command | Description | @@ -31,7 +31,7 @@ Commands are only re-run if their inputs have changed. ### โญ Workflows The following workflows are defined by the project. 
They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run) +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run) and will run the specified commands in order. Commands are only re-run if their inputs have changed. @@ -42,7 +42,7 @@ inputs have changed. ### ๐Ÿ—‚ Assets The following assets are defined by the project. They can -be fetched by running [`spacy project assets`](https://spacy.io/api/cli#project-assets) +be fetched by running [`weasel assets`](https://github.com/explosion/weasel/tree/main/docs/cli.md#open_file_folder-assets) in the project directory. | File | Source | Description | @@ -52,7 +52,7 @@ in the project directory. | `assets/n2c2_2011/Task_1C.zip` | Local | Zipfile containing original challenge test data | | [`assets/mock_notes.jsonl`](assets/mock_notes.jsonl) | Local | JSONL file with raw mock notes to annotate in prodigy | - + --- diff --git a/tutorials/ner_tweets/README.md b/tutorials/ner_tweets/README.md index 203f60f75..8da4cd054 100644 --- a/tutorials/ner_tweets/README.md +++ b/tutorials/ner_tweets/README.md @@ -1,6 +1,6 @@ - + -# ๐Ÿช spaCy Project: Detecting people entities in tweets (Named Entity Recognition) +# ๐Ÿช Weasel Project: Detecting people entities in tweets (Named Entity Recognition) This project demonstrates how to improve spaCy's pretrained models by augmenting the training data and adapting it to a different domain. @@ -10,12 +10,12 @@ augmenting the training data and adapting it to a different domain. The [`project.yml`](project.yml) defines the data assets required by the project, as well as the available commands and workflows. For details, see the -[spaCy projects documentation](https://spacy.io/usage/projects). +[Weasel documentation](https://github.com/explosion/weasel). ### โฏ Commands The following commands are defined by the project. 
They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run). +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run). Commands are only re-run if their inputs have changed. | Command | Description | @@ -32,7 +32,7 @@ Commands are only re-run if their inputs have changed. ### โญ Workflows The following workflows are defined by the project. They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run) +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run) and will run the specified commands in order. Commands are only re-run if their inputs have changed. @@ -45,7 +45,7 @@ inputs have changed. ### ๐Ÿ—‚ Assets The following assets are defined by the project. They can -be fetched by running [`spacy project assets`](https://spacy.io/api/cli#project-assets) +be fetched by running [`weasel assets`](https://github.com/explosion/weasel/tree/main/docs/cli.md#open_file_folder-assets) in the project directory. | File | Source | Description | @@ -58,7 +58,7 @@ in the project directory. | `assets/first_names.json` | URL | A list of first names to help our heuristic annotator | | `assets/en_orth_variants.json` | URL | Orth variants to use for data augmentation | - + # Usage Notes diff --git a/tutorials/ner_tweets/project.yml b/tutorials/ner_tweets/project.yml index bf501b8b0..afdb0ec7b 100644 --- a/tutorials/ner_tweets/project.yml +++ b/tutorials/ner_tweets/project.yml @@ -3,8 +3,6 @@ description: | This project demonstrates how to improve spaCy's pretrained models by augmenting the training data and adapting it to a different domain. 
-spacy_version: ">=3.3.0,<4.0.0" - # Variables can be referenced across the project.yml using ${vars.var_name} vars: config: "config.cfg" diff --git a/tutorials/ner_tweets/requirements.txt b/tutorials/ner_tweets/requirements.txt index 70288bca4..564d14c05 100644 --- a/tutorials/ner_tweets/requirements.txt +++ b/tutorials/ner_tweets/requirements.txt @@ -2,3 +2,5 @@ skweak==0.2.13 hmmlearn==0.2.6 nlpaug + +spacy>=3.3.0,<4.0.0 diff --git a/tutorials/parser_low_resource/README.md b/tutorials/parser_low_resource/README.md index f18d623f1..c5524afd3 100644 --- a/tutorials/parser_low_resource/README.md +++ b/tutorials/parser_low_resource/README.md @@ -1,6 +1,6 @@ - + -# 🪐 spaCy Project: Training a POS tagger and dependency parser for a low-resource language +# 🪐 Weasel Project: Training a POS tagger and dependency parser for a low-resource language This project trains a part-of-speech tagger and dependency parser for a low-resource language such as Tagalog. We will be using the @@ -24,12 +24,12 @@ How to implement this split will be demonstrated in this project The [`project.yml`](project.yml) defines the data assets required by the project, as well as the available commands and workflows. For details, see the -[spaCy projects documentation](https://spacy.io/usage/projects). +[Weasel documentation](https://github.com/explosion/weasel). ### ⏯ Commands The following commands are defined by the project. They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run). +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run). Commands are only re-run if their inputs have changed. | Command | Description | @@ -41,7 +41,7 @@ Commands are only re-run if their inputs have changed. ### ⏭ Workflows The following workflows are defined by the project.
They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run) +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run) and will run the specified commands in order. Commands are only re-run if their inputs have changed. @@ -52,7 +52,7 @@ inputs have changed. ### 🗂 Assets The following assets are defined by the project. They can -be fetched by running [`spacy project assets`](https://spacy.io/api/cli#project-assets) +be fetched by running [`weasel assets`](https://github.com/explosion/weasel/tree/main/docs/cli.md#open_file_folder-assets) in the project directory. | File | Source | Description | @@ -60,4 +60,4 @@ in the project directory. | `assets/tl_trg-ud-test.conllu` | URL | Treebank data for UD_Tagalog-TRG | | `assets/tl_ugnayan-ud-test.conllu` | URL | Treebank data for UD_Tagalog-Ugnayan | - \ No newline at end of file + \ No newline at end of file diff --git a/tutorials/rel_component/README.md b/tutorials/rel_component/README.md index 9ce596555..1ad5356a8 100644 --- a/tutorials/rel_component/README.md +++ b/tutorials/rel_component/README.md @@ -1,9 +1,9 @@ - + -# 🪐 spaCy Project: Example project of creating a novel nlp component to do relation extraction from scratch. +# 🪐 Weasel Project: Example project of creating a novel nlp component to do relation extraction from scratch. This example project shows how to implement a spaCy component with a custom Machine Learning model, how to train it with and without a transformer, and how to apply it on an evaluation dataset. @@ -11,12 +11,12 @@ This example project shows how to implement a spaCy component with a custom Mach The [`project.yml`](project.yml) defines the data assets required by the project, as well as the available commands and workflows. For details, see the -[spaCy projects documentation](https://spacy.io/usage/projects). +[Weasel documentation](https://github.com/explosion/weasel).
### โฏ Commands The following commands are defined by the project. They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run). +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run). Commands are only re-run if their inputs have changed. | Command | Description | @@ -30,7 +30,7 @@ Commands are only re-run if their inputs have changed. ### โญ Workflows The following workflows are defined by the project. They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run) +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run) and will run the specified commands in order. Commands are only re-run if their inputs have changed. @@ -42,11 +42,11 @@ inputs have changed. ### ๐Ÿ—‚ Assets The following assets are defined by the project. They can -be fetched by running [`spacy project assets`](https://spacy.io/api/cli#project-assets) +be fetched by running [`weasel assets`](https://github.com/explosion/weasel/tree/main/docs/cli.md#open_file_folder-assets) in the project directory. 
| File | Source | Description | | --- | --- | --- | | [`assets/annotations.jsonl`](assets/annotations.jsonl) | Local | Gold-standard REL annotations created with Prodigy | - + diff --git a/tutorials/spancat_food_ingredients/README.md b/tutorials/spancat_food_ingredients/README.md index 037a56326..998a99a41 100644 --- a/tutorials/spancat_food_ingredients/README.md +++ b/tutorials/spancat_food_ingredients/README.md @@ -1,6 +1,6 @@ - + -# 🪐 spaCy Project: Span Categorization in Prodigy +# 🪐 Weasel Project: Span Categorization in Prodigy This project shows how to use Prodigy to annotate data for the spancat component @@ -8,12 +8,12 @@ This project shows how to use Prodigy to annotate data for the spancat component The [`project.yml`](project.yml) defines the data assets required by the project, as well as the available commands and workflows. For details, see the -[spaCy projects documentation](https://spacy.io/usage/projects). +[Weasel documentation](https://github.com/explosion/weasel). ### ⏯ Commands The following commands are defined by the project. They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run). +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run). Commands are only re-run if their inputs have changed. | Command | Description | @@ -29,7 +29,7 @@ Commands are only re-run if their inputs have changed. ### 🗂 Assets The following assets are defined by the project. They can -be fetched by running [`spacy project assets`](https://spacy.io/api/cli#project-assets) +be fetched by running [`weasel assets`](https://github.com/explosion/weasel/tree/main/docs/cli.md#open_file_folder-assets) in the project directory. | File | Source | Description | @@ -39,4 +39,4 @@ in the project directory. | [`assets/patterns.jsonl`](assets/patterns.jsonl) | Local | Example patterns for pre-selecting spans in text.
| | [`prodigy.json`](prodigy.json) | Local | Example prodigy.json file for using instruction files. | - \ No newline at end of file + \ No newline at end of file diff --git a/tutorials/spanruler_restaurant_reviews/README.md b/tutorials/spanruler_restaurant_reviews/README.md index ea488d335..fc68c63a1 100644 --- a/tutorials/spanruler_restaurant_reviews/README.md +++ b/tutorials/spanruler_restaurant_reviews/README.md @@ -1,6 +1,6 @@ - + -# 🪐 spaCy Project: Using SpanRuler for rule-based Named Entity Recognition +# 🪐 Weasel Project: Using SpanRuler for rule-based Named Entity Recognition This example project demonstrates how you can use the [SpanRuler](https://spacy.io/api/spanruler), a component introduced in spaCy @@ -83,12 +83,12 @@ Conference on Acoustics, Speech and Signal Processing*, pages 8386-8390 The [`project.yml`](project.yml) defines the data assets required by the project, as well as the available commands and workflows. For details, see the -[spaCy projects documentation](https://spacy.io/usage/projects). +[Weasel documentation](https://github.com/explosion/weasel). ### ⏯ Commands The following commands are defined by the project. They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run). +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run). Commands are only re-run if their inputs have changed. | Command | Description | @@ -111,7 +111,7 @@ Commands are only re-run if their inputs have changed. ### ⏭ Workflows The following workflows are defined by the project. They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run) +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run) and will run the specified commands in order. Commands are only re-run if their inputs have changed. @@ -125,7 +125,7 @@ inputs have changed.
### 🗂 Assets The following assets are defined by the project. They can -be fetched by running [`spacy project assets`](https://spacy.io/api/cli#project-assets) +be fetched by running [`weasel assets`](https://github.com/explosion/weasel/tree/main/docs/cli.md#open_file_folder-assets) in the project directory. | File | Source | Description | @@ -135,4 +135,4 @@ in the project directory. | [`assets/train_review.jsonl`](assets/train_review.jsonl) | Local | JSONL-formatted training data exported from Prodigy (7662 examples) | | [`assets/test_review.jsonl`](assets/test_review.jsonl) | Local | JSONL-formatted test data exported from Prodigy (1521 examples) | - + diff --git a/tutorials/textcat_docs_issues/README.md b/tutorials/textcat_docs_issues/README.md index 1db6f361d..36f299633 100644 --- a/tutorials/textcat_docs_issues/README.md +++ b/tutorials/textcat_docs_issues/README.md @@ -1,6 +1,6 @@ - + -# 🪐 spaCy Project: Predicting whether a GitHub issue is about documentation (Text Classification) +# 🪐 Weasel Project: Predicting whether a GitHub issue is about documentation (Text Classification) This project uses [spaCy](https://spacy.io) with annotated data from [Prodigy](https://prodi.gy) to train a **binary text classifier** to predict whether a GitHub issue title is about documentation. The pipeline uses the component `textcat_multilabel` in order to train a binary classifier using only one label, which can be True or False for each document. An equivalent alternative for a binary text classifier would be to use the `textcat` component with two labels, where exactly one of the two labels is True for each document. @@ -8,12 +8,12 @@ This project uses [spaCy](https://spacy.io) with annotated data from [Prodigy](h The [`project.yml`](project.yml) defines the data assets required by the project, as well as the available commands and workflows. For details, see the -[spaCy projects documentation](https://spacy.io/usage/projects).
+[Weasel documentation](https://github.com/explosion/weasel). ### ⏯ Commands The following commands are defined by the project. They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run). +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run). Commands are only re-run if their inputs have changed. | Command | Description | @@ -25,7 +25,7 @@ Commands are only re-run if their inputs have changed. ### ⏭ Workflows The following workflows are defined by the project. They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run) +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run) and will run the specified commands in order. Commands are only re-run if their inputs have changed. @@ -36,7 +36,7 @@ inputs have changed. ### 🗂 Assets The following assets are defined by the project. They can -be fetched by running [`spacy project assets`](https://spacy.io/api/cli#project-assets) +be fetched by running [`weasel assets`](https://github.com/explosion/weasel/tree/main/docs/cli.md#open_file_folder-assets) in the project directory. | File | Source | Description | @@ -44,7 +44,7 @@ in the project directory.
| [`assets/docs_issues_training.jsonl`](assets/docs_issues_training.jsonl) | Local | JSONL-formatted training data exported from Prodigy, annotated with `DOCUMENTATION` (661 examples) | | [`assets/docs_issues_eval.jsonl`](assets/docs_issues_eval.jsonl) | Local | JSONL-formatted development data exported from Prodigy, annotated with `DOCUMENTATION` (500 examples) | - + ## 📚 Data diff --git a/tutorials/textcat_docs_issues/project.yml b/tutorials/textcat_docs_issues/project.yml index 595c5dad3..03a0b4510 100644 --- a/tutorials/textcat_docs_issues/project.yml +++ b/tutorials/textcat_docs_issues/project.yml @@ -8,7 +8,6 @@ vars: train: "docs_issues_training" dev: "docs_issues_eval" gpu_id: -1 -spacy_version: ">=3.0.6,<4.0.0" # These are the directories that the project needs. The project CLI will make # sure that they always exist. diff --git a/tutorials/textcat_goemotions/README.md b/tutorials/textcat_goemotions/README.md index b06d5f529..e6006c1c7 100644 --- a/tutorials/textcat_goemotions/README.md +++ b/tutorials/textcat_goemotions/README.md @@ -1,6 +1,6 @@ - + -# 🪐 spaCy Project: Categorization of emotions in Reddit posts (Text Classification) +# 🪐 Weasel Project: Categorization of emotions in Reddit posts (Text Classification) This project uses spaCy to train a text classifier on the [GoEmotions dataset](https://github.com/google-research/google-research/tree/master/goemotions) with options for a pipeline with and without transformer weights. To use the BERT-based config, change the `config` variable in the `project.yml`. @@ -11,12 +11,12 @@ This project uses spaCy to train a text classifier on the [GoEmotions dataset](h The [`project.yml`](project.yml) defines the data assets required by the project, as well as the available commands and workflows. For details, see the -[spaCy projects documentation](https://spacy.io/usage/projects). +[Weasel documentation](https://github.com/explosion/weasel).
### โฏ Commands The following commands are defined by the project. They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run). +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run). Commands are only re-run if their inputs have changed. | Command | Description | @@ -32,7 +32,7 @@ Commands are only re-run if their inputs have changed. ### โญ Workflows The following workflows are defined by the project. They -can be executed using [`spacy project run [name]`](https://spacy.io/api/cli#project-run) +can be executed using [`weasel run [name]`](https://github.com/explosion/weasel/tree/main/docs/cli.md#rocket-run) and will run the specified commands in order. Commands are only re-run if their inputs have changed. @@ -43,7 +43,7 @@ inputs have changed. ### ๐Ÿ—‚ Assets The following assets are defined by the project. They can -be fetched by running [`spacy project assets`](https://spacy.io/api/cli#project-assets) +be fetched by running [`weasel assets`](https://github.com/explosion/weasel/tree/main/docs/cli.md#open_file_folder-assets) in the project directory. | File | Source | Description | @@ -53,7 +53,7 @@ in the project directory. | `assets/dev.tsv` | URL | The development data | | `assets/test.tsv` | URL | The test data | - + ## Usage