diff --git a/sparv/modules/conll_export/metadata.yaml b/sparv/modules/conll_export/metadata.yaml index 1df61c32..2eb357e4 100644 --- a/sparv/modules/conll_export/metadata.yaml +++ b/sparv/modules/conll_export/metadata.yaml @@ -1,4 +1,4 @@ -id: export-conllu +id: sbx-swe-export-conllu name: swe: CoNLL-U-export eng: CoNLL-U export diff --git a/sparv/modules/geo/metadata.yaml b/sparv/modules/geo/metadata.yaml index a7325bbc..19d4b758 100644 --- a/sparv/modules/geo/metadata.yaml +++ b/sparv/modules/geo/metadata.yaml @@ -6,7 +6,6 @@ language_codes: keywords: [] standard_reference: '' other_references: [] -tool: '' model: "[GeoNames](https://www.geonames.org/)" trained_on: '' tagset: '' @@ -14,7 +13,7 @@ evaluation_results: '' created: 2018-05-28 updated: 2022-05-18 --- -id: swe-geotagcontext-sparv +id: sbx-swe-geotagcontext-sparv parent: geo-parent name: swe: Geotaggning av platsnamn från kontext @@ -67,7 +66,7 @@ description: the [GeoNames database](https://www.geonames.org/). This annotation can be applied to any text chunk, e.g. texts, paragraphs, sentences or tokens. --- -id: swe-geotagmetadata-sparv +id: sbx-swe-geotagmetadata-sparv parent: geo-parent name: swe: Geotagging av platsnamn från metadata diff --git a/sparv/modules/hunpos/metadata.yaml b/sparv/modules/hunpos/metadata.yaml index 9c21566b..4b8c7cc4 100644 --- a/sparv/modules/hunpos/metadata.yaml +++ b/sparv/modules/hunpos/metadata.yaml @@ -5,12 +5,16 @@ language_codes: standard_reference: '' other_references: - "Hunpos: https://code.google.com/archive/p/hunpos/" -tool: "Hunpos" +tool: + name: Hunpos + url: "https://code.google.com/archive/p/hunpos/" +licences: + tool: BSD-3-Clause trained_on: "[SUC3](https://spraakbanken.gu.se/resurser/suc3)" tagset: "[SUC3](https://spraakbanken.gu.se/korp/markup/msdtags.html)" evaluation_results: '' --- -id: swe-pos-hunpos-suc3 +id: sbx-swe-pos-hunpos-suc3 parent: hunpos-parent name: swe: SUC-ordklasstaggning med Hunpos @@ -42,7 +46,7 @@ description: created: 2010-12-15 updated: 2018-05-28 --- -id: swe-msd-hunpos-suc3 +id: sbx-swe-msd-hunpos-suc3 parent: hunpos-parent name: swe: Morfosyntaktisk SUC-taggning med Hunpos @@ -74,7 +78,7 @@ description: created: 2010-12-15 updated: 2018-05-28 --- -id: swe-pos-hunpos-suc3-1800 +id: sbx-swe-pos-hunpos-suc3-1800 parent: hunpos-parent name: swe: SUC-ordklasstaggning med Hunpos för 1800-talssvenska @@ -124,7 +128,7 @@ description: created: 2012-10-23 updated: 2015-09-11 --- -id: swe-msd-hunpos-suc3-1800 +id: sbx-swe-msd-hunpos-suc3-1800 parent: hunpos-parent name: swe: Morfosyntaktisk SUC-taggning med Hunpos för 1800-talssvenska diff --git a/sparv/modules/lexical_classes/metadata.yaml b/sparv/modules/lexical_classes/metadata.yaml index ffcab819..866d3794 100644 --- a/sparv/modules/lexical_classes/metadata.yaml +++ b/sparv/modules/lexical_classes/metadata.yaml @@ -4,7 +4,6 @@ language_codes: - swe keywords: [] abstract: true -tool: '' trained_on: |- Reference corpora for relative frequencies: [Göteborgsposten 2008](https://spraakbanken.gu.se/resurser/gp2008), [SUC 3.0](https://spraakbanken.gu.se/resurser/suc3), [Bonniersromaner I @@ -21,7 +20,7 @@ other_references: tagset: "[Blingbring](https://spraakbanken.gu.se/resurser/blingbring)" model: "[Blingbring frequency model](https://github.com/spraakbanken/sparv-models/blob/master/lexical_classes/blingbring.freq.gp2008%2Bsuc3%2Bromi.pickle)" --- -id: swe-lexical_classes_token-sparv-blingbring +id: sbx-swe-lexical_classes_token-sparv-blingbring parent: blingbring-parent name: swe: Lexikala klasser från Blingbring, tokennivå @@ -65,7 +64,7 @@ description: versions of Blingbring. created: 2017-09-05 --- -id: swe-lexical_classes_text-sparv-blingbring +id: sbx-swe-lexical_classes_text-sparv-blingbring parent: blingbring-parent name: swe: Lexikala klasser från Blingbring, textnivå @@ -148,7 +147,7 @@ other_references: tagset: "[Swedish FrameNet (SweFN)](https://spraakbanken.gu.se/resurser/swefn)" model: "[Frequency model](https://github.com/spraakbanken/sparv-models/blob/master/lexical_classes/swefn.freq.gp2008%2Bsuc3%2Bromi.pickle)" --- -id: swe-lexical_classes_token-sparv-swefn +id: sbx-swe-lexical_classes_token-sparv-swefn parent: swefn-parent name: swe: Lexikala klasser från SweFN, tokennivå @@ -186,7 +185,7 @@ description: classes. created: 2017-09-21 --- -id: swe-lexical_classes_text-sparv-swefn +id: sbx-swe-lexical_classes_text-sparv-swefn parent: swefn-parent name: swe: Lexikala klasser från SweFN, textnivå diff --git a/sparv/modules/malt/metadata.yaml b/sparv/modules/malt/metadata.yaml index 307461f3..ff73f82f 100644 --- a/sparv/modules/malt/metadata.yaml +++ b/sparv/modules/malt/metadata.yaml @@ -1,4 +1,4 @@ -id: swe-dependency-malt-treebank +id: sbx-swe-dependency-malt-treebank name: swe: Dependensparsning med MaltParser eng: Dependency parsing with MaltParser @@ -35,7 +35,9 @@ standard_reference: |- other_references: - "Maltparser: https://www.maltparser.org/download.html" - 'https://aclanthology.org/2021.nodalida-main.20/' -tool: "Maltparser" +tool: + name: Maltparser + url: "https://www.maltparser.org/" model: "[Swemalt](https://www.maltparser.org/mco/swedish_parser/swemalt.html)" trained_on: "[Svensk trädbank (the TalbankenSTB part)](https://spraakbanken.gu.se/resurser/sv-treebank)" tagset: "[MambaDep](https://svn.spraakdata.gu.se/sb-arkiv/pub/mamba.html)" diff --git a/sparv/modules/phrase_structure/metadata.yaml b/sparv/modules/phrase_structure/metadata.yaml index ef2bdd3f..5e2a83b4 100644 --- a/sparv/modules/phrase_structure/metadata.yaml +++ b/sparv/modules/phrase_structure/metadata.yaml @@ -1,4 +1,4 @@ -id: swe-phrasestructure-sparv +id: sbx-swe-phrasestructure-sparv name: swe: Svensk frasstrukturparsning eng: Swedish phrase structure parsing @@ -45,7 +45,6 @@ example_output: |- ``` standard_reference: '' other_references: [] -tool: '' model: "Method has no model" trained_on: "[TalbankenSBX](https://spraakbanken.gu.se/resurser/talbanken)" tagset: "See description below" diff --git a/sparv/modules/readability/metadata.yaml b/sparv/modules/readability/metadata.yaml index 95e28b5d..56cd61a4 100644 --- a/sparv/modules/readability/metadata.yaml +++ b/sparv/modules/readability/metadata.yaml @@ -5,7 +5,6 @@ language_codes: - swe keywords: [] other_references: [] -tool: '' model: '' trained_on: '' tagset: '' @@ -13,7 +12,7 @@ evaluation_results: '' created: 2018-03-28 updated: 2018-03-28 --- -id: swe-readability-sparv-lix +id: sbx-swe-readability-sparv-lix parent: readability-parent name: swe: Annotering av läsbarhetsindex (LIX) för texter @@ -76,7 +75,7 @@ description: average word count per sentence and ratio of long words (exceeding six letters). The value is calculated as O / M + L x 100 / O, where O = word count, M = sentence count and L = long word count. --- -id: swe-readability-sparv-nk +id: sbx-swe-readability-sparv-nk parent: readability-parent name: swe: Annotering av Nominalkvot (NK) för texter @@ -137,7 +136,7 @@ description: dividing this by the number of verbs, adverbs and pronouns. A high nominal ratio suggests a high density of information, which can also mean that the text is difficult to read. --- -id: swe-readability-sparv-ovix +id: sbx-swe-readability-sparv-ovix parent: readability-parent name: swe: Annotering av Ordvariationsindex (OVIX) för texter diff --git a/sparv/modules/saldo/metadata.yaml b/sparv/modules/saldo/metadata.yaml index 77ec3725..80ade651 100644 --- a/sparv/modules/saldo/metadata.yaml +++ b/sparv/modules/saldo/metadata.yaml @@ -4,13 +4,12 @@ language_codes: - swe standard_reference: "[Borin/Forsberg/Lönngren 2013: SALDO: a touch of yin to WordNet's yang](http://dx.doi.org/10.1007/s10579-013-9233-4)" other_references: [] -tool: "Sparv" model: "[SALDO's morphology](https://spraakbanken.gu.se/resurser/saldom)" trained_on: '' tagset: '' evaluation_results: '' --- -id: swe-lemmatization-sparv-saldo +id: sbx-swe-lemmatization-sparv-saldo parent: saldo-parent name: swe: Annotering av SALDO-grundformer @@ -44,7 +43,7 @@ description: created: 2010-12-15 updated: 2018-03-28 --- -id: swe-lemgram-sparv-saldo +id: sbx-swe-lemgram-sparv-saldo parent: saldo-parent name: swe: Annotering av SALDO-lemgram @@ -77,7 +76,7 @@ description: created: 2010-12-15 updated: 2018-03-28 --- -id: swe-sense-sparv-saldo +id: sbx-swe-sense-sparv-saldo parent: saldo-parent name: swe: Annotering av SALDO-identifierare @@ -107,7 +106,7 @@ description: created: 2010-12-15 updated: 2018-03-28 --- -id: swe-compound-sparv-saldolemgram +id: sbx-swe-compound-sparv-saldolemgram parent: saldo-parent name: swe: Sammansättningsanalys med hjälp av SALDO-lemgram @@ -148,7 +147,7 @@ description: created: 2018-03-28 updated: 2020-07-09 --- -id: swe-compound-sparv-saldowords +id: sbx-swe-compound-sparv-saldowords parent: saldo-parent name: swe: Sammansättningsanalys med hjälp av SALDO-ordformer @@ -189,7 +188,7 @@ description: created: 2018-03-28 updated: 2020-07-09 --- -id: swe-lemmatization-sparv-saldo2 +id: sbx-swe-lemmatization-sparv-saldo2 parent: saldo-parent name: swe: Annotering av SALDO-grundformer (utökade) diff --git a/sparv/modules/segment/metadata.yaml b/sparv/modules/segment/metadata.yaml index 0427ad7a..9b455bee 100644 --- a/sparv/modules/segment/metadata.yaml +++ b/sparv/modules/segment/metadata.yaml @@ -4,7 +4,9 @@ abstract: true keywords: [] standard_reference: "Bird, Steven, Edward Loper and Ewan Klein (2009), Natural Language Processing with Python. O’Reilly Media Inc." other_references: [] -tool: "NLTK" +tool: + name: NLTK + url: "https://www.nltk.org/" model: '' trained_on: '' tagset: '' @@ -17,7 +19,7 @@ description: created: 2010-12-15 updated: 2021-05-07 --- -id: tokenization-sparv-linebreaks +id: sbx-mul-tokenization-sparv-linebreaks parent: segment-nltk-parent name: swe: Radbrytningstokenisering @@ -44,7 +46,7 @@ example_extra: |- token_segmenter: linebreaks ``` --- -id: sentence-sparv-linebreaks +id: sbx-mul-sentence-sparv-linebreaks parent: segment-nltk-parent name: swe: Radbrytningssegmentering, meningar @@ -80,7 +82,7 @@ example_extra: |- sentence_segmenter: linebreaks ``` --- -id: paragraph-sparv-linebreaks +id: sbx-mul-paragraph-sparv-linebreaks parent: segment-nltk-parent name: swe: Radbrytningssegmentering, stycken @@ -120,7 +122,7 @@ example_extra: |- paragraph_segmenter: linebreaks ``` --- -id: tokenization-sparv-blanklines +id: sbx-mul-tokenization-sparv-blanklines parent: segment-nltk-parent name: swe: Tomradstokenisering @@ -147,7 +149,7 @@ example_extra: |- token_segmenter: blanklines ``` --- -id: sentence-sparv-blanklines +id: sbx-mul-sentence-sparv-blanklines parent: segment-nltk-parent name: swe: Tomradssegmentering, meningar @@ -183,7 +185,7 @@ example_extra: |- sentence_segmenter: blanklines ``` --- -id: paragraph-sparv-blanklines +id: sbx-mul-paragraph-sparv-blanklines parent: segment-nltk-parent name: swe: Tomradssegmentering, stycken @@ -223,7 +225,7 @@ example_extra: |- paragraph_segmenter: blanklines ``` --- -id: tokenization-sparv-whitespace +id: sbx-mul-tokenization-sparv-whitespace parent: segment-nltk-parent name: swe: Blankteckentokenisering @@ -250,7 +252,7 @@ example_extra: |- token_segmenter: whitespace ``` --- -id: sentence-sparv-whitespace +id: sbx-mul-sentence-sparv-whitespace parent: segment-nltk-parent name: swe: Blankteckensegmentering, meningar @@ -286,7 +288,7 @@ example_extra: |- sentence_segmenter: whitespace ``` --- -id: paragraph-sparv-whitespace +id: sbx-mul-paragraph-sparv-whitespace parent: segment-nltk-parent name: swe: Blankteckensegmentering, stycken @@ -326,7 +328,7 @@ example_extra: |- paragraph_segmenter: whitespace ``` --- -id: swe-tokenization-sparv-betterword +id: sbx-swe-tokenization-sparv-betterword parent: segment-nltk-parent name: swe: Svensk tokenisering @@ -366,7 +368,7 @@ description: tokenizer for other languages. updated: 2021-05-07 --- -id: swe-sentence-punkt-storsuc +id: sbx-swe-sentence-punkt-storsuc parent: segment-nltk-parent name: swe: Svensk meningssegmentering @@ -413,7 +415,7 @@ description: is, however, possible to configure the sentence segmenter for other languages. updated: 2021-09-02 --- -id: sentence-punkt +id: sbx-mul-sentence-punctuation parent: segment-nltk-parent name: swe: Meningssegmentering utifrån skiljetecken diff --git a/sparv/modules/sensaldo/metadata.yaml b/sparv/modules/sensaldo/metadata.yaml index ec4822dc..f57e276e 100644 --- a/sparv/modules/sensaldo/metadata.yaml +++ b/sparv/modules/sensaldo/metadata.yaml @@ -1,4 +1,4 @@ -id: swe-sentiment-sparv-sensaldo +id: sbx-swe-sentiment-sparv-sensaldo name: swe: Sentimentanalys per token med SenSALDO eng: Sentiment analysis per token using SenSALDO @@ -49,7 +49,6 @@ standard_reference: 'http://www.lrec-conf.org/proceedings/lrec2018/summaries/857 other_references: - http://www.lrec-conf.org/proceedings/lrec2018/summaries/846.html - https://gup.ub.gu.se/publication/264721?lang=sv -tool: '' model: "[Sensaldo](https://spraakbanken.gu.se/resurser/sensaldo)" trained_on: '' tagset: '' diff --git a/sparv/modules/stanza/metadata.yaml b/sparv/modules/stanza/metadata.yaml index 206650e2..3df1c333 100644 --- a/sparv/modules/stanza/metadata.yaml +++ b/sparv/modules/stanza/metadata.yaml @@ -4,7 +4,11 @@ language_codes: - swe keywords: - stanza -tool: "Stanza" +tool: + name: "Stanza" + url: "https://stanfordnlp.github.io/stanza/" +licences: + tool: "Apache License 2.0" trained_on: "[SUC3](https://spraakbanken.gu.se/resurser/suc3), [TalbankenSBX](https://spraakbanken.gu.se/resurser/talbanken), [SIC2](https://spraakbanken.gu.se/resurser/sic2)" other_references: - "Stanza: Peng Qi, Yuhao Zhang, Yuhui Zhang, Jason Bolton and Christopher D. Manning. 2020" @@ -27,7 +31,7 @@ intended_uses: created: 2020-12-07 updated: 2022-08-10 --- -id: swe-pos-stanza-stanzamorph +id: sbx-swe-pos-stanza-stanzamorph parent: stanza-parent-swe name: swe: SUC-ordklasstaggning med Stanza @@ -55,7 +59,7 @@ description: Currently (in 2024), this is the default analysis for Swedish in Språkbanken's analysis platform [Sparv](https://spraakbanken.gu.se/sparv). --- -id: swe-msd-stanza-stanzamorph-suc3 +id: sbx-swe-msd-stanza-stanzamorph-suc3 parent: stanza-parent-swe name: swe: Morfosyntaktisk SUC-taggning med Stanza @@ -86,7 +90,7 @@ description: Currently (in 2024), this is the default analysis for Swedish in Språkbanken's analysis platform [Sparv](https://spraakbanken.gu.se/sparv). --- -id: swe-msd-stanza-stanzamorph-ufeats +id: sbx-swe-msd-stanza-stanzamorph-ufeats parent: stanza-parent-swe name: swe: Morfologisk analys för svenska baserad på Stanza @@ -120,7 +124,7 @@ description: eng: |- This analysis uses universal features, defined as part of the Universal Dependencies standard. --- -id: swe-lemmatization-stanza-stanzalem +id: sbx-swe-lemmatization-stanza-stanzalem parent: stanza-parent-swe name: swe: SUC3-grundformanalys med Stanza @@ -154,7 +158,7 @@ description: compatible with SALDO-style lemmas. This model's advantage is that it can be used to lemmatize any token, including out-of-vocabulary tokens. --- -id: swe-dependency-stanza-stanzasynt +id: sbx-swe-dependency-stanza-stanzasynt parent: stanza-parent-swe name: swe: Dependensparsning med Stanza @@ -194,7 +198,6 @@ language_codes: keywords: - stanza standard_reference: '' -tool: "Stanza" trained_on: '' other_references: - "Stanza: Peng Qi, Yuhao Zhang, Yuhui Zhang, Jason Bolton and Christopher D. Manning. 2020" @@ -204,7 +207,7 @@ model: Stanza standard model for English (https://stanfordnlp.github.io/stanza/m created: 2022-08-10 updated: 2022-08-10 --- -id: eng-pos-stanza +id: sbx-eng-pos-stanza parent: stanza-parent-eng name: swe: Ordklasstaggning med Stanza för engelska @@ -231,7 +234,7 @@ example_extra: |- language: eng ``` --- -id: eng-sentence-stanza +id: sbx-eng-sentence-stanza parent: stanza-parent-eng name: swe: Meningssegmentering för engelska med Stanza @@ -273,7 +276,7 @@ example_extra: |- language: eng ``` --- -id: eng-tokenization-stanza +id: sbx-eng-tokenization-stanza parent: stanza-parent-eng name: swe: Tokenisering för engelska med Stanza @@ -302,7 +305,7 @@ example_extra: |- token: stanza.token ``` --- -id: eng-lemmatization-stanza +id: sbx-eng-lemmatization-stanza parent: stanza-parent-eng name: swe: Lemmatisering för engelska med Stanza @@ -331,7 +334,7 @@ example_extra: |- language: eng ``` --- -id: eng-dependency-stanza +id: sbx-eng-dependency-stanza parent: stanza-parent-eng name: swe: Dependensparsning för engelska med Stanza @@ -363,7 +366,7 @@ example_extra: |- language: eng ``` --- -id: eng-namedentity-stanza +id: sbx-eng-namedentity-stanza parent: stanza-parent-eng name: swe: Namnigenkänning för engelska med Stanza @@ -415,7 +418,7 @@ description: Named entity recognition (NER) enables the detection of named entities (e.g. personal names, organizations, geographical locations) in the text. --- -id: eng-pos-stanza-upos +id: sbx-eng-pos-stanza-upos parent: stanza-parent-eng name: swe: UD-Ordklasstaggning med Stanza för engelska @@ -442,7 +445,7 @@ example_extra: |- language: eng ``` --- -id: eng-msd-stanza-ufeats +id: sbx-eng-msd-stanza-ufeats parent: stanza-parent-eng name: swe: Morfologisk analys för engelska baserad på Stanza diff --git a/sparv/modules/swener/metadata.yaml b/sparv/modules/swener/metadata.yaml index 18b18da2..244aca69 100644 --- a/sparv/modules/swener/metadata.yaml +++ b/sparv/modules/swener/metadata.yaml @@ -1,4 +1,4 @@ -id: swe-namedentity-swener +id: sbx-swe-namedentity-swener name: swe: Namnigenkänning med HFST-SweNER eng: Named entity recognition with HFST-SweNER @@ -57,7 +57,11 @@ standard_reference: |- other_references: - "[Dimitrios Kokkinakis. 2004. Reducing the effect of name explosion](https://demo.spraakbanken.gu.se/svedk/pbl/kokkinakisBNER.pdf)" - "Download HFST-SweNER: https://www.kielipankki.fi/download/HFST-SweNER/" -tool: "HFST-SweNER" +tool: + name: "HFST-SweNER" + url: "https://www.kielipankki.fi/download/HFST-SweNER/" +licences: + tool: CC-BY model: "Included in the tool" trained_on: '' tagset: "[Named entity tags from hfst-SweNER](https://svn.spraakdata.gu.se/sb-arkiv/pub/swener-tags.html)" diff --git a/sparv/modules/wsd/metadata.yaml b/sparv/modules/wsd/metadata.yaml index dac4f921..38c61433 100644 --- a/sparv/modules/wsd/metadata.yaml +++ b/sparv/modules/wsd/metadata.yaml @@ -1,4 +1,4 @@ -id: swe-sense-wsd +id: sbx-swe-sense-wsd name: swe: Betydelsedisambiguering med hjälp av SALDO ID:n eng: Sense disambiguation of SALDO identifiers @@ -42,7 +42,11 @@ standard_reference: 'https://aclanthology.org/N15-1164.pdf' other_references: - https://github.com/spraakbanken/sparv-wsd/blob/master/README.pdf - "Sparv wsd: https://github.com/spraakbanken/sparv-wsd" -tool: "Sparv wsd" +tool: + name: Sparv wsd + url: https://github.com/spraakbanken/sparv-wsd +licences: + tool: MIT License model: |- - [ALL_512_128_w10_A2_140403_ctx1.bin](https://github.com/spraakbanken/sparv-wsd/blob/master/models/scouse/ALL_512_128_w10_A2_140403_ctx1.bin) - [lem_cbow0_s512_w10_NEW2_ctx.bin](https://github.com/spraakbanken/sparv-wsd/blob/master/models/scouse/lem_cbow0_s512_w10_NEW2_ctx.bin) diff --git a/sparv/modules/xml_export/metadata.yaml b/sparv/modules/xml_export/metadata.yaml index 11a39d39..d8110530 100644 --- a/sparv/modules/xml_export/metadata.yaml +++ b/sparv/modules/xml_export/metadata.yaml @@ -6,7 +6,6 @@ keywords: [] example_extra: '' standard_reference: '' other_references: [] -tool: '' model: '' trained_on: '' tagset: '' @@ -16,7 +15,7 @@ description: swe: '' updated: 2023-11-06 --- -id: export-xml-pretty +id: sbx-zxx-export-xml-pretty parent: xml-export-parent name: swe: XML-export, finformaterad @@ -192,7 +191,7 @@ example_output: |- ``` created: 2011-09-05 --- -id: export-xml-preserved +id: sbx-zxx-export-xml-preserved parent: xml-export-parent name: swe: XML-export, bevarat format @@ -215,7 +214,7 @@ example_output: |- ``` created: 2016-05-31 --- -id: export-xml-scrambled +id: sbx-zxx-export-xml-scrambled parent: xml-export-parent name: swe: XML-export, omkastad