Skip to content

Commit

Permalink
added Infrastructure field to academic corpora
Browse files Browse the repository at this point in the history
  • Loading branch information
kreetrapper committed Aug 20, 2024
1 parent f068f5d commit bcfb622
Show file tree
Hide file tree
Showing 31 changed files with 31 additions and 0 deletions.
1 change: 1 addition & 0 deletions corpora/academic-corpora/ac-lit.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
"License": "",
"Size": ["9 million words"],
"Annotation": ["no linguistic annotation"],
"Infrastructure": "Other",
"Access": {
"Concordancer": "http://coralit.lt/en/node/18"
},
Expand Down
1 change: 1 addition & 0 deletions corpora/academic-corpora/aca-hum.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
"License": "CC BY",
"Size": ["14.5 million tokens"],
"Annotation": [],
"Infrastructure": "CLARIN",
"Access": {
"Concordancer": "https://spraakbanken.gu.se/korp/?corpus=sweachum",
"Download": "http://hdl.handle.net/10794/49"
Expand Down
1 change: 1 addition & 0 deletions corpora/academic-corpora/aca-soc.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
"License": "CC BY",
"Size": ["10.8 million tokens"],
"Annotation": ["sentence segmentation"],
"Infrastructure": "CLARIN",
"Access": {
"Concordancer": "https://spraakbanken.gu.se/korp/?corpus=sweacsam",
"Download": "http://hdl.handle.net/10794/50"
Expand Down
1 change: 1 addition & 0 deletions corpora/academic-corpora/acl-anth.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
"License": "CC BY SA",
"Size": ["75 million tokens"],
"Annotation": ["PoS-tagged", "lemmatised", "author/text metadata"],
"Infrastructure": "CLARIN",
"Access": {
"Concordancer": "https://www.sketchengine.eu/acl-anthology-reference-corpus-arc/",
"Download": "https://doi.org/10.35111/rfeg-z495"
Expand Down
1 change: 1 addition & 0 deletions corpora/academic-corpora/acnz.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
"License": "",
"Size": ["3.5 million words"],
"Annotation": [],
"Infrastructure": "Other",
"Access": {
"Download": ""
},
Expand Down
1 change: 1 addition & 0 deletions corpora/academic-corpora/chambers-lb.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
"License": "Oxford Text Archive licence (academic use)",
"Size": ["1 million words"],
"Annotation": ["No annotation"],
"Infrastructure": "CLARIN",
"Access": {
"Download": "http://hdl.handle.net/20.500.14106/2527"
},
Expand Down
1 change: 1 addition & 0 deletions corpora/academic-corpora/czec-soc.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
"License": "MIT",
"Size": ["3 million words"],
"Annotation": [],
"Infrastructure": "CLARIN",
"Access": {
"Download": "http://hdl.handle.net/11372/LRT-2703"
},
Expand Down
1 change: 1 addition & 0 deletions corpora/academic-corpora/eng-sci.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
"License": "restricted",
"Size": ["35 million tokens"],
"Annotation": ["PoS-tagged", "lemmatised", "author/text metadata", "document structure"],
"Infrastructure": "CLARIN",
"Access": {
"Concordancer": "https://hdl.handle.net/11858/00-246C-0000-0023-8CF9-6"
},
Expand Down
1 change: 1 addition & 0 deletions corpora/academic-corpora/est-sci.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
"License": "CLARIN ACA-NC",
"Size": ["5 million words"],
"Annotation": [],
"Infrastructure": "CLARIN",
"Access": {
"Download": "http://hdl.handle.net/11297/1-00-0000-0000-0000-0002-4"
},
Expand Down
1 change: 1 addition & 0 deletions corpora/academic-corpora/genia.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
"License": "free but unspecified",
"Size": ["437,000 words"],
"Annotation": ["PoS-tagged", "syntactically parsed", "annotated for terms, events, semantic relations and coreference", "text metadata"],
"Infrastructure": "CLARIN",
"Access": {
"Download": "http://www.geniaproject.org/genia-corpus"
},
Expand Down
1 change: 1 addition & 0 deletions corpora/academic-corpora/jezkor.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
"License": "CC BY",
"Size": ["9.3 million tokens"],
"Annotation": ["PoS-tagged (UD)", "MSD-tagged (UD & MULTEXT-East)", "lemmatised", "annotated for named entities and author/text metadata"],
"Infrastructure": "CLARIN",
"Access": {
"Concordancer (noSketchEngine)": "https://www.clarin.si/ske/#dashboard?corpname=jezkor",
"Concordancer (KonText)": "https://www.clarin.si/kontext/query?corpname=jezkor"
Expand Down
1 change: 1 addition & 0 deletions corpora/academic-corpora/kas.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
"License": "CLARIN.SI Licence ACA ID-BY-NC-INF-NORED 1.0",
"Size": ["1.5 billion tokens"],
"Annotation": ["MSD-tagged", "lemmatised", "marked for bilingual and monolingual term candidates"],
"Infrastructure": "CLARIN",
"Access": {
"Download": "http://hdl.handle.net/11356/1448"
},
Expand Down
1 change: 1 addition & 0 deletions corpora/academic-corpora/kiap.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
"License": "CC-BY 4.0",
"Size": ["3.9 million tokens"],
"Annotation": ["PoS-tagged"],
"Infrastructure": "CLARIN",
"Access": {
"Concordancer": "http://clarino.uib.no/korpuskel/landing-page?identifier=kiap&view=short"
},
Expand Down
1 change: 1 addition & 0 deletions corpora/academic-corpora/lit-trans.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
"License": "CC-BY-SA",
"Size": ["48,300 words"],
"Annotation": [],
"Infrastructure": "CLARIN",
"Access": {
"Download": "http://hdl.grnet.gr/11500/KEG-0000-0000-24F2-6"
},
Expand Down
1 change: 1 addition & 0 deletions corpora/academic-corpora/modern-greek.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
"License": "CC-BY-SA",
"Size": ["113,000 words"],
"Annotation": [],
"Infrastructure": "CLARIN",
"Access": {
"Download": "http://hdl.grnet.gr/11500/KEG-0000-0000-2502-4"
},
Expand Down
1 change: 1 addition & 0 deletions corpora/academic-corpora/muchmore.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
"License": "free but unspecified",
"Size": ["1 million tokens"],
"Annotation": ["PoS/MSD-tagged", "phrase chunking", "semantic class and relations", "document structure"],
"Infrastructure": "Other",
"Access": {
"Download": "http://muchmore.dfki.de/resources1.htm"
},
Expand Down
1 change: 1 addition & 0 deletions corpora/academic-corpora/open-slo.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
"License": "CC BY-SA",
"Size": ["326 million tokens"],
"Annotation": ["PoS-tagged (UD)", "MSD-tagged (UD & MULTEXT-East)", "lemmatised", "annotated for named entities and author/text metadata"],
"Infrastructure": "CLARIN",
"Access": {
"Concordancer (noSketchEngine)": "https://www.clarin.si/ske/#dashboard?corpname=oss10",
"Concordancer (KonText)": "https://www.clarin.si/kontext/query?corpname=oss10"
Expand Down
1 change: 1 addition & 0 deletions corpora/academic-corpora/orossimo.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
"License": "CC-BY",
"Size": ["2.5 million tokens"],
"Annotation": ["marked for term candidates", "mixed structural annotation"],
"Infrastructure": "CLARIN",
"Access": {
"Download": "http://hdl.grnet.gr/11500/ATHENA-0000-0000-2410-5"
},
Expand Down
1 change: 1 addition & 0 deletions corpora/academic-corpora/reading.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
"License": "restricted",
"Size": [],
"Annotation": [],
"Infrastructure": "Other",
"Access": {
"Download": ""
},
Expand Down
1 change: 1 addition & 0 deletions corpora/academic-corpora/roger.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
"License": "CC BY-NC-ND",
"Size": ["3.3 million words"],
"Annotation": [],
"Infrastructure": "Other",
"Access": {
"Concordancer": "https://roger-corpus.org/login.php"
},
Expand Down
1 change: 1 addition & 0 deletions corpora/academic-corpora/roysoc.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
"License": "CC BY",
"Size": ["32 million tokens"],
"Annotation": ["PoS-tagged", "lemmatised", "normalised", "author and document metadata"],
"Infrastructure": "CLARIN",
"Access": {
"Concordancer": "http://fedora.clarin-d.uni-saarland.de/rsc_v4/access.html#cqpweb",
"Download": "http://fedora.clarin-d.uni-saarland.de/rsc_v4/access.html#download"
Expand Down
1 change: 1 addition & 0 deletions corpora/academic-corpora/scientext.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
"License": "CC BY",
"Size": ["20 million words"],
"Annotation": [],
"Infrastructure": "Other",
"Access": {
"Concordancer": "http://scientext.hypotheses.org/corpus"
},
Expand Down
1 change: 1 addition & 0 deletions corpora/academic-corpora/span-eng.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
"License": "",
"Size": ["5.7 million words"],
"Annotation": [],
"Infrastructure": "Other",
"Access": {
"Download": ""
},
Expand Down
1 change: 1 addition & 0 deletions corpora/academic-corpora/ufal-papers.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
"License": "CC BY",
"Size": ["2 million words"],
"Annotation": ["document aligned"],
"Infrastructure": "CLARIN",
"Access": {
"Download": "http://hdl.handle.net/11234/1-1731"
},
Expand Down
1 change: 1 addition & 0 deletions corpora/academic-corpora/uh-eng.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
"License": "CC BY",
"Size": ["200 million tokens"],
"Annotation": ["PoS-tagged", "syntactically parsed"],
"Infrastructure": "CLARIN",
"Access": {
"Concordancer": "http://urn.fi/urn:nbn:fi:lb-2016102101"
},
Expand Down
1 change: 1 addition & 0 deletions corpora/academic-corpora/uh-fin.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
"License": "CC BY",
"Size": ["12.5 million tokens"],
"Annotation": ["PoS-tagged", "lemmatised"],
"Infrastructure": "CLARIN",
"Access": {
"Concordancer": "http://urn.fi/urn:nbn:fi:lb-2016101801"
},
Expand Down
1 change: 1 addition & 0 deletions corpora/academic-corpora/uh-fra.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
"License": "CC BY",
"Size": ["580,000 tokens"],
"Annotation": [],
"Infrastructure": "CLARIN",
"Access": {
"Concordancer": "http://urn.fi/urn:nbn:fi:lb-2016102803"
},
Expand Down
1 change: 1 addition & 0 deletions corpora/academic-corpora/uh-ger.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
"License": "CC BY",
"Size": ["560,000 tokens"],
"Annotation": ["No annotation"],
"Infrastructure": "CLARIN",
"Access": {
"Concordancer": "http://urn.fi/urn:nbn:fi:lb-2016102802"
},
Expand Down
1 change: 1 addition & 0 deletions corpora/academic-corpora/uh-rus.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
"License": "CC BY",
"Size": ["1.1 million words"],
"Annotation": ["No annotation"],
"Infrastructure": "CLARIN",
"Access": {
"Concordancer": "http://urn.fi/urn:nbn:fi:lb-2016102805"
},
Expand Down
1 change: 1 addition & 0 deletions corpora/academic-corpora/uh-spa.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
"License": "CC BY",
"Size": ["2.3 million tokens"],
"Annotation": ["No annotation"],
"Infrastructure": "CLARIN",
"Access": {
"Concordancer": "http://urn.fi/urn:nbn:fi:lb-2016102804"
},
Expand Down
1 change: 1 addition & 0 deletions corpora/academic-corpora/uh-swe.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
"License": "CC BY",
"Size": ["105 million tokens"],
"Annotation": [],
"Infrastructure": "CLARIN",
"Access": {
"Concordancer": "http://urn.fi/urn:nbn:fi:lb-2016102801"
},
Expand Down

0 comments on commit bcfb622

Please sign in to comment.