diff --git a/.github/workflows/cldf-validation.yml b/.github/workflows/cldf-validation.yml index 8dcb470..3bb913a 100644 --- a/.github/workflows/cldf-validation.yml +++ b/.github/workflows/cldf-validation.yml @@ -12,7 +12,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.9] + python-version: [3.12] steps: - uses: actions/checkout@v2 diff --git a/.zenodo.json b/.zenodo.json index 4839829..65c9940 100644 --- a/.zenodo.json +++ b/.zenodo.json @@ -16,7 +16,7 @@ "contributors": [ { "name": "Johann-Mattis List", - "type": "Other" + "type": "Editor" }, { "name": "Fabio Fromme", diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index d0db6f3..6df1940 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -2,8 +2,8 @@ Name | GitHub user | Description | Role --- | --- | --- | --- -Johann-Mattis List | @LinguList | maintainer | Other -Fabio Fromme | @frython | student task | Other +Johann-Mattis List | @LinguList | maintainer | Editor +Fabio Fromme | @frython | student task | Other Simone Pomikalek | @Simonecomcom| student task | Other Janine Wieking | @ja-wie | student task | Other Maurizio Serva | | data collector | DataCollector, Author diff --git a/README.md b/README.md index 9785cf1..1d51f2b 100644 --- a/README.md +++ b/README.md @@ -27,8 +27,8 @@ Conceptlists in Concepticon: ![BIPA: 100%](https://img.shields.io/badge/BIPA-100%25-brightgreen.svg "BIPA: 100%") ![CLTS SoundClass: 100%](https://img.shields.io/badge/CLTS%20SoundClass-100%25-brightgreen.svg "CLTS SoundClass: 100%") -- **Varieties:** 60 -- **Concepts:** 207 +- **Varieties:** 60 (linked to 22 different Glottocodes) +- **Concepts:** 207 (linked to 207 different Concepticon concept sets) - **Lexemes:** 12,420 - **Sources:** 1 - **Synonymy:** 1.00 @@ -41,8 +41,8 @@ Conceptlists in Concepticon: Name | GitHub user | Description | Role --- | --- | --- | --- -Johann-Mattis List | @LinguList | maintainer | Other -Fabio Fromme | @frython | student task | Other +Johann-Mattis List | @LinguList | maintainer | Editor +Fabio Fromme | @frython | student task | Other Simone Pomikalek | @Simonecomcom| student task | Other Janine Wieking | @ja-wie | student task | Other Maurizio Serva | | data collector | DataCollector, Author diff --git a/cldf/README.md b/cldf/README.md index cc09a31..829f435 100644 --- a/cldf/README.md +++ b/cldf/README.md @@ -13,8 +13,8 @@ property | value [dc:format](http://purl.org/dc/terms/format) |
  1. http://concepticon.clld.org/contributions/Serva-2020-207
[dc:license](http://purl.org/dc/terms/license) | https://creativecommons.org/licenses/by/4.0/ [dcat:accessURL](http://www.w3.org/ns/dcat#accessURL) | https://github.com/digling/servamalagasy -[prov:wasDerivedFrom](http://www.w3.org/ns/prov#wasDerivedFrom) |
  1. digling/servamalagasy v1.0-10-g7a54e31
  2. Glottolog v5.0
  3. Concepticon v3.2.0
  4. CLTS v2.2.0-16-g4468d3b
-[prov:wasGeneratedBy](http://www.w3.org/ns/prov#wasGeneratedBy) |
  1. lingpy-rcParams: lingpy-rcParams.json
  2. python: 3.9.6
  3. python-packages: requirements.txt
+[prov:wasDerivedFrom](http://www.w3.org/ns/prov#wasDerivedFrom) |
  1. digling/servamalagasy v1.0-12-g2995840
  2. Glottolog v5.0
  3. Concepticon v3.2.0
  4. CLTS v2.3.0
+[prov:wasGeneratedBy](http://www.w3.org/ns/prov#wasGeneratedBy) |
  1. lingpy-rcParams: lingpy-rcParams.json
  2. python: 3.12.4
  3. python-packages: requirements.txt
[rdf:ID](http://www.w3.org/1999/02/22-rdf-syntax-ns#ID) | servamalagasy [rdf:type](http://www.w3.org/1999/02/22-rdf-syntax-ns#type) | http://www.w3.org/ns/dcat#Distribution diff --git a/cldf/cldf-metadata.json b/cldf/cldf-metadata.json index 98d8d7e..5e969f3 100644 --- a/cldf/cldf-metadata.json +++ b/cldf/cldf-metadata.json @@ -16,7 +16,7 @@ { "rdf:about": "https://github.com/digling/servamalagasy", "rdf:type": "prov:Entity", - "dc:created": "v1.0-10-g7a54e31", + "dc:created": "v1.0-12-g2995840", "dc:title": "Repository" }, { @@ -34,7 +34,7 @@ { "rdf:about": "https://github.com/cldf-clts/clts", "rdf:type": "prov:Entity", - "dc:created": "v2.2.0-16-g4468d3b", + "dc:created": "v2.3.0", "dc:title": "CLTS" } ], @@ -45,7 +45,7 @@ }, { "dc:title": "python", - "dc:description": "3.9.6" + "dc:description": "3.12.4" }, { "dc:title": "python-packages", diff --git a/cldf/lingpy-rcParams.json b/cldf/lingpy-rcParams.json index d22e776..536eae9 100644 --- a/cldf/lingpy-rcParams.json +++ b/cldf/lingpy-rcParams.json @@ -64,7 +64,7 @@ 10, 10 ], - "filename": "lingpy-2024-05-03", + "filename": "lingpy-2024-08-02", "gap_symbol": "-", "gap_weight": 0.5, "gop": -2, @@ -123,7 +123,7 @@ "scorer": {}, "sonar": true, "stress": "\u02c8\u02cc'", - "timestamp": "2024-05-03 17:59", + "timestamp": "2024-08-02 10:21", "tones": "\u00b9\u00b2\u00b3\u2074\u2075\u2076\u2077\u2078\u2079\u2070\u2081\u2082\u2083\u2084\u2085\u2086\u2087\u2088\u2089\u20800123456789\u02e5\u02e6\u02e7\u02e8\u02e9\u02ea\u02eb-\ua708-\ua709-\ua70a-\ua70b-\ua70c-\ua70d-\ua70e-\ua70f-\ua710-\ua711-\ua712-\ua713-\ua714-\ua715-\ua716-\ua717-\ua718-\ua719-\ua71a-\ua700-\ua701-\ua702-\ua703-\ua704-\ua705-\ua706-\ua707", "tree_calc": "neighbor", "unique_sequences": true, diff --git a/cldf/requirements.txt b/cldf/requirements.txt index f97d634..19ecbc1 100644 --- a/cldf/requirements.txt +++ b/cldf/requirements.txt @@ -1,9 +1,9 @@ appdirs==1.4.4 attrs==23.2.0 -Babel==2.14.0 +Babel==2.15.0 bibtexparser==2.0.0b7 bs4==0.0.2 -certifi==2024.2.2 +certifi==2024.7.4 cldfbench==1.14.0 cldfcatalog==1.5.1 cldfzenodo==2.1.1 @@ -11,46 +11,46 @@ clldutils==3.22.2 colorama==0.4.6 colorlog==6.8.2 csvw==3.3.0 -exceptiongroup==1.2.1 gitdb==4.0.11 +greenlet==3.0.3 idna==3.7 iniconfig==2.0.0 isodate==0.6.1 -jsonschema==4.22.0 +jsonschema==4.23.0 lingpy==2.6.13 -lxml==5.2.1 +lxml==5.2.2 Markdown==3.6 nameparser==1.1.3 -networkx==3.2.1 +networkx==3.3 newick==1.9.0 -numpy==1.26.4 -openpyxl==3.1.2 -packaging==24.0 +numpy==2.0.1 +openpyxl==3.1.5 +packaging==24.1 pluggy==1.5.0 pybtex==0.24.0 -pycldf==1.38.0 +pycldf==1.38.1 pyclts==3.2.0 pyconcepticon==3.1.0 -pycountry==23.12.11 +pycountry==24.6.1 pyglottolog==3.13.0 pylatexenc==2.10 -pylexibank==3.4.0 -pytest==8.2.0 +pylexibank==3.5.0 +pytest==8.3.2 python-dateutil==2.9.0.post0 rdflib==7.0.0 referencing==0.35.1 -regex==2024.4.28 -requests==2.31.0 +regex==2024.7.24 +requests==2.32.3 rfc3986==1.5.0 segments==2.2.1 six==1.16.0 smmap==5.0.1 soupsieve==2.5 -SQLAlchemy==1.4.52 +SQLAlchemy==1.4.53 tabulate==0.9.0 termcolor==2.4.0 tqdm==4.66.4 uritemplate==4.1.1 -urllib3==2.2.1 +urllib3==2.2.2 xlrd==2.0.1 zenodoclient==0.5.1 \ No newline at end of file diff --git a/lexibank_servamalagasy.py b/lexibank_servamalagasy.py index ff8a6b4..239263d 100644 --- a/lexibank_servamalagasy.py +++ b/lexibank_servamalagasy.py @@ -1,5 +1,5 @@ from pathlib import Path -from pylexibank.dataset import Dataset as BaseDataset +from pylexibank.dataset import Dataset as BaseDataset from pylexibank import Language, Concept, FormSpec from pylexibank import progressbar @@ -12,7 +12,6 @@ class CustomLanguage(Language): FileName = attr.ib(default=None) Number = attr.ib(default=None) Area = attr.ib(default=None) - @attr.s @@ -26,6 +25,8 @@ class CustomConcept(Concept): class Dataset(BaseDataset): dir = Path(__file__).parent id = "servamalagasy" + writer_options = dict(keep_languages=False, keep_parameters=False) + concept_class = CustomConcept language_class = CustomLanguage @@ -35,37 +36,36 @@ def cmd_makecldf(self, args): """ concepts = {} for concept in self.conceptlists[0].concepts.values(): - cid = '{0}_{1}'.format(concept.number, slug(concept.english)) + cid = "{0}_{1}".format(concept.number, slug(concept.english)) args.writer.add_concept( - ID=cid, - Name=concept.english, - Number=concept.number, - Italian_Gloss=concept.attributes["italian"], - French_Gloss=concept.attributes["french"], - Malagasy_Gloss=concept.attributes["malagasy"], - Concepticon_ID=concept.concepticon_id, - Concepticon_Gloss=concept.concepticon_gloss - ) - concepts[concept.attributes['french']] = cid + ID=cid, + Name=concept.english, + Number=concept.number, + Italian_Gloss=concept.attributes["italian"], + French_Gloss=concept.attributes["french"], + Malagasy_Gloss=concept.attributes["malagasy"], + Concepticon_ID=concept.concepticon_id, + Concepticon_Gloss=concept.concepticon_gloss, + ) + concepts[concept.attributes["french"]] = cid languages = {} for language in self.languages: args.writer.add_language( - ID=language['ID'], - Name=language['Name'], - Latitude=language['Latitude'], - Longitude=language['Longitude'], - Area=language['Area'], - Number=language['Number'], - Glottocode=language['Glottocode'] - ) - languages[language['FileName']] = language['ID'] + ID=language["ID"], + Name=language["Name"], + Latitude=language["Latitude"], + Longitude=language["Longitude"], + Area=language["Area"], + Number=language["Number"], + Glottocode=language["Glottocode"], + ) + languages[language["FileName"]] = language["ID"] args.writer.add_sources() - for row in progressbar(self.raw_dir.read_csv('data.tsv', delimiter='\t', - dicts=True)): + for row in progressbar(self.raw_dir.read_csv("data.tsv", delimiter="\t", dicts=True)): args.writer.add_form( - Language_ID=languages[row['FILENAME']], - Parameter_ID=concepts[row['FRENCH']], - Value=row['FORM'], - Form=row['FORM'], - Source='Serva2020' - ) + Language_ID=languages[row["FILENAME"]], + Parameter_ID=concepts[row["FRENCH"]], + Value=row["FORM"], + Form=row["FORM"], + Source="Serva2020", + )