Skip to content

Commit

Permalink
Merge pull request #420 from empiriker/es
Browse files Browse the repository at this point in the history
Extract linkages from Spanish Wiktionary
  • Loading branch information
xxyzz authored Dec 6, 2023
2 parents b4091a5 + 44e35cb commit ecb5e11
Show file tree
Hide file tree
Showing 8 changed files with 581 additions and 25 deletions.
314 changes: 307 additions & 7 deletions json_schema/es.json
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,45 @@
"title": "Example",
"type": "object"
},
"Linkage": {
"additionalProperties": false,
"properties": {
"alternative_spelling": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"description": "Alternative spelling of the word",
"title": "Alternative Spelling"
},
"note": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"title": "Note"
},
"word": {
"title": "Word",
"type": "string"
}
},
"required": [
"word"
],
"title": "Linkage",
"type": "object"
},
"Reference": {
"additionalProperties": false,
"properties": {
Expand Down Expand Up @@ -193,6 +232,21 @@
"Sense": {
"additionalProperties": false,
"properties": {
"antonyms": {
"anyOf": [
{
"items": {
"$ref": "#/$defs/Linkage"
},
"type": "array"
},
{
"type": "null"
}
],
"default": [],
"title": "Antonyms"
},
"categories": {
"default": [],
"description": "list of sense-disambiguated category names extracted from (a subset of) the Category links on the page",
Expand All @@ -202,6 +256,36 @@
"title": "Categories",
"type": "array"
},
"compounds": {
"anyOf": [
{
"items": {
"$ref": "#/$defs/Linkage"
},
"type": "array"
},
{
"type": "null"
}
],
"default": [],
"title": "Compounds"
},
"derived": {
"anyOf": [
{
"items": {
"$ref": "#/$defs/Linkage"
},
"type": "array"
},
{
"type": "null"
}
],
"default": [],
"title": "Derived"
},
"examples": {
"default": [],
"description": "List of examples",
Expand All @@ -219,6 +303,81 @@
"title": "Glosses",
"type": "array"
},
"hypernyms": {
"anyOf": [
{
"items": {
"$ref": "#/$defs/Linkage"
},
"type": "array"
},
{
"type": "null"
}
],
"default": [],
"title": "Hypernyms"
},
"hyponyms": {
"anyOf": [
{
"items": {
"$ref": "#/$defs/Linkage"
},
"type": "array"
},
{
"type": "null"
}
],
"default": [],
"title": "Hyponyms"
},
"idioms": {
"anyOf": [
{
"items": {
"$ref": "#/$defs/Linkage"
},
"type": "array"
},
{
"type": "null"
}
],
"default": [],
"title": "Idioms"
},
"meronyms": {
"anyOf": [
{
"items": {
"$ref": "#/$defs/Linkage"
},
"type": "array"
},
{
"type": "null"
}
],
"default": [],
"title": "Meronyms"
},
"related": {
"anyOf": [
{
"items": {
"$ref": "#/$defs/Linkage"
},
"type": "array"
},
{
"type": "null"
}
],
"default": [],
"title": "Related"
},
"senseid": {
"anyOf": [
{
Expand All @@ -232,14 +391,20 @@
"description": "Sense number used in Wiktionary",
"title": "Senseid"
},
"subsenses": {
"synonyms": {
"anyOf": [
{
"items": {
"$ref": "#/$defs/Linkage"
},
"type": "array"
},
{
"type": "null"
}
],
"default": [],
"description": "List of subsenses",
"items": {
"$ref": "#/$defs/Sense"
},
"title": "Subsenses",
"type": "array"
"title": "Synonyms"
},
"tags": {
"default": [],
Expand Down Expand Up @@ -463,6 +628,21 @@
"additionalProperties": false,
"description": "WordEntry is a dictionary containing lexical information of a single word extracted from Wiktionary with wiktextract.",
"properties": {
"antonyms": {
"anyOf": [
{
"items": {
"$ref": "#/$defs/Linkage"
},
"type": "array"
},
{
"type": "null"
}
],
"default": [],
"title": "Antonyms"
},
"categories": {
"default": [],
"description": "list of non-disambiguated categories for the word",
Expand All @@ -472,6 +652,81 @@
"title": "Categories",
"type": "array"
},
"compounds": {
"anyOf": [
{
"items": {
"$ref": "#/$defs/Linkage"
},
"type": "array"
},
{
"type": "null"
}
],
"default": [],
"title": "Compounds"
},
"derived": {
"anyOf": [
{
"items": {
"$ref": "#/$defs/Linkage"
},
"type": "array"
},
{
"type": "null"
}
],
"default": [],
"title": "Derived"
},
"hypernyms": {
"anyOf": [
{
"items": {
"$ref": "#/$defs/Linkage"
},
"type": "array"
},
{
"type": "null"
}
],
"default": [],
"title": "Hypernyms"
},
"hyponyms": {
"anyOf": [
{
"items": {
"$ref": "#/$defs/Linkage"
},
"type": "array"
},
{
"type": "null"
}
],
"default": [],
"title": "Hyponyms"
},
"idioms": {
"anyOf": [
{
"items": {
"$ref": "#/$defs/Linkage"
},
"type": "array"
},
{
"type": "null"
}
],
"default": [],
"title": "Idioms"
},
"lang_code": {
"description": "Wiktionary language code",
"examples": [
Expand All @@ -488,6 +743,21 @@
"title": "Lang Name",
"type": "string"
},
"meronyms": {
"anyOf": [
{
"items": {
"$ref": "#/$defs/Linkage"
},
"type": "array"
},
{
"type": "null"
}
],
"default": [],
"title": "Meronyms"
},
"pos": {
"default": null,
"description": "Part of speech type",
Expand All @@ -500,6 +770,21 @@
"title": "Pos Title",
"type": "string"
},
"related": {
"anyOf": [
{
"items": {
"$ref": "#/$defs/Linkage"
},
"type": "array"
},
{
"type": "null"
}
],
"default": [],
"title": "Related"
},
"senses": {
"anyOf": [
{
Expand Down Expand Up @@ -545,6 +830,21 @@
"default": [],
"title": "Spellings"
},
"synonyms": {
"anyOf": [
{
"items": {
"$ref": "#/$defs/Linkage"
},
"type": "array"
},
{
"type": "null"
}
],
"default": [],
"title": "Synonyms"
},
"translations": {
"anyOf": [
{
Expand Down
4 changes: 4 additions & 0 deletions src/wiktextract/data/es/linkage_subtitles.json
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
{
"antónimo": "antonyms",
"compuestos": "compounds",
"derivad": "derived",
"hipónimo": "hyponyms",
"hiperónimo": "hypernyms",
"merónimo": "meronyms",
"locucion": "idioms",
"locuciones": "idioms",
"relacionado": "related",
"refranes": "proverbs",
"sinónimo": "synonyms"
}
Loading

0 comments on commit ecb5e11

Please sign in to comment.