Skip to content

Commit

Permalink
#10 hxltmcli: better tolerance to the soon to be deprecated format
Browse files Browse the repository at this point in the history
  • Loading branch information
fititnt committed Nov 28, 2021
1 parent 9ad50d1 commit 4a0d186
Show file tree
Hide file tree
Showing 9 changed files with 80 additions and 23 deletions.
53 changes: 40 additions & 13 deletions bin/hxltmcli.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,11 @@
[eng-Latn]_
"""

# TODO: since this commit, a syntax error breaks usage on Python 3.7. See
# https://github.com/EticaAI/hxltm/runs/4343555339?check_suite_focus=true
# Re-check later whether it still persists or was a false positive.

import sys
import os
import logging
Expand Down Expand Up @@ -248,7 +253,7 @@
from liquid.context import Context as LiquidContext
# from liquid.token import TOKEN_EXPRESSION as LIQUID_TOKEN_EXPRESSION

__VERSION__ = "v0.8.9"
__VERSION__ = "v0.9.0-rc1"

# _[eng-Latn]
# Note: If you are doing a fork and making it public, please customize
Expand All @@ -271,6 +276,7 @@

__ATTRIBUTUM_OPTIONEM__ = {
# No annotationem (text notes) at concept level: please use meta_conceptum
'accuratum': ['#item+terminum+__linguam__+accuratum'],
'annotationem': [
'#meta+linguam+__linguam__+annotationem',
'#meta+terminum+__linguam__+annotationem',
Expand Down Expand Up @@ -348,11 +354,7 @@


class HXLTMCLI: # pylint: disable=too-many-instance-attributes
"""
_[eng-Latn] hxltmcli is a working draft of a tool to
convert prototype of translation memory stored with HXL to
XLIFF v2.1
[eng-Latn]_
"""HXLTMCLI
"""

def __init__(self):
Expand Down Expand Up @@ -454,9 +456,6 @@ def _initiale_hxltm_asa(self, archivum: str) -> bool:
bool: If okay.
"""

# with open(archivum, 'r') as arch:
# hxltm_crudum = arch.read().splitlines()

self.hxltm_asa = HXLTMASA(
archivum,
ontologia=self._ontologia,
Expand Down Expand Up @@ -1172,6 +1171,7 @@ def in_noop(self, hxlated_input, tab_output, is_stdout):
Requires that the input must be a valid HXLated file
"""
# pylint: disable=no-self-use

with open(hxlated_input, 'r') as csv_file:
csv_reader = csv.reader(csv_file)
Expand Down Expand Up @@ -1632,7 +1632,7 @@ class HXLTMArgumentum: # pylint: disable=too-many-instance-attributes
tmeta: InitVar[dict] = None
agendum_linguam: InitVar[List[Type['HXLTMLinguam']]] = []
auxilium_linguam: InitVar[List[Type['HXLTMLinguam']]] = []
agendum_attributum: InitVar[List[Type[str]]] = []
agendum_attributum: InitVar[List[str]] = []
fontem_linguam: InitVar[Type['HXLTMLinguam']] = None
objectivum_linguam: InitVar[Type['HXLTMLinguam']] = None
objectivum_formatum: InitVar[str] = 'HXLTM'
Expand All @@ -1651,7 +1651,6 @@ class HXLTMArgumentum: # pylint: disable=too-many-instance-attributes
versionem: InitVar[bool] = False
# crudum_argparse: InitVar[Dict] = {}

# def de_argparse(self, args_rem: Type['ArgumentParser']):
def de_argparse(self, args_rem: Dict = None):
"""Argūmentum de Python argparse
Expand All @@ -1662,7 +1661,8 @@ def de_argparse(self, args_rem: Dict = None):
Returns:
[HXLTMArgumentum]: Ego HXLTMArgumentum
"""
# print(args_rem)
# pylint: disable=too-many-branches,too-many-statements

if args_rem is not None:
if hasattr(args_rem, 'outfile'):
self.objectivum_archivum_nomen = args_rem.outfile
Expand Down Expand Up @@ -1956,7 +1956,7 @@ def est_venandum_insectum(self, rem: bool):

return self

def v(self, _verbosum: bool = None): # pylint: disable=invalid-name
def v(self, _verbosum: bool = None):
"""Ego python Dict
Trivia:
Expand All @@ -1969,6 +1969,8 @@ def v(self, _verbosum: bool = None): # pylint: disable=invalid-name
Returns:
[Dict]: Python objectīvum
"""
# pylint: disable=invalid-name,too-many-branches

# TODO: add a common helper for this, shared by all other .v()
# TODO: make it at least one level deeper (or recursive)

Expand Down Expand Up @@ -2066,6 +2068,7 @@ class HXLTMDatum:
#>>> crudum_datum
"""
# pylint: disable=too-many-instance-attributes

# crudum: InitVar[List] = []
crudum_caput: InitVar[List] = []
Expand Down Expand Up @@ -4982,6 +4985,9 @@ class HXLTMOntologia:
"""

# dēprecātum, https://en.wiktionary.org/wiki/deprecatus#Latin
_deprecatum: Type[set] = set()

def __init__(self, ontologia: Dict, vacuum: bool = False):
"""
_[eng-Latn] Constructs all the necessary attributes for the
Expand Down Expand Up @@ -5421,6 +5427,15 @@ def quod_nomen_breve_de_hxl(self, hxl_hashtag: str) -> str:
nomen_breve = 'conceptum_typum'

elif hxl_hashtag.startswith('#status+rem+accuratum+i_'):
# Deprecated
self._deprecatum.add(
'deprecatum [{0}]: #item+terminum+__linguam__+accuratum'.format(
hxl_hashtag)
)
nomen_breve = 'accuratum__L__'

elif hxl_hashtag.startswith('#item+terminum+i_') and \
hxl_hashtag.endswith('+accuratum'):
nomen_breve = 'accuratum__L__'

elif hxl_hashtag.startswith('#status+rem+textum+i_'):
Expand All @@ -5430,6 +5445,12 @@ def quod_nomen_breve_de_hxl(self, hxl_hashtag: str) -> str:
nomen_breve = 'statum_rem_json__L__'

elif hxl_hashtag.startswith('#item+rem+i_'):
# Deprecated
self._deprecatum.add('deprecatum [{0}]'.format(hxl_hashtag))
nomen_breve = 'rem__L__'

elif hxl_hashtag.startswith('#item+terminum+i_') and \
hxl_hashtag.endswith('+rem'):
nomen_breve = 'rem__L__'

return nomen_breve
Expand Down Expand Up @@ -5691,6 +5712,12 @@ def initialle(self, strictum: bool): # pylint: disable=too-many-branches
if len(parts[0]) == 2:
self.iso6391a2 = parts[0].lower()

# Improved message over
# ValueError: not enough values to unpack (expected 2, got 1)
if self.linguam.find('-') == -1:
raise ValueError(
'HXLTMLinguam zzz-Zzzz? [{0}]'.format(str(self.linguam)))

self.iso6393, self.iso115924 = \
list(self.linguam.split('-'))

Expand Down
8 changes: 7 additions & 1 deletion bin/hxltmdexml.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,10 @@
[eng-Latn]_
"""

# TODO: since this commit, a syntax error breaks usage on Python 3.7. See
# https://github.com/EticaAI/hxltm/runs/4343555339?check_suite_focus=true
# Re-check later whether it still persists or was a false positive.

# import xml.etree.ElementTree as et
import sys
import os
Expand Down Expand Up @@ -191,7 +195,7 @@
# pip3 install langcodes
# import langcodes

__VERSION__ = "v0.8.9"
__VERSION__ = "v0.9.0-rc1"

# _[eng-Latn]
# Note: If you are doing a fork and making it public, please customize
Expand Down Expand Up @@ -235,6 +239,7 @@
'meta_conceptum': ['#meta+conceptum'],
'meta_linguam': ['#meta+linguam+__linguam__'],
'meta_terminum': ['#meta+terminum+__linguam__'],
# __nomen_breve: 'rem__L__'
'rem': ['#item+terminum+__linguam__+rem'],
}
__ATTRIBUTUM_DEFALLO__ = [
Expand Down Expand Up @@ -3252,6 +3257,7 @@ def __init__(
ontologia: Type['HXLTMOntologia'],
agendum_linguam: Type[List['HXLTMLinguam']] = [],
agendum_attributum: Type[List[str]] = [],
# agendum_attributum: Type[List['str']] = [],
fontem_linguam: Type['HXLTMLinguam'] = None,
objectivum_linguam: Type['HXLTMLinguam'] = None
):
Expand Down
6 changes: 6 additions & 0 deletions docs/eng-Latn/hxltm.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,9 @@ include::../testum/hxltm-salve-mundi.hxltm.xml[]

////
hxltmdexml --agendum-linguam lat-Latn,arb-Arab testum/hxltm-salve-mundi.hxltm.xml > testum/resultatum/hxltm-salve-mundi.tm.hxl.csv
hxltmcli --agendum-linguam eng-Latn@en,por-Latn@pt hxl-non-hxltm-exemplum.hxl.csv
////
[source,bash]
----
Expand All @@ -68,6 +71,9 @@ hxltmdexml --agendum-linguam lat-Latn,arb-Arab testum/hxltm-salve-mundi.hxltm.xm
include::../testum/resultatum/hxltm-salve-mundi.tm.hxl.csv[]
----

> TODO: make it work with new format
> `hxltmcli hxltm-exemplum-glossarium-minimum.tm.hxl.csv --objectivum-TMX`

////
== Drafts
Expand Down
2 changes: 2 additions & 0 deletions ontologia/cor.hxltm.215.yml
Original file line number Diff line number Diff line change
Expand Up @@ -1711,7 +1711,9 @@ ontologia:
# Trivia: linguam, https://en.wiktionary.org/wiki/lingua#Latin
__linguam__:
# Exemplum: '#item +rem +i_en +i_eng +is_Latn'
# TODO: invert __HXL with __HXL_deprecatum
__HXL: '#item +rem __linguam__'
__HXL_deprecatum: ['#item +terminum +__linguam__ +rem']
__nomen_breve: 'rem__L__' # rem.rem__L__
__id: ontologia.commune.rem.__linguam__
__libellam: rem
Expand Down
7 changes: 7 additions & 0 deletions testum/hxl-non-hxltm-exemplum.hxl.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#vocab+id,#vocab+term+i_en,#vocab+term+i_pt
1,UN,ONU
2,United Nations,Organização das Nações Unidas
3,IFRC,FICV/CV
4,International Federation of Red Cross and Red Crescent Societies ,Federação Internacional das Sociedades da Cruz Vermelha e do Crescente Vermelho
5,AI,AI
6,Amnesty International,Amnistia Internacional
14 changes: 7 additions & 7 deletions testum/hxltm-exemplum-glossarium-minimum.tm.hxl.csv
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#item +rem +i_en +i_eng +is_Latn,#item +rem +i_pt +i_por +is_Latn
UN,ONU
United Nations,Organização das Nações Unidas
IFRC,FICV/CV
International Federation of Red Cross and Red Crescent Societies ,Federação Internacional das Sociedades da Cruz Vermelha e do Crescente Vermelho
AI,AI
Amnesty International,Amnistia Internacional
#item+conceptum+codicem,#item+terminum+i_en+i_eng+is_Latn+rem,#item+terminum+i_pt+i_por+is_Latn+rem
1,UN,ONU
2,United Nations,Organização das Nações Unidas
3,IFRC,FICV/CV
4,International Federation of Red Cross and Red Crescent Societies ,Federação Internacional das Sociedades da Cruz Vermelha e do Crescente Vermelho
5,AI,AI
6,Amnesty International,Amnistia Internacional
2 changes: 1 addition & 1 deletion testum/hxltmcli--help_eng-Latn.txt
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ usage: hxltmcli [-h] [--sheet [number]] [--selector [path]]
[--venandum-insectum-est] [--versionem]
[infile] [outfile]

_[eng-Latn] hxltmcli v0.8.9 is an example of implementation of the
_[eng-Latn] hxltmcli v0.9.0-rc1 is an example of implementation of the
Multilingual Terminology in Humanitarian Language Exchange (HXLTM).
For XML processing, use hxltmdexml.
[eng-Latn]_"
Expand Down
2 changes: 1 addition & 1 deletion testum/hxltmdexml--help_eng-Latn.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ usage: hxltmdexml [-h] [--agendum-linguam [agendum_linguam]]
[infile] [outfile]

_[eng-Latn]
hxltmdexml v0.8.9 is an example of implementation of the
hxltmdexml v0.9.0-rc1 is an example of implementation of the
Multilingual Terminology in Humanitarian Language Exchange (HXLTM)
focused on convert XML files to the pivot format HXLTM.
[eng-Latn]_"
Expand Down
9 changes: 9 additions & 0 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,15 @@
# conda, etc, etc etc
# @see https://github.com/tox-dev

#### lint
# pylint --disable=W0511,C0302 bin/hxltmcli.py
# pylint --disable=W0511,C0302 bin/hxltmdexml.py
# pylint --disable=W0511,C0302 bin/
#
# See also https://pypi.org/project/autopep8/
# autopep8 --list-fixes bin/hxltmcli.py
# autopep8 --diff bin/hxltmcli.py

# ___________________________________________________________________________ #
# Here is where things start
[tox]
Expand Down

0 comments on commit 4a0d186

Please sign in to comment.