fix issue [xueyuanl#5 Doesn't show definitions]; restore support for italian
vladislavzl · Nov 4, 2019 · 5f51970 · 5f51970
1 parent bbeab86
commit 5f51970
Show file tree

Hide file tree

Showing 5 changed files with 32 additions and 31 deletions.
diff --git a/.gitignore b/.gitignore
@@ -2,3 +2,4 @@ dict.log
 .idea
 __pycache__
 .DS_Store
+dist
diff --git a/README.md b/README.md
@@ -26,8 +26,8 @@ cambrinary -w world -t chinese
 - german
 - chinese
 - japanese
+- italian 
 - russian (temporarily no, cause cambrige web issue)
-- italian (temporarily no, cause cambrige web issue)
 
 ### multi-words support
 powered by coroutine, support as many as words you like,

diff --git a/cambrinary/__init__.py b/cambrinary/__init__.py
@@ -1,3 +1,3 @@
 """A Linux terminal online dictionary, based on `Cambridge Dictionary <https://dictionary.cambridge.org>`_"""
 
-__version__ = '0.1.0'
+__version__ = '0.1.1'
diff --git a/cambrinary/cambrinary.py b/cambrinary/cambrinary.py
@@ -59,7 +59,7 @@ def parse_xref(xref, indent=4):
 
 def parse_pronunciation(part_speech, trans):
     """
-    retrive the pronunciation from part_speech
+    retrieve the pronunciation from part_speech
     :param part_speech:
     :param trans:
     :return:
@@ -132,7 +132,7 @@ def parse_pronunciation(part_speech, trans):
                     res += '{} {} '.format(colors.pron_region(region.get_text().upper()),
                                            colors.pronunciation('/{}/'.format(ipa.get_text())))
             pronunciation.prons = res
-    logger.info('the pronunciation is: '.format(pronunciation.to_str()))
+    logger.info('the pronunciation is: {}'.format(pronunciation.to_str()))
     return pronunciation
 
 
@@ -159,36 +159,14 @@ def get_sense_block_title(block):
     return res
 
 
-def get_dictionary(html):
-    """
-    retrive dictionary body.
-    :param html:
-    :return:
-    """
-    PR_DICTIONARY = 'pr dictionary'
-    ENTRY_BODY = 'entry-body'
-    parsed_html = BeautifulSoup(html, features='html.parser')
-    # this area contains all the dictionaries
-    res_dict = None
-    dictionaries = parsed_html.body.findAll('div', attrs={'class': PR_DICTIONARY})
-    if dictionaries:  # get at least one dictionary
-
-        res_dict = dictionaries[0]
-        logger.info('get a dictionary by {}'.format(PR_DICTIONARY))
-    else:  # no dictionary, just entry-body
-        res_dict = parsed_html.body.find('div', attrs={'class': ENTRY_BODY})
-        logger.info('get a dictionary by {}'.format(ENTRY_BODY))
-    return res_dict
-
-
 def parse_pad_indents(block, args):
     res = []
     sense_body = block.find('div', attrs={'class': 'sense-body dsense_b'})
     pad_indents = sense_body.findAll('div', attrs={'class': 'def-block ddef_block'})
     logger.info('the number of pad_indent is {}'.format(len(pad_indents) if pad_indents else 0))
 
     def get_definition(p):
-        d = p.find('div', attrs={'class': 'def ddef_d'})
+        d = p.find('div', attrs={'class': 'def ddef_d db'})
         return d.get_text() if d else None
 
     def get_trans(body):
@@ -252,8 +230,8 @@ def get_part_speeches(dict, trans):
         return dict.findAll('div', attrs={'class': 'pr entry-body__el'})
     if trans == FR:
         return dict.findAll('div', attrs={'class': 'd pr di english-french kdic'})
-    if trans == RU or IT:
-        return dict.findAll('div', attrs={'class': 'entry-body__el'}) or \
+    if trans == RU or trans == IT:
+        return dict.findAll('div', attrs={'class': 'pr entry-body__el'}) or \
                dict.findAll('div', attrs={
                    'class': 'di $dict entry-body__el entry-body__el--smalltop clrd js-share-holder'})  # for look-up case
 
@@ -289,8 +267,30 @@ def parse_part_speeches(part_speeches, trans):
     return res
 
 
+def get_dictionary(html):
+    """
+    Retrieve the dictionary body from a fetched page.
+    :param html: page markup; parsed here with BeautifulSoup's 'html.parser'
+    :return: the first 'pr dictionary' div if any exist, else the 'entry-body' div lookup result
+    """
+    pr_dictionary = 'pr dictionary'
+    entry_body = 'entry-body'
+    parsed_html = BeautifulSoup(html, features='html.parser')
+    # this area contains all the dictionaries
+    res_dict = None
+    dictionaries = parsed_html.body.findAll('div', attrs={'class': pr_dictionary})
+    if dictionaries:  # get at least one dictionary
+
+        res_dict = dictionaries[0]
+        logger.info('get a dictionary by {}'.format(pr_dictionary))
+    else:  # no dictionary, just entry-body
+        res_dict = parsed_html.body.find('div', attrs={'class': entry_body})
+        logger.info('get a dictionary by {}'.format(entry_body))
+    return res_dict
+
+
 async def look_up(word, trans, results):
-    logger.info('begin to retrive word: [{}] in {}'.format(word, trans))
+    logger.info('begin to retrieve word: [{}] in {}'.format(word, trans))
     html = await load(word, translation[trans])
     dictionary = get_dictionary(html)
     if not dictionary:

diff --git a/cambrinary/country_const.py b/cambrinary/country_const.py
@@ -6,4 +6,4 @@
 RU = 'russian'
 IT = 'italian'
 
-SUPPORT_LANG = [GB, CN, DE, JP, FR]  # no support russian and italian, cause the cambridge server issue
+SUPPORT_LANG = [GB, CN, DE, JP, FR, IT]  # no support for russian temporarily due to irregular HTML tags.