Skip to content

Commit

Permalink
fix issue [xueyuanl#5 Doesn't show definitions]; restore support for …
Browse files Browse the repository at this point in the history
…italian
  • Loading branch information
macbook committed Nov 4, 2019
1 parent bbeab86 commit 5f51970
Show file tree
Hide file tree
Showing 5 changed files with 32 additions and 31 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@ dict.log
.idea
__pycache__
.DS_Store
dist
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ cambrinary -w world -t chinese
- german
- chinese
- japanese
- italian
- russian (temporarily unsupported because of a Cambridge website issue)
- italian (temporarily no, cause cambrige web issue)

### multi-words support
powered by coroutines, supports as many words as you like,
Expand Down
2 changes: 1 addition & 1 deletion cambrinary/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
"""A Linux terminal online dictionary, based on `Cambridge Dictionary <https://dictionary.cambridge.org>`_"""

__version__ = '0.1.0'
__version__ = '0.1.1'
56 changes: 28 additions & 28 deletions cambrinary/cambrinary.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def parse_xref(xref, indent=4):

def parse_pronunciation(part_speech, trans):
"""
retrive the pronunciation from part_speech
retrieve the pronunciation from part_speech
:param part_speech:
:param trans:
:return:
Expand Down Expand Up @@ -132,7 +132,7 @@ def parse_pronunciation(part_speech, trans):
res += '{} {} '.format(colors.pron_region(region.get_text().upper()),
colors.pronunciation('/{}/'.format(ipa.get_text())))
pronunciation.prons = res
logger.info('the pronunciation is: '.format(pronunciation.to_str()))
logger.info('the pronunciation is: {}'.format(pronunciation.to_str()))
return pronunciation


Expand All @@ -159,36 +159,14 @@ def get_sense_block_title(block):
return res


def get_dictionary(html):
"""
retrive dictionary body.
:param html:
:return:
"""
PR_DICTIONARY = 'pr dictionary'
ENTRY_BODY = 'entry-body'
parsed_html = BeautifulSoup(html, features='html.parser')
# this area contains all the dictionaries
res_dict = None
dictionaries = parsed_html.body.findAll('div', attrs={'class': PR_DICTIONARY})
if dictionaries: # get at least one dictionary

res_dict = dictionaries[0]
logger.info('get a dictionary by {}'.format(PR_DICTIONARY))
else: # no dictionary, just entry-body
res_dict = parsed_html.body.find('div', attrs={'class': ENTRY_BODY})
logger.info('get a dictionary by {}'.format(ENTRY_BODY))
return res_dict


def parse_pad_indents(block, args):
res = []
sense_body = block.find('div', attrs={'class': 'sense-body dsense_b'})
pad_indents = sense_body.findAll('div', attrs={'class': 'def-block ddef_block'})
logger.info('the number of pad_indent is {}'.format(len(pad_indents) if pad_indents else 0))

def get_definition(p):
d = p.find('div', attrs={'class': 'def ddef_d'})
d = p.find('div', attrs={'class': 'def ddef_d db'})
return d.get_text() if d else None

def get_trans(body):
Expand Down Expand Up @@ -252,8 +230,8 @@ def get_part_speeches(dict, trans):
return dict.findAll('div', attrs={'class': 'pr entry-body__el'})
if trans == FR:
return dict.findAll('div', attrs={'class': 'd pr di english-french kdic'})
if trans == RU or IT:
return dict.findAll('div', attrs={'class': 'entry-body__el'}) or \
if trans == RU or trans == IT:
return dict.findAll('div', attrs={'class': 'pr entry-body__el'}) or \
dict.findAll('div', attrs={
'class': 'di $dict entry-body__el entry-body__el--smalltop clrd js-share-holder'}) # for look-up case

Expand Down Expand Up @@ -289,8 +267,30 @@ def parse_part_speeches(part_speeches, trans):
return res


def get_dictionary(html):
    """
    Retrieve the dictionary body from the html of a looked-up page.

    The first ``div`` whose class attribute is ``pr dictionary`` is preferred; when the page
    has none, the ``entry-body`` ``div`` is used instead.

    :param html: markup of the result page
    :return: the matching tag, or None when the page has no ``<body>`` or no dictionary block
    """
    pr_dictionary = 'pr dictionary'
    entry_body = 'entry-body'
    body = BeautifulSoup(html, features='html.parser').body
    if body is None:
        # html.parser does not synthesise a <body>, so an empty or truncated response has
        # none; report "nothing found" instead of raising AttributeError on None.
        logger.info('no <body> in the html, cannot get a dictionary')
        return None
    # this area contains all the dictionaries
    dictionaries = body.find_all('div', attrs={'class': pr_dictionary})
    if dictionaries:  # got at least one dictionary
        res_dict = dictionaries[0]
        logger.info('get a dictionary by {}'.format(pr_dictionary))
        return res_dict
    # no dictionary, just entry-body
    res_dict = body.find('div', attrs={'class': entry_body})
    logger.info('get a dictionary by {}'.format(entry_body))
    return res_dict


async def look_up(word, trans, results):
logger.info('begin to retrive word: [{}] in {}'.format(word, trans))
logger.info('begin to retrieve word: [{}] in {}'.format(word, trans))
html = await load(word, translation[trans])
dictionary = get_dictionary(html)
if not dictionary:
Expand Down
2 changes: 1 addition & 1 deletion cambrinary/country_const.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,4 @@
RU = 'russian'
IT = 'italian'

SUPPORT_LANG = [GB, CN, DE, JP, FR] # no support russian and italian, cause the cambridge server issue
SUPPORT_LANG = [GB, CN, DE, JP, FR, IT] # no support for russian temporarily due to irregular HTML tags.

0 comments on commit 5f51970

Please sign in to comment.