Skip to content

Commit

Permalink
fix-2514: Fix ceurws scraper where URN is unavailable
Browse files: browse the repository at this point in the history
  • Loading branch information
DaxServer authored and fnielsen committed Sep 10, 2024
1 parent 84d1ce4 commit 51a2d05
Showing 1 changed file with 13 additions and 9 deletions.
22 changes: 13 additions & 9 deletions scholia/scrape/ceurws.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -358,15 +358,19 @@ def proceedings_url_to_proceedings(url, return_tree=False):
 if len(acronym_elements) == 1:
 proceedings['shortname'] = acronym_elements[0].text

-proceedings['urn'] = \
-tree.xpath("//span[@class='CEURURN']")[0].text
-
-proceedings['title'] = re.sub(
-r'\s+', ' ',
-tree.xpath("//span[@class='CEURFULLTITLE']")[0].text).strip()
-
-proceedings['date'] = \
-tree.xpath("//span[@class='CEURPUBDATE']")[0].text
+urn_elements = tree.xpath("//span[@class='CEURURN']")
+if len(urn_elements) == 1:
+proceedings['urn'] = urn_elements[0].text
+
+title_elements = tree.xpath("//span[@class='CEURFULLTITLE']")
+if len(title_elements) == 1:
+proceedings['title'] = re.sub(
+r'\s+', ' ',
+title_elements[0].text).strip()
+
+date_elements = tree.xpath("//span[@class='CEURPUBDATE']")
+if len(date_elements) == 1:
+proceedings['date'] = date_elements[0].text

 proceedings['published_in_q'] = 'Q27230297'

Expand Down

0 comments on commit 51a2d05

Please sign in to comment.