diff --git a/scrapebotv3.py b/scrapebotv3.py index b655e78..b2adc72 100644 --- a/scrapebotv3.py +++ b/scrapebotv3.py @@ -222,6 +222,11 @@ def returnListofDocumentURLsonResultspage(bsObj): s = session.get(documenturl, headers=headers) bsObj = BeautifulSoup(s.content, 'html.parser') nameofcase = bsObj.find('span', class_ = "caseTitle").get_text() + + import string + for char in string.punctuation: #strip punctuation marks from case name (each is replaced with a space) + nameofcase = nameofcase.replace(char, " ") + if bsObj.find('span', class_ = 'Citation offhyperlink'): citation = bsObj.find('span', class_ = 'Citation offhyperlink').get_text() combinednameofcase = (nameofcase + citation)