diff --git a/src/dackar/text_processing/Abbreviation.py b/src/dackar/text_processing/Abbreviation.py index 7b21226..d4b22e7 100644 --- a/src/dackar/text_processing/Abbreviation.py +++ b/src/dackar/text_processing/Abbreviation.py @@ -61,9 +61,13 @@ def abbreviationSub(self, text): corrected = sent splitSent = sent.split() for word in splitSent: - if word not in not_acronyms: - if word in self.abbrDict.keys(): - full = self.abbrDict[word] + splitWord = re.split(r'[-\d+]', word) # split word if word contains '-' or numbers + checkAbbr = word if len(splitWord) == 1 else splitWord[-1] + if checkAbbr not in not_acronyms: + if checkAbbr in self.abbrDict.keys(): + full = self.abbrDict[checkAbbr] + # correct the word with the full name if the abbreviation presented in the word + full = re.sub(r'%s$' %str(checkAbbr), full, word) if isinstance(full, str): corrected = re.sub(r"\b%s\b" % str(word) , full, corrected) elif isinstance(full, list) and len(full) == 1: