From d06fdc86f02b2d9aa35ced6b477c833ce6b68016 Mon Sep 17 00:00:00 2001 From: Yosuke Higashi Date: Thu, 29 Feb 2024 09:15:01 +0000 Subject: [PATCH 1/3] move nltk.download --- src/langcheck/augment/en/_gender/_gender.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/src/langcheck/augment/en/_gender/_gender.py b/src/langcheck/augment/en/_gender/_gender.py index fc76fbf2..a902b6fa 100644 --- a/src/langcheck/augment/en/_gender/_gender.py +++ b/src/langcheck/augment/en/_gender/_gender.py @@ -11,15 +11,6 @@ from langcheck.augment.en._gender._gender_pronouns import (_PRONOUNS_DICT, _BaseGenderPronouns) -try: - nltk.data.find('averaged_perceptron_tagger') -except LookupError: - nltk.download('averaged_perceptron_tagger') -try: - nltk.data.find('punkt') -except LookupError: - nltk.download('punkt') - # This dictionary is used to determine the form of the pronoun. # Note that his and hers are not included in this dictionary because they can be # either of two different forms depending on the context. @@ -80,6 +71,14 @@ def _replace_gender_pronouns( Returns: str: Augmented text. """ + try: + nltk.data.find('averaged_perceptron_tagger') + except LookupError: + nltk.download('averaged_perceptron_tagger') + try: + nltk.data.find('punkt') + except LookupError: + nltk.download('punkt') tagged_words = pos_tag(word_tokenize(text)) augmented_words = [ _replace_pronoun(word, tag, target_pronouns) From 19607e8308fb1a962e9be1e2bc0eb834b2cc3381 Mon Sep 17 00:00:00 2001 From: Yosuke Higashi <107823399+yosukehigashi@users.noreply.github.com> Date: Fri, 1 Mar 2024 10:28:35 +0900 Subject: [PATCH 2/3] Update src/langcheck/augment/en/_gender/_gender.py Co-authored-by: Koki Ryu --- src/langcheck/augment/en/_gender/_gender.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/langcheck/augment/en/_gender/_gender.py b/src/langcheck/augment/en/_gender/_gender.py index a902b6fa..0930123f 100644 --- a/src/langcheck/augment/en/_gender/_gender.py +++ b/src/langcheck/augment/en/_gender/_gender.py @@ -72,7 +72,7 @@ def _replace_gender_pronouns( str: Augmented text. """ try: - nltk.data.find('averaged_perceptron_tagger') + nltk.data.find('taggers/averaged_perceptron_tagger') except LookupError: nltk.download('averaged_perceptron_tagger') try: From e9031296ce9fd72b5920fc299a879a70e9600e05 Mon Sep 17 00:00:00 2001 From: Yosuke Higashi <107823399+yosukehigashi@users.noreply.github.com> Date: Fri, 1 Mar 2024 10:28:44 +0900 Subject: [PATCH 3/3] Update src/langcheck/augment/en/_gender/_gender.py Co-authored-by: Koki Ryu --- src/langcheck/augment/en/_gender/_gender.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/langcheck/augment/en/_gender/_gender.py b/src/langcheck/augment/en/_gender/_gender.py index 0930123f..868ae4ad 100644 --- a/src/langcheck/augment/en/_gender/_gender.py +++ b/src/langcheck/augment/en/_gender/_gender.py @@ -76,7 +76,7 @@ def _replace_gender_pronouns( except LookupError: nltk.download('averaged_perceptron_tagger') try: - nltk.data.find('punkt') + nltk.data.find('tokenizers/punkt') except LookupError: nltk.download('punkt') tagged_words = pos_tag(word_tokenize(text))