Skip to content

Commit

Permalink
Merge pull request #280 from yldoctrine/fix_newspaper_extractor_no_images
Browse files (browse the repository at this point in the history)

Fix article_extractor to only instantiate the Extractors in the specified file
  • Loading branch information
fhamborg authored Jul 29, 2024
2 parents ec05935 + de7d942 commit f490216
Showing 1 changed file with 9 additions and 4 deletions.
13 changes: 9 additions & 4 deletions newsplease/pipeline/extractor/article_extractor.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -18,6 +18,7 @@ def __init__(self, extractor_list):
:param extractor_list: List of strings containing all extractors to be initialized.
"""

def proc_instance(instance):
if instance is not None:
self.log.info('Extractor initialized: %s', extractor)
Expand All @@ -35,15 +36,19 @@ def proc_instance(instance):
else:
extractor_module = extractor

module = importlib.import_module(__package__ + '.extractors.' + extractor_module)
module_name = __package__ + '.extractors.' + extractor_module
module = importlib.import_module(module_name)

if isinstance(extractor, tuple):
proc_instance(getattr(module, extractor[1], None)())
else:
# check module for subclasses of AbstractExtractor
# check in the current module for subclasses of AbstractExtractor
for member in inspect.getmembers(module, inspect.isclass):
if issubclass(member[1], AbstractExtractor) and member[0] != 'AbstractExtractor':

if (
member[1].__module__ == module_name
and issubclass(member[1], AbstractExtractor)
and member[0] != 'AbstractExtractor'
):
# instantiate extractor
proc_instance(getattr(module, member[0], None)())

Expand Down

0 comments on commit f490216

Please sign in to comment.