Skip to content

Commit

Permalink
add exact result support (#80)
Browse files Browse the repository at this point in the history
  • Loading branch information
markkvdb committed Oct 6, 2020
1 parent a6c60cd commit 616d351
Show file tree
Hide file tree
Showing 7 changed files with 25 additions and 1 deletion.
3 changes: 3 additions & 0 deletions jobfunnel/backend/scrapers/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,9 @@ def __init__(self, session: Session, config: 'JobFunnelConfigManager',
self.session = session
self.config = config
self.query = ' '.join(config.search_config.keywords)
# If an exact match is requested, wrap the whole query in quotes.
if self.config.search_config.exact_result:
self.query = f'"{self.query}"'
if self.headers:
self.session.headers.update(self.headers)

Expand Down
8 changes: 8 additions & 0 deletions jobfunnel/config/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,13 @@ def parse_cli(args: List[str]) -> Dict[str, Any]:
'(NOTE: this is only available for Indeed provider).',
)

search_group.add_argument(
'--exact-result',
dest='search.exact_result',
action='store_true',
help='Match exact search query',
)

# Proxy stuff. TODO: way to tell argparse if proxy is seen all are req'd?
proxy_group = cli_parser.add_argument_group('proxy')
proxy_group.add_argument(
Expand Down Expand Up @@ -354,6 +361,7 @@ def get_config_manager(config: Dict[str, Any]) -> JobFunnelConfigManager:
city=config['search']['city'],
distance_radius=config['search']['radius'],
return_similar_results=config['search']['similar_results'],
exact_result=config['search']['exact_result'],
max_listing_days=config['search']['max_listing_days'],
blocked_company_names=config['search']['company_block_list'],
locale=Locale[config['search']['locale']],
Expand Down
2 changes: 1 addition & 1 deletion jobfunnel/config/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ def __init__(self,
no_scrape: Optional[bool] = False,
bs4_parser: Optional[str] = BS4_PARSER,
return_similar_results: Optional[bool] = False,
exact_result: Optional[bool] = False,
delay_config: Optional[DelayConfig] = None,
proxy_config: Optional[ProxyConfig] = None) -> None:
"""Init a config that determines how we will scrape jobs from Scrapers
Expand Down Expand Up @@ -69,7 +70,6 @@ def __init__(self,
self.log_level = log_level
self.no_scrape = no_scrape
self.bs4_parser = bs4_parser # NOTE: this is not currently configurable
self.return_similar_results = return_similar_results
if not delay_config:
# We will always use a delay config to be respectful
self.delay_config = DelayConfig()
Expand Down
3 changes: 3 additions & 0 deletions jobfunnel/config/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ def __init__(self,
city: Optional[str] = None,
distance_radius: Optional[int] = None,
return_similar_results: bool = False,
exact_result: bool = False,
max_listing_days: Optional[int] = None,
blocked_company_names: Optional[List[str]] = None,
domain: Optional[str] = None,
Expand All @@ -38,6 +39,7 @@ def __init__(self,
DEFAULT_SEARCH_RADIUS.
return_similar_results (Optional[bool], optional): return similar
results (indeed). Defaults to False.
exact_result (bool, optional): search for the exact query, i.e. the
joined keywords wrapped in quotes. Defaults to False.
max_listing_days (Optional[int], optional): oldest listing to show.
Defaults to DEFAULT_MAX_LISTING_DAYS.
blocked_company_names (Optional[List[str]]): list of names of
Expand All @@ -54,6 +56,7 @@ def __init__(self,
self.providers = providers
self.keywords = keywords
self.return_similar_results = return_similar_results # Indeed.X thing
self.exact_result = exact_result
self.max_listing_days = max_listing_days or DEFAULT_MAX_LISTING_DAYS
self.blocked_company_names = blocked_company_names
self.remoteness = remoteness
Expand Down
5 changes: 5 additions & 0 deletions jobfunnel/config/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,11 @@
'type': 'boolean',
'default': DEFAULT_RETURN_SIMILAR_RESULTS,
},
'exact_result': {
'required': False,
'type': 'boolean',
'default': DEFAULT_EXACT_RESULT,
},
'keywords': {
'required': True,
'type': 'list',
Expand Down
1 change: 1 addition & 0 deletions jobfunnel/resources/defaults.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
DEFAULT_PROVIDERS = [Provider.MONSTER, Provider.INDEED] #, Provider.GLASSDOOR]
DEFAULT_PROVIDER_NAMES = [p.name for p in DEFAULT_PROVIDERS]
DEFAULT_RETURN_SIMILAR_RESULTS = False
DEFAULT_EXACT_RESULT = False
DEFAULT_RANDOM_DELAY = False
DEFAULT_RANDOM_CONVERGING_DELAY = False
DEFAULT_REMOTENESS = Remoteness.ANY
Expand Down
4 changes: 4 additions & 0 deletions tests/config/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,10 @@ def test_parse_cli_build_config_dict(argv, exp_exception):
assert cfg['search']['similar_results']
else:
assert not cfg['search']['similar_results']
if '--exact-result' in argv:
assert cfg['search']['exact_result']
else:
assert not cfg['search']['exact_result']

assert cfg['delay']['algorithm'] == 'LINEAR'
assert cfg['delay']['max_duration'] == 8
Expand Down

0 comments on commit 616d351

Please sign in to comment.