Skip to content

Commit

Permalink
fix: tlds property should return list of tlds used for current config
Browse files Browse the repository at this point in the history
  • Loading branch information
brycedrennan authored and john-kurkowski committed Oct 22, 2020
1 parent 527df5b commit be5fb11
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 8 deletions.
14 changes: 14 additions & 0 deletions tests/main_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,3 +248,17 @@ def test_cache_timeouts(tmpdir):

with pytest.raises(SuffixListNotFound):
tldextract.suffix_list.find_first_response(cache, [server], 5)


def test_tlds_property():
    """Check that ``tlds`` reflects the extractor's private-domain setting.

    Two extractors are built with identical arguments except for
    ``include_psl_private_domains``. The one that includes private
    domains must expose strictly more entries through ``tlds``.
    """
    # Both extractors pass None for the cache dir and the suffix list URLs.
    # NOTE(review): presumably this avoids the cache and any remote fetch;
    # confirm against TLDExtract.
    shared_kwargs = {"cache_dir": None, "suffix_list_urls": None}

    with_private = tldextract.TLDExtract(
        include_psl_private_domains=True, **shared_kwargs
    )
    public_only = tldextract.TLDExtract(
        include_psl_private_domains=False, **shared_kwargs
    )

    private_count = len(with_private.tlds)
    public_count = len(public_only.tlds)
    assert private_count > public_count
30 changes: 22 additions & 8 deletions tldextract/tldextract.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,8 +206,6 @@ def __call__(self, url, include_psl_private_domains=None):
>>> extract('http://forums.bbc.co.uk/')
ExtractResult(subdomain='forums', domain='bbc', suffix='co.uk')
"""
if include_psl_private_domains is None:
include_psl_private_domains = self.include_psl_private_domains

netloc = SCHEME_RE.sub("", url) \
.partition("/")[0] \
Expand Down Expand Up @@ -242,7 +240,12 @@ def update(self, fetch_now=False):

@property
def tlds(self):
return self._get_tld_extractor().tlds
"""
Returns the list of TLDs used by default.
This will vary based on `include_psl_private_domains` and `extra_suffixes`.
"""
return list(self._get_tld_extractor().tlds())

def _get_tld_extractor(self):
'''Get or compute this object's TLDExtractor. Looks up the TLDExtractor
Expand Down Expand Up @@ -271,7 +274,8 @@ def _get_tld_extractor(self):
self._extractor = _PublicSuffixListTLDExtractor(
public_tlds=public_tlds,
private_tlds=private_tlds,
extra_tlds=list(self.extra_suffixes)
extra_tlds=list(self.extra_suffixes),
include_psl_private_domains=self.include_psl_private_domains
)
return self._extractor

Expand All @@ -289,20 +293,30 @@ def update(*args, **kwargs):
return TLD_EXTRACTOR.update(*args, **kwargs)


class _PublicSuffixListTLDExtractor(object):
class _PublicSuffixListTLDExtractor:
"""Wrapper around this project's main algo for PSL
lookups.
"""

def __init__(self, public_tlds, private_tlds, extra_tlds):
def __init__(self, public_tlds, private_tlds, extra_tlds, include_psl_private_domains=False):
# set the default value
self.include_psl_private_domains = include_psl_private_domains
self.public_tlds = public_tlds
self.private_tlds = private_tlds
self.tlds_incl_private = frozenset(public_tlds + private_tlds + extra_tlds)
self.tlds_excl_private = frozenset(public_tlds + extra_tlds)

def suffix_index(self, lower_spl, include_psl_private_domains=False):
def tlds(self, include_psl_private_domains=None):
    """Return the suffix set for the requested private-domain setting.

    When ``include_psl_private_domains`` is ``None``, the instance's own
    ``self.include_psl_private_domains`` value is used instead. A truthy
    setting yields ``self.tlds_incl_private``; a falsy one yields
    ``self.tlds_excl_private``.
    """
    use_private = include_psl_private_domains
    if use_private is None:
        # No explicit choice by the caller: fall back to the instance default.
        use_private = self.include_psl_private_domains

    if use_private:
        return self.tlds_incl_private
    return self.tlds_excl_private

def suffix_index(self, lower_spl, include_psl_private_domains=None):
"""Returns the index of the first suffix label.
Returns len(spl) if no suffix is found
"""
tlds = self.tlds_incl_private if include_psl_private_domains else self.tlds_excl_private
tlds = self.tlds(include_psl_private_domains)
length = len(lower_spl)
for i in range(length):
maybe_tld = '.'.join(lower_spl[i:])
Expand Down

0 comments on commit be5fb11

Please sign in to comment.