diff --git a/crawler/utils/file_utils.py b/crawler/utils/file_utils.py index 889232df..f1fb99e2 100644 --- a/crawler/utils/file_utils.py +++ b/crawler/utils/file_utils.py @@ -1,4 +1,3 @@ -import fnmatch import logging import os import re @@ -26,8 +25,7 @@ def crawl_files( root_dir_alias = root_dir exclude_dirs = [os.path.join(root_dir, d) for d in exclude_dirs] - exclude_regex = r'|'.join([fnmatch.translate(d) - for d in exclude_dirs]) or r'$.' + exclude_regex = re.compile(r'|'.join([d for d in exclude_dirs])) # walk the directory hierarchy starting at 'root_dir' in BFS # order @@ -72,6 +70,7 @@ def _filetype(fpath, fperm): }.get(modebit) return ftype + _filemode_table = ( ( (stat.S_IFLNK, 'l'),