Skip to content

Commit a86ed83

Browse files
author
duzumaki
committed
Speed up import scanning
1 parent 19c036b commit a86ed83

File tree

1 file changed

+47
-12
lines changed

1 file changed

+47
-12
lines changed

src/grimp/application/usecases.py

Lines changed: 47 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,13 @@
11
"""
22
Use cases handle application logic.
33
"""
4+
5+
from concurrent.futures import ProcessPoolExecutor
6+
from functools import partial
7+
from math import ceil
48
from typing import Dict, Sequence, Set, Type, Union, cast
59

6-
from ..application.ports import caching
10+
from ..application.ports import caching, modulefinder
711
from ..application.ports.filesystem import AbstractFileSystem
812
from ..application.ports.graph import ImportGraph
913
from ..application.ports.importscanner import AbstractImportScanner
@@ -99,6 +103,28 @@ def _validate_package_names_are_strings(
99103
return cast(Sequence[str], package_names)
100104

101105

106+
def get_imports_by_module(
    module_file: modulefinder.ModuleFile,
    cache_dir: Union[str, Type[NotSupplied], None],
    import_scanner: AbstractImportScanner,
    exclude_type_checking_imports: bool,
    cache: caching.Cache,
):
    """
    Return a module together with the direct imports found for it.

    The imports are read from the cache when caching is enabled and the cache
    has an entry for the module file; otherwise the module is scanned.

    This is a module-level function (rather than a closure) so that it can be
    wrapped in ``functools.partial`` and handed to a process pool, which needs
    to pickle the callable it dispatches to worker processes.

    Args:
        module_file: The module file whose imports are wanted.
        cache_dir: The cache directory setting. ``None`` disables the cache,
            in which case ``cache`` is never consulted.
        import_scanner: Scanner used when the imports can't be read from the
            cache.
        exclude_type_checking_imports: Passed through to the scanner.
        cache: The cache to read from. Only used if ``cache_dir`` is not
            ``None``.

    Returns:
        A ``(module, direct_imports)`` tuple, where ``module`` is
        ``module_file.module``.
    """
    module = module_file.module

    if cache_dir is not None:
        try:
            return module, cache.read_imports(module_file)
        except caching.CacheMiss:
            # Cache miss: fall through and scan the module instead.
            pass

    return module, import_scanner.scan_for_imports(
        module, exclude_type_checking_imports=exclude_type_checking_imports
    )
126+
127+
102128
def _scan_packages(
103129
found_packages: Set[FoundPackage],
104130
file_system: AbstractFileSystem,
@@ -107,6 +133,8 @@ def _scan_packages(
107133
cache_dir: Union[str, Type[NotSupplied], None],
108134
) -> Dict[Module, Set[DirectImport]]:
109135
imports_by_module: Dict[Module, Set[DirectImport]] = {}
136+
cache = None
137+
110138
if cache_dir is not None:
111139
cache_dir_if_supplied = cache_dir if cache_dir != NotSupplied else None
112140
cache: caching.Cache = settings.CACHE_CLASS.setup(
@@ -122,18 +150,25 @@ def _scan_packages(
122150
include_external_packages=include_external_packages,
123151
)
124152

153+
partial_get_imports_by_module = partial(
154+
get_imports_by_module,
155+
cache_dir=cache_dir,
156+
import_scanner=import_scanner,
157+
exclude_type_checking_imports=exclude_type_checking_imports,
158+
cache=cache,
159+
)
160+
125161
for found_package in found_packages:
126-
for module_file in found_package.module_files:
127-
module = module_file.module
128-
try:
129-
if cache_dir is None:
130-
raise caching.CacheMiss
131-
direct_imports = cache.read_imports(module_file)
132-
except caching.CacheMiss:
133-
direct_imports = import_scanner.scan_for_imports(
134-
module, exclude_type_checking_imports=exclude_type_checking_imports
135-
)
136-
imports_by_module[module] = direct_imports
162+
with ProcessPoolExecutor(max_workers=8) as executor:
163+
chunk_size = ceil(len(found_package.module_files) / executor._max_workers) or 1
164+
165+
for future_result in executor.map(
166+
partial_get_imports_by_module,
167+
found_package.module_files,
168+
chunksize=chunk_size,
169+
):
170+
module, direct_imports = future_result
171+
imports_by_module[module] = direct_imports
137172

138173
if cache_dir is not None:
139174
cache.write(imports_by_module)

0 commit comments

Comments
 (0)