Skip to content

Commit 61eac9f

Browse files
Add support for bad file recovery (#5)
Co-authored-by: Axel Dahl <[email protected]>
1 parent 240766a commit 61eac9f

File tree

1 file changed

+7
-3
lines changed

1 file changed

+7
-3
lines changed

duplicate_code_detection.py

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -135,9 +135,13 @@ def run(fail_threshold, directories, files, ignore_directories, ignore_files,
135 135
# Parse the contents of all the source files
136 136
source_code = OrderedDict()
137 137
for source_code_file in source_code_files:
138-
with open(source_code_file, 'r') as f:
139-
# Store source code with the file path as the key
140-
source_code[source_code_file] = f.read()
138+
try:
139+
# read file but also recover from encoding errors in source files
140+
with open(source_code_file, 'r', errors='surrogateescape') as f:
141+
# Store source code with the file path as the key
142+
source_code[source_code_file] = f.read()
143+
except Exception as err:
144+
print(f'ERROR: Failed to open file {source_code_file}, reason: {str(err)}')
141 145

142 146
# Create a Similarity object of all the source code
143 147
gen_docs = [[word.lower() for word in word_tokenize(source_code[source_file])]

0 commit comments

Comments
 (0)