-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcheck_bib.py
81 lines (72 loc) · 2.29 KB
/
check_bib.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import re
import sys

import numpy as np
def print_duplicates(non_uniques, ids, keys):
    """
    Print one line per duplicated ID, listing the citation keys that share it.

    Parameters
    ----------
    non_uniques : iterable
        The IDs that occur more than once.
    ids : sequence
        All collected IDs, aligned element-wise with ``keys``.
    keys : sequence
        The citation key under which each element of ``ids`` was found.

    Returns
    -------
    None

    Prints
    ------
    For every duplicated ID a line of the form ``<ID> | <key> <-> <key> ...``.
    """
    for duplicate in non_uniques:
        # `ident` instead of `id`, which would shadow the builtin.
        matching_keys = [
            str(key) for ident, key in zip(ids, keys) if ident == duplicate
        ]
        # join() places the separator only between keys, and format() also
        # accepts IDs that are not plain strings (e.g. NumPy scalars).
        print("{} | {}".format(duplicate, " <-> ".join(matching_keys)))
# An entry header such as ``@article{Key2020,`` at the start of a line.
# Anchoring at the line start keeps a stray "@" inside a field value (e.g. an
# e-mail address) from being mistaken for a new entry.
_ENTRY_RE = re.compile(r"^\s*@\s*\w+\s*[{(]\s*([^\s,{}]+)")

# An ``eprint =`` / ``doi =`` assignment with any spacing and capitalisation.
# The look-behind only accepts the name at the line start or after
# whitespace, a comma or an opening delimiter, so ``eprinttype =`` or a
# ``?doi=`` URL query parameter is not picked up.
_ID_FIELD_RE = re.compile(r"(?<![^\s,{(])(eprint|doi)\s*=\s*", re.IGNORECASE)


def _read_field_value(text):
    """Return the field value at the start of *text*, without its delimiters."""
    text = text.strip()
    if text.startswith("{"):
        depth = 0
        for pos, char in enumerate(text):
            if char == "{":
                depth += 1
            elif char == "}":
                depth -= 1
                if depth == 0:
                    return text[1:pos].strip()
        # Brace not closed on this line: keep what is there.
        return text[1:].strip()
    if text.startswith('"'):
        end = text.find('"', 1)
        return (text[1:end] if end != -1 else text[1:]).strip()
    # Bare value (number or macro name): runs up to the next delimiter.
    return re.split(r"[\s,}]", text, maxsplit=1)[0]


def _find_duplicates(values):
    """Return the sorted array of items that occur more than once in *values*."""
    uniques, counts = np.unique(values, return_counts=True)
    return uniques[counts > 1]


def check_bib_file_for_duplicates(bibfile):
    """
    This function checks a bib file for duplicates and prints the findings.

    The file is scanned line by line. Each ``eprint`` or ``doi`` field is
    attributed to the most recently seen entry key. Field names are matched
    case-insensitively and with arbitrary spacing around ``=``; values may be
    wrapped in braces, in double quotes, or be bare. Empty values are
    ignored so that placeholder fields are not reported as duplicates.

    Parameters
    ----------
    bibfile : str
        The name of the bib file to be checked.

    Returns
    -------
    None

    Prints
    ------
    Prints the arXiv and DOI IDs that are duplicated in the bib file.
    """
    ids = {"eprint": [], "doi": []}
    id_keys = {"eprint": [], "doi": []}
    print("% Checking bib file {:s} for duplicates...".format(bibfile))
    # Read as UTF-8 and replace undecodable bytes instead of failing on them.
    with open(bibfile, "r", encoding="utf-8", errors="replace") as f:
        key = ""
        for line in f:
            entry = _ENTRY_RE.match(line)
            if entry:
                key = entry.group(1)
            # finditer (not a single match) so that a line carrying both
            # fields contributes both IDs.
            for field in _ID_FIELD_RE.finditer(line):
                value = _read_field_value(line[field.end():])
                if value:
                    name = field.group(1).lower()
                    ids[name].append(value)
                    id_keys[name].append(key)

    arxiv, arxiv_key = ids["eprint"], id_keys["eprint"]
    doi, doi_key = ids["doi"], id_keys["doi"]
    d_arxiv = _find_duplicates(arxiv)
    d_doi = _find_duplicates(doi)
    n_d_arxiv, n_d_doi = len(d_arxiv), len(d_doi)

    if n_d_arxiv + n_d_doi > 0:
        print(
            "% WARNING. {:d} duplicate arXiv ID(s) and {:d} duplicate DOI(s) detected!".format(
                n_d_arxiv, n_d_doi
            )
        )
        print(
            "% You need to fix the following equivalent keys for the unique IDs listed below:"
        )
        print("ID | Keys")
        if n_d_arxiv > 0:
            print_duplicates(d_arxiv, arxiv, arxiv_key)
        if n_d_doi > 0:
            print_duplicates(d_doi, doi, doi_key)
    else:
        print("% Done, no duplicates detected!")
# Command-line entry point: the first argument is the bib file to check.
if __name__ == "__main__":
    if len(sys.argv) < 2:
        # No filename given, so tell the user what is expected.
        print("Please provide a BibTeX filename!")
    else:
        bibfile = sys.argv[1]
        check_bib_file_for_duplicates(bibfile)