-
Notifications
You must be signed in to change notification settings - Fork 0
/
isbn.py
104 lines (83 loc) · 3.03 KB
/
isbn.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
#! /usr/bin/python
# -*- coding: utf-8 -*-
"""Codes specifically related to ISBNs."""
import re
import requests
import commons
import bibtex
import adinebook
# Matches an ISBN-13 (978/979 prefix) with an optional, consistent ' ' or '-'
# separator between its groups.
# original regex from: https://www.debuggex.com/r/0Npla56ipD5aeTr9
isbn13_regex = re.compile(
    r'97(?:8|9)([ -]?)(?=\d{1,5}\1?\d{1,7}\1?\d{1,6}\1?\d)(?:\d\1*){9}\d'
)
# Matches an ISBN-10 (last character may be the 'X' check digit) with an
# optional, consistent ' ' or '-' separator between its groups.
# original regex from: https://www.debuggex.com/r/2s3Wld3CVCR1wKoZ
isbn10_regex = re.compile(
    r'(?=\d{1,5}([ -]?)\d{1,7}\1?\d{1,6}\1?\d)(?:\d\1*){9}[\dX]'
)
# Matches either an ISBN-10 or an ISBN-13; the 978/979 prefix is optional.
# original regex from: http://stackoverflow.com/a/14260708/2705757
# NOTE: the pattern is split using implicit string concatenation. A
# backslash-newline inside a raw string is NOT a line continuation: it would
# leave an escaped newline in the pattern, and the following '?' would then
# apply to that newline instead of to the optional prefix group.
isbn_regex = re.compile(
    r'(?=[-0-9 ]{17}|[-0-9X ]{13}|[0-9X]{10})(?:97[89][- ]?)?'
    r'[0-9]{1,5}[- ]?(?:[0-9]+[- ]?){2}[0-9X]'
)
class IsbnError(Exception):

    """Signal that bibliographic information is not available for an ISBN."""
class Response(commons.BaseResponse):

    """Create isbn's response object."""

    def __init__(self, isbn_container_string, pure=False,
                 date_format='%Y-%m-%d'):
        """Look the ISBN up in both sources and run self.generate().

        Arguments:
        isbn_container_string -- text that contains an ISBN; when `pure` is
            true it is taken to be the ISBN itself.
        pure -- if true, skip the regex search and use the given string
            verbatim as the ISBN.
        date_format -- stored as self.date_format.

        Attributes set here: date_format, isbn, bibtex (the raw text returned
        by ottobib()) and dictionary (the source picked by choose_dict()).

        Raise IsbnError if no ISBN can be found in the string or if neither
        source yields any bibliographic data.
        """
        self.date_format = date_format
        if pure:
            self.isbn = isbn_container_string
        else:
            # Prefer an ISBN-13; only look for an ISBN-10 if there is none.
            m = (isbn13_regex.search(isbn_container_string) or
                 isbn10_regex.search(isbn_container_string))
            if m is None:
                # Previously this surfaced as an AttributeError on None.
                raise IsbnError('No ISBN found in the given string.')
            self.isbn = m.group(0)
        adinebook_url = adinebook.isbn2url(self.isbn)
        a = adinebook.url2dictionary(adinebook_url)
        self.bibtex = ottobib(self.isbn)
        # Do not hand empty BibTeX text to the parser; treat it as "no data".
        o = bibtex.parse(self.bibtex) if self.bibtex else None
        if not a and not o:
            raise IsbnError('Bibliographic information not found.')
        self.dictionary = choose_dict(a, o)
        if 'language' not in self.dictionary:
            # detect_language() and generate() are not defined in this class;
            # presumably they are inherited from commons.BaseResponse.
            self.detect_language(self.dictionary['title'])
        self.generate()
def _same_isbn(first, second):
    """Return True if the two ISBN strings denote the same number."""
    try:
        return isbn2int(first) == isbn2int(second)
    except ValueError:
        # isbn2int() cannot convert ISBNs containing a non-digit such as the
        # 'X' check digit of an ISBN-10; compare the normalized text instead.
        def normalize(isbn):
            return isbn.replace('-', '').replace(' ', '').upper()
        return normalize(first) == normalize(second)


def choose_dict(adinebook, ottobib):
    """Choose which source dictionary to use.

    Return adinebook if both sources contain the same ISBN or if ottobib is
    empty/None; otherwise return ottobib (which may itself be empty/None when
    neither source has data).

    Both dictionaries are expected to have an 'isbn' key when non-empty.

    Background: adinebook.com omits 3 digits from its ISBNs when converting
    them to URLs. This may make them vulnerable to resolving to a wrong ISBN,
    so on a mismatch the ottobib result is trusted.
    """
    if not adinebook:
        # Nothing from adinebook: ottobib is the only candidate.
        return ottobib
    if not ottobib:
        # Only adinebook has data.
        return adinebook
    # Both exist: trust adinebook only if it resolved to the same ISBN.
    if _same_isbn(adinebook['isbn'], ottobib['isbn']):
        return adinebook
    return ottobib
def isbn2int(isbn):
    """Return the given ISBN string as an integer.

    Hyphens and spaces are removed before conversion. Any other non-digit
    character left in the string (for example the 'X' check digit of some
    ISBN-10s) makes int() raise ValueError.
    """
    digits = isbn
    for separator in ('-', ' '):
        digits = digits.replace(separator, '')
    return int(digits)
def ottobib(isbn):
    """Convert ISBN to bibtex using ottobib.com.

    Fetch the ottobib.com BibTeX page for `isbn` and return the text found
    inside its first <textarea> element. Return an empty string if the page
    contains no <textarea> (previously this raised AttributeError on None).
    """
    ottobib_url = 'http://www.ottobib.com/isbn/' + isbn + '/bibtex'
    ottobib_html = requests.get(ottobib_url).text
    # [^>]* keeps the opening tag from swallowing content and the lazy group
    # stops at the first closing tag; DOTALL lets the entry span many lines.
    m = re.search(
        r'<textarea[^>]*>(.*?)</textarea>', ottobib_html, re.DOTALL
    )
    if m is None:
        return ''
    return m.group(1)