-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathcockney.py
executable file
·82 lines (65 loc) · 2.24 KB
/
cockney.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
#!/usr/bin/env python
import requests
import lxml.html
import sys
import codecs
import difflib
from collections import defaultdict
sys.stdout = codecs.getwriter('utf8')(sys.stdout)
if len(sys.argv) < 6:
sys.exit()
term = ' '.join(sys.argv[5:])
response = requests.get('http://www.aldertons.com/english-.htm')
# http://abceda.com/cockney.html
# http://www.freelang.net/dictionary/docs/html_cockney_english.php
# http://www.freelang.net/dictionary/docs/html_english_cockney.php
# add http://www.cockneyrhymingslang.co.uk/english/A ?
content = response.content.decode('cp1252')
root = lxml.html.document_fromstring(content)
cells = root.xpath('(//table)[2]//td[position() = 1 or position() = 2]')
cells = [c.text_content().strip() for c in cells]
pairs = zip(*[iter(cells)]*2)
pairs += [
# (English, Cockney)
# Some clearly apocryphal (prune), but in Ronnie Barker sketch:
# https://www.youtube.com/watch?v=4RwyPDPlFA8
('shoes', 'how-de-doos'),
('dirty', 'two-thirty'),
('quid', 'saucepan lid'),
('drawers', 'early doors'),
('pub', 'rub-a-dub'),
('drink', 'tumble down the sink'),
('pissed (drunk)', 'Mozart and Liszt'),
('tune', 'stewed prune'),
('trouble', 'froth and bubble'),
('fire', 'Jeremiah'),
('chair', 'Lionel Blair'),
('C**t', 'berk'),
('C**t', 'Berkeley Hunt'),
('C**t', 'James Blunt'),
('cafe', 'Colonel Gaddafi'),
# http://news.bbc.co.uk/1/hi/england/london/8217499.stm
# http://www.huffingtonpost.co.uk/2012/07/23/london-2012-cockney-atm-machines-installed_n_1695066.html
('PIN', 'Huckleberry Finn'),
('screen', 'Charlie Sheen'),
('receipt', 'Fleet Street'),
('twenty', 'horn of plenty'),
('card', 'bladder of lard'),
]
defns = defaultdict(list)
for en, crs in pairs:
crs = crs.replace('-', ' ')
crs = crs.lower()
defns[crs].append(en)
term = term.replace('-', ' ')
term = term.lower()
matches = difflib.get_close_matches(term, defns, 1, 0.2)
# NB need to cover abbreviations (e.g. "trouble" for "trouble and strife", currently matches "toby ale")
if matches:
ens = defns[matches[0]]
ens_list, ens_last = ', '.join(ens[:-1]), ens[-1]
if ens_list:
ens_text = '%s or %s' % (ens_list, ens_last)
else:
ens_text = ens_last
print ('%s is cockney rhyming slang for %s' % (matches[0], ens_text))