Skip to content

Commit

Permalink
fixup: transliterator and spec rewrite.
Browse files Browse the repository at this point in the history
  • Loading branch information
cosmix committed Feb 13, 2024
1 parent fa04053 commit 72b9fea
Show file tree
Hide file tree
Showing 5 changed files with 202 additions and 126 deletions.
6 changes: 6 additions & 0 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,9 @@ gem "unicorn"
gem "mechanize"
gem "unicode_utils"
gem "sinatra"

# Gems loaded only in the development and test Bundler groups.
group :development, :test do
gem "rspec"
gem "rubocop"
gem "pry"
end
117 changes: 91 additions & 26 deletions Gemfile.lock
Original file line number Diff line number Diff line change
@@ -1,68 +1,133 @@
GEM
remote: https://rubygems.org/
specs:
addressable (2.8.0)
public_suffix (>= 2.0.2, < 5.0)
connection_pool (2.2.5)
domain_name (0.5.20190701)
unf (>= 0.0.5, < 1.0.0)
addressable (2.8.6)
public_suffix (>= 2.0.2, < 6.0)
ast (2.4.2)
base64 (0.2.0)
coderay (1.1.3)
connection_pool (2.4.1)
diff-lcs (1.5.1)
domain_name (0.6.20240107)
http-cookie (1.0.5)
domain_name (~> 0.5)
json (2.7.1)
kgio (2.11.4)
mechanize (2.8.5)
language_server-protocol (3.17.0.3)
mechanize (2.10.0)
addressable (~> 2.8)
base64
domain_name (~> 0.5, >= 0.5.20190701)
http-cookie (~> 1.0, >= 1.0.3)
mime-types (~> 3.0)
net-http-digest_auth (~> 1.4, >= 1.4.1)
net-http-persistent (>= 2.5.2, < 5.0.dev)
nkf
nokogiri (~> 1.11, >= 1.11.2)
rubyntlm (~> 0.6, >= 0.6.3)
webrick (~> 1.7)
webrobots (~> 0.1.2)
mime-types (3.4.1)
method_source (1.0.0)
mime-types (3.5.2)
mime-types-data (~> 3.2015)
mime-types-data (3.2022.0105)
mustermann (2.0.2)
mime-types-data (3.2024.0206)
mustermann (3.0.0)
ruby2_keywords (~> 0.0.1)
net-http-digest_auth (1.4.1)
net-http-persistent (4.0.1)
net-http-persistent (4.0.2)
connection_pool (~> 2.2)
nkf (0.2.0)
nokogiri (1.16.2-aarch64-linux)
racc (~> 1.4)
nokogiri (1.16.2-arm-linux)
racc (~> 1.4)
nokogiri (1.16.2-arm64-darwin)
racc (~> 1.4)
nokogiri (1.16.2-x86-linux)
racc (~> 1.4)
nokogiri (1.16.2-x86_64-darwin)
racc (~> 1.4)
nokogiri (1.16.2-x86_64-linux)
racc (~> 1.4)
public_suffix (4.0.7)
parallel (1.24.0)
parser (3.3.0.5)
ast (~> 2.4.1)
racc
pry (0.14.2)
coderay (~> 1.1)
method_source (~> 1.0)
public_suffix (5.0.4)
racc (1.7.3)
rack (2.2.6.4)
rack-protection (2.2.3)
rack
raindrops (0.20.0)
rack (3.0.9)
rack-protection (4.0.0)
base64 (>= 0.1.0)
rack (>= 3.0.0, < 4)
rack-session (2.0.0)
rack (>= 3.0.0)
rainbow (3.1.1)
raindrops (0.20.1)
regexp_parser (2.9.0)
rexml (3.2.6)
rspec (3.13.0)
rspec-core (~> 3.13.0)
rspec-expectations (~> 3.13.0)
rspec-mocks (~> 3.13.0)
rspec-core (3.13.0)
rspec-support (~> 3.13.0)
rspec-expectations (3.13.0)
diff-lcs (>= 1.2.0, < 2.0)
rspec-support (~> 3.13.0)
rspec-mocks (3.13.0)
diff-lcs (>= 1.2.0, < 2.0)
rspec-support (~> 3.13.0)
rspec-support (3.13.0)
rubocop (1.60.2)
json (~> 2.3)
language_server-protocol (>= 3.17.0)
parallel (~> 1.10)
parser (>= 3.3.0.2)
rainbow (>= 2.2.2, < 4.0)
regexp_parser (>= 1.8, < 3.0)
rexml (>= 3.2.5, < 4.0)
rubocop-ast (>= 1.30.0, < 2.0)
ruby-progressbar (~> 1.7)
unicode-display_width (>= 2.4.0, < 3.0)
rubocop-ast (1.30.0)
parser (>= 3.2.1.0)
ruby-progressbar (1.13.0)
ruby2_keywords (0.0.5)
rubyntlm (0.6.3)
sinatra (2.2.3)
mustermann (~> 2.0)
rack (~> 2.2)
rack-protection (= 2.2.3)
sinatra (4.0.0)
mustermann (~> 3.0)
rack (>= 3.0.0, < 4)
rack-protection (= 4.0.0)
rack-session (>= 2.0.0, < 3)
tilt (~> 2.0)
tilt (2.1.0)
unf (0.1.4)
unf_ext
unf_ext (0.0.8.2)
tilt (2.3.0)
unicode-display_width (2.5.0)
unicode_utils (1.4.0)
unicorn (6.1.0)
kgio (~> 2.6)
raindrops (~> 0.7)
webrick (1.7.0)
webrick (1.8.1)
webrobots (0.1.2)

PLATFORMS
aarch64-linux
arm-linux
arm64-darwin
x86-linux
x86_64-darwin
x86_64-linux
x86_64-linux-musl

DEPENDENCIES
mechanize
pry
rspec
rubocop
sinatra
unicode_utils
unicorn

BUNDLED WITH
2.3.17
2.5.3
62 changes: 31 additions & 31 deletions app.rb
Original file line number Diff line number Diff line change
Expand Up @@ -15,47 +15,47 @@ def searchOTE(phoneNo)

match = greekNo.match phoneNo if match.nil?

unless match.nil?
pageurl = 'https://www.11888.gr'
return if match.nil?

a = Mechanize.new
a.user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.101 Safari/537.36'
page = a.get(pageurl)
cookies = page.header['Set-Cookie']
pageurl = 'https://www.11888.gr'

# Get the CSRF token.
tokenRE = /csrftoken=(.*?);/
a = Mechanize.new
a.user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.101 Safari/537.36'
page = a.get(pageurl)
cookies = page.header['Set-Cookie']

csrftoken = '1'
csrftokenMatches = tokenRE.match cookies
# Get the CSRF token.
tokenRE = /csrftoken=(.*?);/

csrftoken = csrftokenMatches[1] unless csrftokenMatches.nil?
csrftoken = '1'
csrftokenMatches = tokenRE.match cookies

a.pre_connect_hooks << lambda do |_agent, request|
request['X-Requested-With'] = 'XMLHttpRequest'
request['Cookie'] = 'csrftoken=' + csrftoken + ';'
request['Accept'] = 'Accept: application/json, text/javascript, */*; q=0.01'
request['Referer'] = 'https://www.11888.gr'
end
csrftoken = csrftokenMatches[1] unless csrftokenMatches.nil?

pageurl = 'https://www.11888.gr/search/reverse/?phone=' + match[1]
a.pre_connect_hooks << lambda do |_agent, request|
request['X-Requested-With'] = 'XMLHttpRequest'
request['Cookie'] = 'csrftoken=' + csrftoken + ';'
request['Accept'] = 'Accept: application/json, text/javascript, */*; q=0.01'
request['Referer'] = 'https://www.11888.gr'
end

# Second call for the actual data (returned in JSON)
page = a.get(pageurl)
pageurl = 'https://www.11888.gr/search/reverse/?phone=' + match[1]

begin
parsed = JSON.parse(page.content)
unless parsed['data']['wp'].empty?
nameComps = parsed['data']['wp'][0]['name']
fullName = "#{nameComps['first'] || ''} #{nameComps['last']}"
fullName.strip!
end
rescue JSON::ParserError
fullName = nil
end
# Second call for the actual data (returned in JSON)
page = a.get(pageurl)

Transliterator.grToLat(fullName).gsub(%r{</?.*?>}, '').gsub(/\n/, ' - ') unless fullName.nil?
begin
parsed = JSON.parse(page.content)
unless parsed['data']['wp'].empty?
nameComps = parsed['data']['wp'][0]['name']
fullName = "#{nameComps['first'] || ''} #{nameComps['last']}"
fullName.strip!
end
rescue JSON::ParserError
fullName = nil
end

Transliterator.gr_to_lat(CGI.escapeHTML(fullName)).gsub(%r{</?.*?>}, '').gsub(/\n/, ' - ') unless fullName.nil?
end

def searchAll(phoneNumber)
Expand Down
94 changes: 49 additions & 45 deletions transliterator.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,55 +2,59 @@
require 'unicode_utils/titlecase'

# Converts Greek text to a Latin-alphabet approximation.
#
# The input is lower-cased, stripped of combining marks (accents, dialytika),
# transliterated character by character with special handling for the
# two-letter combinations in DIPTHONGS, and finally title-cased.
module Transliterator
  # Single-character mappings. Characters that are not keys of this hash are
  # copied to the output unchanged.
  #
  # NOTE(review): 'β' maps to 'b' here; common Greek romanization tables use
  # 'v' for this letter — confirm that 'b' is intended.
  GREEK_TO_LATIN = {
    'α' => 'a', 'β' => 'b', 'γ' => 'g', 'δ' => 'd', 'ε' => 'e',
    'ζ' => 'z', 'η' => 'i', 'θ' => 'th', 'ι' => 'i', 'κ' => 'k',
    'λ' => 'l', 'μ' => 'm', 'ν' => 'n', 'ξ' => 'x', 'ο' => 'o',
    'π' => 'p', 'ρ' => 'r', 'σ' => 's', 'τ' => 't', 'υ' => 'y',
    'φ' => 'f', 'χ' => 'ch', 'ψ' => 'ps', 'ω' => 'o', 'ς' => 's'
  }.freeze

  # Two-letter combinations whose transliteration differs from the sum of
  # their letters. 'exceptions' is emitted when the character that follows
  # the pair is a vowel or a voiced consonant, 'default' in every other case
  # (voiceless consonant, non-Greek character, end of input).
  # 'ου' is listed so that it yields 'ou' rather than 'o' + 'y'.
  # (The constant keeps its historical spelling so existing references work.)
  DIPTHONGS = {
    'ευ' => { 'default' => 'ef', 'exceptions' => 'ev' },
    'αυ' => { 'default' => 'af', 'exceptions' => 'av' },
    'ου' => { 'default' => 'ou', 'exceptions' => 'ou' }
  }.freeze

  VOWELS = %w[α ε η ι ο υ ω].freeze

  # Consonants before which 'αυ' / 'ευ' are voiced ('av' / 'ev').
  VOICED_CONSONANTS = %w[β γ δ ζ λ μ ν ρ].freeze

  # Combining diacritical marks left behind by NFD decomposition.
  COMBINING_MARKS = /[\u0300-\u036f]/.freeze

  # Transliterates +in_string+ from Greek to Latin characters.
  #
  # @param in_string [String] text to transliterate; may mix Greek and
  #   non-Greek characters
  # @return [String] the title-cased transliteration
  def self.gr_to_lat(in_string)
    # Decompose accented letters and drop the marks so that e.g. 'ύ' is
    # looked up as plain 'υ'.
    text = UnicodeUtils.downcase(in_string).unicode_normalize(:nfd).gsub(COMBINING_MARKS, '')

    out = +''
    i = 0
    while i < text.length
      rules = DIPTHONGS[text[i, 2]]
      if rules
        # text[i + 2] is nil at the end of input, which selects 'default'.
        out << (voiced_follower?(text[i + 2]) ? rules['exceptions'] : rules['default'])
        i += 2 # the pair consumed two input characters
      else
        ch = text[i]
        out << GREEK_TO_LATIN.fetch(ch, ch)
        i += 1
      end
    end

    UnicodeUtils.titlecase(out)
  end

  # True when +char+ makes a preceding 'αυ' / 'ευ' voiced.
  def self.voiced_follower?(char)
    VOWELS.include?(char) || VOICED_CONSONANTS.include?(char)
  end
  private_class_method :voiced_follower?
end
Loading

0 comments on commit 72b9fea

Please sign in to comment.