From c63b6bf7d08a97dd0a0582639533f063a77448f1 Mon Sep 17 00:00:00 2001 From: Dmitry Date: Wed, 7 Mar 2018 12:55:57 +0300 Subject: [PATCH] Revert new_apis --- .gitignore | 2 + README.md | 36 +++--- circle.yml | 25 +++-- install_api.sql | 41 +++---- install_api_vars.sql | 17 +++ install_bing_core.sql | 48 ++++++++ install_bing_vars.sql | 3 + install_global_core.sql | 100 +++++++++++++++++ install_global_vars.sql | 0 install_google_core.sql | 173 ++++++++++++++++++++++++++++ install_google_vars.sql | 4 + install_promt_core.sql | 229 ++++++++++++++++++++++++++++++++++++++ install_promt_vars.sql | 8 ++ migrations/2017-04-06.sql | 1 + setup-example.yml | 36 ++++++ setup.rb | 83 ++++++++++++++ uninstall_api.sql | 2 +- uninstall_core.sql | 3 +- 18 files changed, 759 insertions(+), 52 deletions(-) create mode 100644 install_api_vars.sql create mode 100644 install_bing_core.sql create mode 100644 install_bing_vars.sql create mode 100644 install_global_core.sql create mode 100644 install_global_vars.sql create mode 100644 install_google_core.sql create mode 100644 install_google_vars.sql create mode 100644 install_promt_core.sql create mode 100644 install_promt_vars.sql create mode 100644 migrations/2017-04-06.sql create mode 100644 setup-example.yml create mode 100755 setup.rb diff --git a/.gitignore b/.gitignore index 1377554..fbd18ed 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,3 @@ *.swp +*~ +/setup.yml diff --git a/README.md b/README.md index 8f993a6..b75eac6 100644 --- a/README.md +++ b/README.md @@ -1,16 +1,16 @@ -# postgrest-google-translate -PostgreSQL/PostgREST proxy to Google Translate API, with caching and ability to combine multiple text segments in one single request. It allows to work with Google Translate API right from Postgres or via REST API requests. +# postgrest-translation-proxy +PostgreSQL/PostgREST proxy to Google, Bing and Prompt Translate APIs, with caching and ability to combine multiple text segments in one single request. It allows to work with those Translate APIs right from Postgres or via REST API requests. [![Build Status](https://circleci.com/gh/NikolayS/postgrest-google-translate.png?style=shield&circle-token=fb58aee6e9f98cf85d08c4d382d5ba3f0f548e08)](https://circleci.com/gh/NikolayS/postgrest-google-translate/tree/master) This tiny project consists of 2 parts: -1. SQL objects to enable calling Google API right from SQL environment (uses [plsh](https://github.com/petere/plsh) extension) +1. SQL objects to enable calling Translation APIs right from SQL environment (uses [plsh](https://github.com/petere/plsh) extension) 2. API method (uses [PostgREST](http://postgrest.com)) Part (1) can be used without part (2). -Table `google_translate.cache` is used to cache Google API responses to speedup work and reduce costs. +Table `translation_proxy.cache` is used to cache API responses to speedup work and reduce costs. Also, it is possible to combine multiple phrases in one API request, which provides great advantage (e.g.: for 10 uncached phrases, it will be ~150-200ms for single aggregated request versus 1.5-2 seconds for 10 consequent requests). Currently, Google Translate API accepts up to 128 text segments in a single request. :warning: Limitations @@ -29,22 +29,32 @@ Dependencies --- 1. cURL 2. [PostgREST](http://postgrest.com) – download the latest version. See `circle.yml` for example of starting/using it. -2. `plsh` – PostgreSQL contrib module, it is NOT included to standard contribs package. To install it on Ubuntu/Debian run: `apt-get install postgresql-X.X-plsh` (where X.X could be 9.5, depending on your Postgres version) +3. `plsh` – PostgreSQL contrib module, it is NOT included to standard contribs package. To install it on Ubuntu/Debian run: `apt-get install postgresql-X.X-plsh` (where X.X could be 9.5, depending on your Postgres version). For Archlinux use AUR package 'postgresql-plsh'. +4. Ruby for easy installer (optional) Installation and Configuration --- -For your database (here we assume that it's called `DBNAME`), install [plsh](https://github.com/petere/plsh) extension and then execute two SQL scripts, after what configure your database setting `google_translate.api_key` (take it from Google Could Platform Console): +Simple method +---- +Edit `setup.yml` then execute `setup.rb`. You need to have the ruby interpreter been installed. + +Step-by-step method +---- +For your database (here we assume that it's called `DBNAME`), install [plsh](https://github.com/petere/plsh) extension and then execute `_core` SQL scripts, after what configure your database settings: +`translation_proxy.promt_api_key`, `translation_proxy.bing_api_key` and +`translation_proxy.google_api_key` (take it from Google Could Platform Console): ```sh psql DBNAME -c 'create extension if not exists plsh;' psql DBNAME -f install_core.sql -psql -c "alter database DBNAME set google_translate.api_key = 'YOU_GOOGLE_API_KEY';" -psql -c "alter database DBNAME set google_translate.begin_at = '2000-01-01';" -psql -c "alter database DBNAME set google_translate.end_at = '2100-01-01';" +psql -c "alter database DBNAME set translation_proxy.google_api_key = 'YOUR_GOOGLE_API_KEY';" +psql -c "alter database DBNAME set translation_proxy.google_begin_at = '2000-01-01';" +psql -c "alter database DBNAME set translation_proxy.google_end_at = '2100-01-01';" ``` -Alternatively, you can use `ALTER ROLE ... SET google_translate.api_key = 'YOU_GOOGLE_API_KEY';` or put this setting to `postgresql.conf` or do `ALTER SYSTEM SET google_translate.api_key = 'YOU_GOOGLE_API_KEY';` (in these cases, it will be available cluster-wide). -Parameters `google_translate.begin_at` and `google_translate.end_at` are responsible for the period of time, when Google Translate API is allowed to be called. If current time is beyond this timeframe, onlic cache will be used. +Alternatively, you can use `ALTER ROLE ... SET translation_proxy.google_api_key = 'YOUR_GOOGLE_API_KEY';` or put this setting to `postgresql.conf` or do `ALTER SYSTEM SET translation_proxy.google_api_key = 'YOUR_GOOGLE_API_KEY';` (in these cases, it will be available cluster-wide). + +Parameters `translation_proxy.google_begin_at` and `translation_proxy.google_end_at` are responsible for the period of time, when Google Translate API is allowed to be called. If current time is beyond this timeframe, only the cache table will be used. To enable REST API proxy, install [PostgREST](http://postgrest.com), launch it (see `cirle.yml` as an example), and initialize API methods with the additional SQL script: ```sh @@ -64,10 +74,10 @@ Usage In SQL environment: ```sql -- Translate from English to Russian -select google_translate.translate('en', 'ru', 'Hello world'); +select translation_proxy.google_translate('en', 'ru', 'Hello world'); -- Combine multiple text segments in single query -select * from google_translate.translate('en', 'ru', array['ok computer', 'show me more','hello world!']); +select * from translation_proxy.google_translate('en', 'ru', array['ok computer', 'show me more','hello world!']); ``` REST API: diff --git a/circle.yml b/circle.yml index 6c23ef4..afa75c1 100644 --- a/circle.yml +++ b/circle.yml @@ -9,24 +9,25 @@ dependencies: - cd ~ && wget https://github.com/begriffs/postgrest/releases/download/v0.3.2.0/postgrest-0.3.2.0-ubuntu.tar.xz - cd ~ && tar xf postgrest-0.3.2.0-ubuntu.tar.xz - echo "~/postgrest postgres://apiuser:SOMEPASSWORD@localhost:5432/test --pool=200 --anonymous=apiuser --port=3000 --jwt-secret notverysecret --max-rows=500 --schema=v1" > ~/postgrest-run.sh && chmod a+x ~/postgrest-run.sh - - npm install -g npm newman + - npm install -g npm newman ava database: override: - sudo -u postgres psql -c "create role apiuser password 'SOMEPASSWORD' login;" - sudo -u postgres psql -c "create database test;" - - sudo -u postgres psql -c "alter database test set google_translate.api_key = 'AIzaSyCauv2HRjprFX3DcGhorJFYGyeVmzvunuc';" - - sudo -u postgres psql -c "alter database test set google_translate.begin_at = '2000-01-01';" - - sudo -u postgres psql -c "alter database test set google_translate.end_at = '2100-01-01'" + - sudo -u postgres psql -c "alter database test set translation_proxy.google_api_key = 'AIzaSyCauv2HRjprFX3DcGhorJFYGyeVmzvunuc';" + - sudo -u postgres psql -c "alter database test set translation_proxy.google_begin_at = '2000-01-01';" + - sudo -u postgres psql -c "alter database test set translation_proxy.google_end_at = '2100-01-01'" - sudo -u postgres psql test -c "create extension if not exists plsh;" - - sudo -u postgres psql test -f ~/postgrest-google-translate/install_core.sql - - sudo -u postgres psql test -f ~/postgrest-google-translate/install_api.sql + - sudo -u postgres psql test -f ~/postgrest-translation-proxy/install_google_core.sql + - sudo -u postgres psql test -f ~/postgrest-translation-proxy/install_promt_core.sql + - sudo -u postgres psql test -f ~/postgrest-translation-proxy/install_bing_core.sql + - sudo -u postgres psql test -f ~/postgrest-translation-proxy/install_api.sql - ~/postgrest-run.sh: - background: true + background: true test: override: - - ~/"$CIRCLE_PROJECT_REPONAME"/test/run.sh -f junit >$CIRCLE_TEST_REPORTS/junit.xml + - ~/postgrest-translation-proxy/test/run.sh -f junit >$CIRCLE_TEST_REPORTS/junit.xml - nc -z -v -w5 localhost 3000 - - newman run ~/"$CIRCLE_PROJECT_REPONAME"/test/postman/postgrest-google-translate.postman_collection --bail -e ~/postgrest-google-translate/test/postman/local.postman_environment --reporter-junit-export $CIRCLE_TEST_REPORTS/newman.xml - - sudo -u postgres psql test -v ON_ERROR_STOP=1 -f ~/"$CIRCLE_PROJECT_REPONAME"/uninstall_api.sql - - sudo -u postgres psql test -v ON_ERROR_STOP=1 -f ~/"$CIRCLE_PROJECT_REPONAME"/uninstall_core.sql - + - newman run ~/postgrest-translation-proxy/test/postman/postgrest-translation-proxy.postman_collection --bail -e ~/postgrest-translation-proxy/test/postman/local.postman_environment --reporter-junit-export $CIRCLE_TEST_REPORTS/newman.xml + - sudo -u postgres psql test -v ON_ERROR_STOP=1 -f ~/postgrest-translation-proxy/uninstall_api.sql + - sudo -u postgres psql test -v ON_ERROR_STOP=1 -f ~/postgrest-translation-proxy/uninstall_core.sql diff --git a/install_api.sql b/install_api.sql index a5b2e61..42f2cc5 100644 --- a/install_api.sql +++ b/install_api.sql @@ -1,26 +1,17 @@ -create schema if not exists v1; -do -$$ -begin - if not exists ( - select * - from pg_catalog.pg_user - where usename = 'apiuser' - ) then - create role my_user password 'SOMEPASSWORD' login; - end if; -end -$$; +CREATE OR REPLACE FUNCTION v2.translate_array(source CHAR(2), target CHAR(2), q JSON) +RETURNS TEXT[] AS $BODY$ +DECLARE + rez TEXT[]; +BEGIN + SELECT + CASE current_setting('translation_proxy.api.current_engine') + WHEN 'google' THEN + translation_proxy.google_translate_array( source, target, q ) + WHEN 'promt' THEN + translation_proxy.promt_translate_array( source, target, array_agg( json_array_elements_text(q) ) ) + END INTO rez; + RETURN rez; +END; +$BODY$ LANGUAGE PLPGSQL SECURITY DEFINER; -grant usage on schema v1 to apiuser; - -create or replace function v1.google_translate_array(source char(2), target char(2), q json) returns text[] as $$ - select * from google_translate.translate_array(source, target, q); -$$ language sql security definer; - -create or replace function v1.google_translate(source char(2), target char(2), q text) returns text as $$ - select * from google_translate.translate(source, target, q); -$$ language sql security definer; - -grant execute on function v1.google_translate_array(char, char, json) to apiuser; -grant execute on function v1.google_translate(char, char, text) to apiuser; +GRANT EXECUTE ON FUNCTION v2.translate_array(CHAR(2), CHAR(2), JSON) TO apiuser; diff --git a/install_api_vars.sql b/install_api_vars.sql new file mode 100644 index 0000000..0192e8f --- /dev/null +++ b/install_api_vars.sql @@ -0,0 +1,17 @@ +ALTER DATABASE DBNAME SET translation_proxy.api.current_engine = 'CURRENT_API_ENGINE'; + +CREATE SCHEMA IF NOT EXISTS v2; +DO +$$ +BEGIN + IF NOT EXISTS ( + SELECT * + FROM pg_catalog.pg_user + WHERE usename = 'apiuser' + ) THEN + CREATE ROLE apiuser PASSWORD 'APIUSER-PASSWORD' LOGIN; + END IF; +END +$$; + +GRANT USAGE ON SCHEMA v2 TO apiuser; diff --git a/install_bing_core.sql b/install_bing_core.sql new file mode 100644 index 0000000..eed82b4 --- /dev/null +++ b/install_bing_core.sql @@ -0,0 +1,48 @@ +-- main function is the bing_translate( source char(2), target char(2), qs text[] ) + +-- api_key +CREATE OR REPLACE FUNCTION translation_proxy._bing_get_token_curl(text) RETURNS INTEGER AS $$ +#!/bin/sh +KEY=$1 +ACCESS_TOKEN = `curl -X POST -H "content-type: application/x-www-form-urlencoded" \ + -d "grant_type=client_credentials&client_id=$CLIENTID&client_secret=$CLIENTSECRET&scope=http://api.microsofttranslator.com" \ + https://datamarket.accesscontrol.windows.net/v2/OAuth2-13 | grep -Po '"access_token":.*?[^\\]",'` + +curl -X POST --header 'Ocp-Apim-Subscription-Key: ${KEY}' --data "" 'https://api.cognitive.microsoft.com/sts/v1.0/issueToken' 2>/dev/null +$$ LANGUAGE plsh; + +CREATE OR REPLACE FUNCTION translation_proxy._bing_login() RETURNS BOOLEAN AS $$ +DECLARE + token TEXT; +BEGIN + token := translation_proxy._bing_get_token_curl(current_setting('translation_proxy.bing.api_key')); + IF token IS NOT NULL AND token <> '' THEN + UPDATE translation_proxy.authcache SET ( creds, updated ) = ( token, now() ) WHERE api_engine = 'bing'; + RETURN 't'; + ELSE + RETURN 'f'; + END IF; +END; +$$ LANGUAGE plpgsql; + +-- token, source lang, target lang, text, category +CREATE OR REPLACE FUNCTION translation_proxy._bing_translate_curl(TEXT, CHAR(2), CHAR(2), TEXT) RETURNS TEXT AS $$ +#!/bin/sh +TOKEN=$1 +SRC=$2 +DST=$3 +QUERY=$4 +$CTG=$5 +curl -X GET -H "Authorization: Bearer $TOKEN" \ + --data-urlencode "text=$QUERY" \ + --data-urlencode "from=$SRC" \ + --data-urlencode "to=$DST" \ + --data-urlencode "category=$CTG" \ + 'https://api.cognitive.microsoft.com/sts/v1.0/Translate' 2>/dev/null +$$ LANGUAGE plsh; + +CREATE OR REPLACE FUNCTION translation_proxy.bing_translate(source CHAR(2), target CHAR(2), qs TEXT[], profile TEXT DEFAULT '') +RETURNS text[] as $$ +BEGIN +END; +$$ LANGUAGE plpgsql; diff --git a/install_bing_vars.sql b/install_bing_vars.sql new file mode 100644 index 0000000..cb258ea --- /dev/null +++ b/install_bing_vars.sql @@ -0,0 +1,3 @@ +-- Microsoft Bing +ALTER DATABASE DBNAME SET translation_proxy.bing.api_key = 'YOUR_BING_API_KEY'; +ALTER DATABASE DBNAME SET translation_proxy.bing.key_expiration = 'BING_TOKEN_EXPIRATION'; diff --git a/install_global_core.sql b/install_global_core.sql new file mode 100644 index 0000000..5522225 --- /dev/null +++ b/install_global_core.sql @@ -0,0 +1,100 @@ +CREATE SCHEMA IF NOT EXISTS translation_proxy; +CREATE EXTENSION IF NOT EXISTS plsh; +CREATE EXTENSION IF NOT EXISTS plpython2u; + +CREATE TYPE translation_proxy.api_engine_type AS ENUM ('google', 'promt', 'bing'); + +CREATE TABLE translation_proxy.cache( + id BIGSERIAL PRIMARY KEY, + source char(2), -- if this is NULL, it need to be detected + target char(2) NOT NULL, + q TEXT NOT NULL, + result TEXT, -- if this is NULL, it need to be translated + profile TEXT NOT NULL DEFAULT '', + created TIMESTAMP NOT NULL DEFAULT now(), + api_engine translation_proxy.api_engine_type NOT NULL, + encoded TEXT -- urlencoded string for GET request. Is null after an successfull translation. +); + +CREATE UNIQUE INDEX u_cache_q_source_target ON translation_proxy.cache + USING btree(md5(q), source, target, api_engine, profile); +CREATE INDEX cache_created ON translation_proxy.cache ( created ); +COMMENT ON TABLE translation_proxy.cache IS 'The cache for API calls of the Translation proxy'; + +-- trigger, that URLencodes query in cache, when no translation is given +CREATE OR REPLACE FUNCTION translation_proxy._urlencode_fields() +RETURNS TRIGGER AS $BODY$ + from urllib import quote_plus + TD['new']['encoded'] = quote_plus( TD['new']['q'] ) + return 'MODIFY' +$BODY$ LANGUAGE plpython2u; + +CREATE TRIGGER _prepare_for_fetch BEFORE INSERT ON translation_proxy.cache + FOR EACH ROW + WHEN (NEW.result IS NULL) + EXECUTE PROCEDURE translation_proxy._urlencode_fields(); + +-- cookies, oauth keys and so on +CREATE TABLE translation_proxy.authcache( + api_engine translation_proxy.api_engine_type NOT NULL, + creds TEXT, + updated TIMESTAMP NOT NULL DEFAULT now() +); + +CREATE UNIQUE INDEX u_authcache_engine ON translation_proxy.authcache ( api_engine ); + +COMMENT ON TABLE translation_proxy.authcache IS 'Translation API cache for remote authorization keys'; + +INSERT INTO translation_proxy.authcache (api_engine) VALUES ('google'), ('promt'), ('bing') + ON CONFLICT DO NOTHING; + +CREATE OR REPLACE FUNCTION translation_proxy._save_cookie(engine translation_proxy.api_engine_type, cookie TEXT) +RETURNS VOID AS $$ +BEGIN + UPDATE translation_proxy.authcache + SET ( creds, updated ) = ( cookie, now() ) + WHERE api_engine = engine; +END; +$$ LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION translation_proxy._load_cookie(engine translation_proxy.api_engine_type) +RETURNS TEXT AS $$ +DECLARE + cookie TEXT; +BEGIN + SELECT creds INTO cookie FROM translation_proxy.authcache + WHERE api_engine = engine AND + updated > ( now() - current_setting('translation_proxy.promt.login_timeout')::INTERVAL ) + AND creds IS NOT NULL AND creds <> '' + LIMIT 1; + RETURN cookie; +END; +$$ LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION translation_proxy._find_detected_language(qs TEXT, engine translation_proxy.api_engine_type) +RETURNS TEXT AS $$ +DECLARE + lng CHAR(2); +BEGIN + SELECT lang INTO lng FROM translation_proxy.cache + WHERE api_engine = engine AND q = qs AND lang IS NOT NULL + LIMIT 1; + RETURN lng; +END; +$$ LANGUAGE plpgsql; + +-- adding new parameter to url until it exceeds the limit of 2000 bytes +CREATE OR REPLACE FUNCTION translation_proxy._urladd( url TEXT, a TEXT ) RETURNS TEXT AS $$ + from urllib import quote_plus + r = url + quote_plus( a ) + if len(r) > 1999 : + plpy.error('URL length is over, time to fetch.', sqlstate = 'EOURL') + return r +$$ LANGUAGE plpython2u; + +-- urlencoding utility +CREATE OR REPLACE FUNCTION translation_proxy._urlencode(q TEXT) +RETURNS TEXT AS $BODY$ + from urllib import quote_plus + return quote_plus( q ) +$BODY$ LANGUAGE plpython2u; diff --git a/install_global_vars.sql b/install_global_vars.sql new file mode 100644 index 0000000..e69de29 diff --git a/install_google_core.sql b/install_google_core.sql new file mode 100644 index 0000000..21481d3 --- /dev/null +++ b/install_google_core.sql @@ -0,0 +1,173 @@ +-- fetches translations, listed in URL and saves them into cache +-- stores current request into local session cache (SD) and calls API only on overflow +-- must be called once more after the loop with id = nil to (possibly) flush the cache +CREATE OR REPLACE FUNCTION translation_proxy._google_fetch_translations( id BIGINT, source TEXT, target TEXT, q TEXT ) +RETURNS VOID AS $BODY$ + import pycurl + from StringIO import StringIO + from urllib import quote_plus + import json + import re + + if id and target and q: + if not SD['data']: + plpy.debug('Promt: init SD') + SD['url'] = 'https://www.googleapis.com/language/translate/v2?key=' + plpy.execute("current_setting('translation_proxy.google.api_key')")[0]['current_setting'] + '&target=' + target + '&source=' + source + SD['data'] = [] + + SD['url'] += '&text=' + quote_plus (q) + SD['data'].append( [{ 'id': id, 'source': source, 'target': target, 'q': q }] ) + + if len( SD['url'] ) < 1980 and SD['data'][0]['source'] == source and SD['data'][0]['target'] == target: + return None; + + if SD['data'] and SD['url']: + plpy.debug('Fetching google, url is %s' % SD['url']) + buffer = StringIO() + curl = pycurl.Curl() + curl.setopt( pycurl.WRITEDATA, buffer ) + curl.setopt( pycurl.HTTPHEADER, [ 'Accept: application/json' ] ) + curl.setopt( pycurl.URL, SD['url'] ) + curl.perform() + answer_code = curl.getinfo( pycurl.RESPONSE_CODE ) + if answer_code != 200 : + plpy.error( "Google returned %d", answer_code ) + try: + answer = json.loads( buffer.getvalue() ) + except ValueError: + buffer.close() + curl.close() + plpy.error("Google was returned just a plain text. Maybe this is an error.", detail = buffer.getvalue() ) + + buffer.close() + curl.close() + i = 0 + update_plan = plpy.prepare( "UPDATE translation_proxy.cache SET result = $1, encoded = NULL WHERE id = $2", + [ 'TEXT', 'BIGINT' ] ) + for x in answer['data']['translations'] : + plpy.debug("Google translated for id №%d : '%s'", ( SD['data'][i]['id'], x['translatedText'] )) + t = re.sub( r'^"|"$', '', x['translatedText'] ) + plpy.execute( update_plan, [ t, SD['data'][i]['id'] ] ) + i += 1 + + plpy.debug('Clearing SD') + SD['url'] = '' + SD['data'] = [] + + return None +$BODY$ LANGUAGE plpython2u; + +-- initiate translation of all fields where result is NULL +CREATE OR REPLACE FUNCTION translation_proxy._google_start_translation() +RETURNS VOID AS $$ +DECLARE + onerec RECORD; + url_base TEXT DEFAULT ''; + src TEXT DEFAULT ''; + dst TEXT DEFAULT ''; + prf TEXT DEFAULT ''; + current_ids BIGINT[] DEFAULT ARRAY[]::BIGINT[]; -- ids, currently added to url +BEGIN + FOR onerec IN SELECT id, source, target, q, profile + FROM translation_proxy.cache + WHERE api_engine = 'google' AND result IS NULL + ORDER BY source, target, profile + FOR UPDATE SKIP LOCKED + LOOP + BEGIN + RAISE DEBUG 'onerec.id is %', onerec.id; + IF onerec.source <> src OR onerec.target <> dst OR onerec.profile <> prf THEN + RAISE DEBUG 'Parameters was changed, escaping. source is %, target is %', onerec.source, onerec.target; + src = onerec.source; dst = onerec.target; prf = onerec.profile; + RAISE EXCEPTION USING + errcode = 'EOURL', + message = 'Parameters are changed, time to fetch ' || onerec.id; + END IF; + RAISE DEBUG 'Adding more requests to url, №%', onerec.id; + url_base := translation_proxy._urladd( (url_base || '&q='), onerec.q ); + current_ids := array_append( current_ids, onerec.id ); + RAISE DEBUG 'Added id %, and continuing with url %', onerec.id, url_base; + EXCEPTION + WHEN sqlstate 'EOURL' THEN + RAISE DEBUG 'EOURL on №%', onerec.id; + IF url_base <> '' THEN + PERFORM translation_proxy._google_fetch_translations( url_base, current_ids ); + END IF; + -- pushing the last request back to the url + RAISE DEBUG 'pushing the last request back to the url'; + current_ids := ARRAY[ onerec.id ]; + SELECT 'https://www.googleapis.com/language/translate/v2?key=' || + current_setting('translation_proxy.google.api_key') || + '&target=' || onerec.target || + '&source=' || onerec.source || + '&q=' || translation_proxy._urlencode( onerec.q ) INTO url_base; + -- here it will translate even long string, one-by-one. + RAISE DEBUG 'Inside exception, url_base == %', url_base; + END; + END LOOP; + RETURN; +END; +$$ LANGUAGE plpgsql VOLATILE; + +-- main function, saves all requests to cache and initiates start_translation +CREATE OR REPLACE FUNCTION translation_proxy.google_translate( + src CHAR(2), dst CHAR(2), qs TEXT[], api_profile TEXT DEFAULT '') +RETURNS TEXT[] AS $$ +DECLARE + new_row_ids BIGINT[]; -- saving here rows that needs translation + can_remote BOOLEAN DEFAULT 'f'; + r TEXT[]; +BEGIN + SET SCHEMA 'translation_proxy'; + IF src = dst THEN + RAISE EXCEPTION '''source'' cannot be equal to ''target'' (see details)' + USING detail = 'Received equal ''source'' and ''target'': ' + || src || ', qs: [' || array_to_string(qs, ';') || ']'; + END IF; + IF array_length(qs, 1) = 0 THEN + RAISE EXCEPTION 'NEED SOMETHING TO TRANSLATE'; + END IF; + -- let google translates rows with NULL result + WITH created( saved_ids ) AS ( + INSERT INTO translation_proxy.cache ( source, target, q, profile, api_engine ) + ( SELECT src, dst, unnest(qs), api_profile, 'google' ) + ON CONFLICT (md5(q), source, target, api_engine, profile) DO + UPDATE SET source = src + -- this is dirty hack doing nothing with table only for returning all requested ids + RETURNING id ) + SELECT array_agg( saved_ids ) FROM created INTO new_row_ids; + IF ( current_setting('translation_proxy.google.valid_from') IS NOT NULL + AND current_setting('translation_proxy.google.valid_from')::timestamp < current_timestamp ) + AND ( current_setting('translation_proxy.google.valid_until') IS NOT NULL + AND current_setting('translation_proxy.google.valid_until')::timestamp > current_timestamp ) + AND array_length( new_row_ids, 1 ) > 0 THEN + PERFORM _google_start_translation(); + END IF; + -- all translations are in the cache table now + SELECT array_agg( result ) FROM cache WHERE id IN ( SELECT unnest( new_row_ids ) ) INTO r; + RETURN r; +END; +$$ LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION translation_proxy.google_translate_array(source CHAR(2), target CHAR(2), q JSON) + RETURNS TEXT[] AS $$ +DECLARE + res TEXT[]; + qs TEXT[]; + jtype TEXT; +BEGIN + IF current_setting('translation_proxy.google.api_key') IS NULL + OR current_setting('translation_proxy.google.api_key') = '' THEN + RAISE EXCEPTION 'Configuration error: translation_proxy.google.api_key has not been set'; + END IF; + jtype := json_typeof(q)::TEXT; + IF jtype <> 'array' THEN + RAISE EXCEPTION 'Unsupported format of JSON unput'; + END IF; + SELECT INTO qs ARRAY(SELECT * FROM json_array_elements_text(q)); + SELECT INTO res google_translate + FROM translation_proxy.google_translate( source, target, qs ); + + RETURN res; +END; +$$ LANGUAGE plpgsql; diff --git a/install_google_vars.sql b/install_google_vars.sql new file mode 100644 index 0000000..faa4b6c --- /dev/null +++ b/install_google_vars.sql @@ -0,0 +1,4 @@ +-- Google API +ALTER DATABASE DBNAME SET translation_proxy.google.api_key = 'YOUR_GOOGLE_API_KEY'; +ALTER DATABASE DBNAME SET translation_proxy.google.valid_from = 'GOOGLE_VALID_FROM'; +ALTER DATABASE DBNAME SET translation_proxy.google.valid_until = 'GOOGLE_VALID_UNTIL'; diff --git a/install_promt_core.sql b/install_promt_core.sql new file mode 100644 index 0000000..573bcf4 --- /dev/null +++ b/install_promt_core.sql @@ -0,0 +1,229 @@ +-- Main function is the promt_translate( source, destination, query[], profile ) +/* Overview: + 0. insert received text to cache table where result is NULL + 1. call promt_get_translation, that loads those using CURSOR + and add untranslated text to url until it exceeds 2000 limit + and then translates them on Promt server + 2. for (possible) multitasking it locks records with SELECT FOR UPDATE +*/ + +-- Dumb functions for login, logout, translate and detect lnaguage +-- authorizes on Promt API, writes cookie to db and returns it (or NULL) for next queries +CREATE OR REPLACE FUNCTION translation_proxy._promt_login() RETURNS TEXT AS $$ + import pycurl + from StringIO import StringIO + from urllib import urlencode + import json + + cookie = plpy.execute("SELECT translation_proxy._load_cookie('promt')")[0]['_load_cookie']; + if not not cookie : + plpy.debug("Found valid cookie") + return cookie + + plpy.debug("Posting auth query to Promt API") + plan = plpy.prepare("SELECT current_setting('translation_proxy.promt.server_url')") + server_url = "%s/Services/auth/rest.svc/Login" % plpy.execute(plan)[0]['current_setting'] + promt_login = plpy.execute("SELECT current_setting('translation_proxy.promt.login')")[0]['current_setting'] + promt_password = plpy.execute("SELECT current_setting('translation_proxy.promt.password')")[0]['current_setting'] + + buffer = StringIO() + curl = pycurl.Curl() + curl.setopt( curl.URL, server_url ) + curl.setopt( pycurl.HTTPHEADER, ['Accept: application/json', 'Content-Type: application/json'] ) + curl.setopt( pycurl.POST, 1 ) + curl.setopt( pycurl.POSTFIELDS, + json.dumps( + {'username': promt_login, 'password': promt_password ,'isPersistent': False } )) + curl.setopt( pycurl.WRITEDATA, buffer ) + curl.setopt( pycurl.COOKIELIST, '' ) + curl.perform() + cookie = curl.getinfo( pycurl.INFO_COOKIELIST )[0] + plan = plpy.prepare("SELECT translation_proxy._save_cookie('promt', $1)", [ 'text' ]) + plpy.execute(plan, [ cookie ] ) + answer_code = curl.getinfo( pycurl.RESPONSE_CODE ) + curl.close() + if answer_code == 200 : # writing new cookie to db + return cookie + + plpy.fatal("Can't login into Promt API") +$$ LANGUAGE plpython2u; + +-- initiate translation of all fields where result is NULL +CREATE OR REPLACE FUNCTION translation_proxy._promt_start_translation() +RETURNS VOID AS $$ + import pycurl + from StringIO import StringIO + from urllib import urlencode + import json + import re + + update_plan = plpy.prepare( + "UPDATE translation_proxy.cache SET result = $1, encoded = NULL WHERE id = $2", + [ 'text', 'bigint' ] ) + cookie = plpy.execute( "SELECT translation_proxy._promt_login()" )[0]['_promt_login'] + server_url = plpy.execute( "SELECT current_setting('translation_proxy.promt.server_url')" )[0]['current_setting'] + '/Services/v1/rest.svc/TranslateText?' + + curl = pycurl.Curl() + curl.setopt( pycurl.HTTPHEADER, [ 'Accept: application/json' ] ) + curl.setopt( pycurl.COOKIELIST, cookie ) + cursor = plpy.cursor( """ + SELECT id, source, target, q, profile + FROM translation_proxy.cache + WHERE api_engine = 'promt' AND result IS NULL + ORDER BY source, target, profile + FOR UPDATE SKIP LOCKED + """ ) + + while True: + row = cursor.fetch(1) + if not row: + break + buffer = StringIO() + curl.setopt( pycurl.WRITEDATA, buffer ) + curl.setopt( pycurl.URL, server_url + + urlencode({ 'from': row[0]['source'], + 'to': row[0]['target'], + 'text': row[0]['q'], + 'profile': row[0]['profile'] }) ) + curl.perform() + answer_code = curl.getinfo( pycurl.RESPONSE_CODE ) + if answer_code != 200 : + plpy.error( "Promt API returned %s\nBody is: %s" % ( answer_code, buffer.getvalue() )) + # normally this must return plain text + # translation is valid + # and yes, promt answers with quoted string like '"some text"' + t = re.sub( r'^"|"$', '', buffer.getvalue() ) + plpy.debug( "Promt answer is %s", t ) + plpy.execute( update_plan, [ t, row[0]['id'] ]) + buffer.close() + + curl.close() + buffer.close() +$$ language plpython2u; + +-- from, to, text[], profile +CREATE OR REPLACE FUNCTION translation_proxy.promt_translate_array( + src CHAR(2), dst CHAR(2), qs TEXT[], api_profile TEXT DEFAULT '') +RETURNS TEXT[] AS $BODY$ +DECLARE + new_row_ids BIGINT[]; -- saving here rows that needs translation + r TEXT[]; +BEGIN + SET SCHEMA 'translation_proxy'; + IF src = dst THEN + RAISE EXCEPTION '''source'' cannot be equal to ''target'' (see details)' + USING detail = 'Received equal ''source'' and ''target'': ' || source; + END IF; + IF array_length(qs, 1) = 0 THEN + RAISE EXCEPTION 'NEED SOMETHING TO TRANSLATE'; + END IF; + -- let Promt translates rows with NULL result + WITH created( saved_ids ) AS ( + INSERT INTO cache ( source, target, q, profile, api_engine ) + ( SELECT src, dst, unnest(qs), api_profile, 'promt' ) + ON CONFLICT (md5(q), source, target, api_engine, profile) DO + UPDATE SET source = src + -- this is dirty hack doing nothing with table only for returning all requested ids + RETURNING id ) + SELECT array_agg( saved_ids ) FROM created INTO new_row_ids; + IF ( current_setting('translation_proxy.promt.valid_from') IS NOT NULL + AND current_setting('translation_proxy.promt.valid_from')::timestamp < current_timestamp ) + AND ( current_setting('translation_proxy.promt.valid_until') IS NOT NULL + AND current_setting('translation_proxy.promt.valid_until')::timestamp > current_timestamp ) + AND array_length( new_row_ids, 1 ) > 0 THEN + PERFORM _promt_start_translation(); + END IF; + -- all translations are in the cache table now + SELECT array_agg( result ) FROM cache WHERE id IN ( SELECT unnest( new_row_ids ) ) INTO r; + RETURN r; +END; +$BODY$ LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION translation_proxy.promt_translate( + src CHAR(2), dst CHAR(2), qs TEXT, api_profile text DEFAULT '') RETURNS TEXT[] AS $BODY$ +BEGIN + SELECT translation_proxy.promt_translate_array( src, dst, ARRAY[qs], api_profile); +END; +$BODY$ LANGUAGE plpgsql; + +-- text, returns language, saves to cache +CREATE OR REPLACE FUNCTION translation_proxy._promt_detect_language(qs text) +RETURNS CHAR(10) AS $$ + import pycurl + from StringIO import StringIO + from urllib import urlencode + + plan = plpy.prepare("SELECT current_setting('translation_proxy.promt.server_url')") + server_url = "%s/Services/v1/rest.svc/DetectTextLanguage" % plpy.execute(plan)[0]['current_setting'] + cookie = plpy.execute("SELECT translation_proxy._promt_login()")[0]['_promt_login'] + + buffer = StringIO() + curl = pycurl.Curl() + curl.setopt( pycurl.URL, server_url + '?' + urlencode({ 'text': qs } )) + curl.setopt( pycurl.WRITEDATA, buffer ) + curl.setopt( pycurl.COOKIELIST, cookie ) + curl.perform() + answer_code = curl.getinfo( pycurl.RESPONSE_CODE ) + curl.close() + if answer_code != 200 : + plpy.error( "Promt API returned %s\nBody is: %s" % ( answer_code, buffer.getvalue() )) + answer = buffer.getvalue().decode('utf-8').replace('"','') + buffer.close() + plpy.debug("Answer len is %d, content is %s, unicode? %d" % ( len(answer), answer, isinstance(answer, unicode) ) ) + + if answer != 'kk': + return answer + + plpy.error("Promt don't know that language" ) +$$ language plpython2u; + +CREATE OR REPLACE FUNCTION translation_proxy.promt_detect_language(qs text) +RETURNS char(2) as $$ +DECLARE + lng TEXT; +BEGIN + IF qs = '' OR qs IS NULL THEN + RAISE EXCEPTION 'text cannot be equal empty'; + END IF; + -- checking cache + lng := translation_proxy._find_detected_language(qs, 'promt'); + IF lng IS NOT NULL THEN + RETURN lng; + END IF; + lng := translation_proxy._promt_detect_language(qs); + IF lng <> '' THEN + INSERT INTO translation_proxy.cache + ( lang, q, api_engine ) VALUES ( lng, qs, 'promt' ); + END IF; + RETURN lng; +END; +$$ LANGUAGE plpgsql; + +-- server-url, cookie-file-name +CREATE OR REPLACE FUNCTION translation_proxy._promt_logout() RETURNS BOOLEAN AS $$ + import pycurl + from StringIO import StringIO + + plan = plpy.prepare("SELECT current_setting('translation_proxy.promt.server_url')") + server_url = "%s/Services/auth/rest.svc/Logout" % plpy.execute(plan)[0]['current_setting'] + + buffer = StringIO() + cookie = plpy.execute( "SELECT creds FROM translation_proxy.authcache WHERE api_engine = 'promt'" )[0]['creds'] + if not cookie : + return False + + curl = pycurl.Curl() + curl.setopt( pycurl.URL, server_url ) + curl.setopt( pycurl.WRITEDATA, buffer ) + curl.setopt( pycurl.COOKIELIST, cookie ) + curl.perform() + answer_code = curl.getinfo( pycurl.RESPONSE_CODE ) + if answer_code == 200 : + plan = plpy.execute( """ + UPDATE translation_proxy.authcache + SET ( creds, updated ) = ( NULL, now() ) + WHERE api_engine = 'promt' + """ ) + curl.close() + return answer_code == 200 +$$ LANGUAGE plpython2u; diff --git a/install_promt_vars.sql b/install_promt_vars.sql new file mode 100644 index 0000000..4eaa523 --- /dev/null +++ b/install_promt_vars.sql @@ -0,0 +1,8 @@ +-- Promt API +ALTER DATABASE DBNAME SET translation_proxy.promt.login = 'YOUR_PROMT_LOGIN'; +ALTER DATABASE DBNAME SET translation_proxy.promt.password = 'YOUR_PROMT_PASSWORD'; +ALTER DATABASE DBNAME SET translation_proxy.promt.server_url = 'YOUR_PROMT_SERVER_URL'; +ALTER DATABASE DBNAME SET translation_proxy.promt.login_timeout = 'PROMT_LOGIN_TIMEOUT'; +ALTER DATABASE DBNAME SET translation_proxy.promt.cookie_file = 'PROMT_COOKIE_FILE'; +ALTER DATABASE DBNAME SET translation_proxy.promt.valid_from = 'PROMT_KEY_VALID_FROM'; +ALTER DATABASE DBNAME SET translation_proxy.promt.valid_until = 'PROMT_KEY_VALID_UNTIL'; diff --git a/migrations/2017-04-06.sql b/migrations/2017-04-06.sql new file mode 100644 index 0000000..ae0a227 --- /dev/null +++ b/migrations/2017-04-06.sql @@ -0,0 +1 @@ +ALTER SCHEMA google_translate RENAME TO translation_proxy; diff --git a/setup-example.yml b/setup-example.yml new file mode 100644 index 0000000..6157038 --- /dev/null +++ b/setup-example.yml @@ -0,0 +1,36 @@ +--- +:global: + :use: true + :database: DBNAME + :host: null + :port: null + :username: postgres + :password: null +:api: + :user: apiuser + :password: APIUSER-PASSWORD + :schema: v2 + :current_engine: :promt +:google: + :use: true + :api_key: GOOGLE_API_KEY + :valid_from: 2017-04-07 + :valid_until: 2018-04-07 +:promt: + :use: true + :login: PROMT_LOGIN + :password: PROMT_PASSWORD + :login_timeout: 30 minutes + :server_url: http://pts.promt.com/pts + :cookie_file: /tmp/promt_cookie.jar + :valid_from: 2017-04-07 + :valid_until: 2018-04-07 +:bing: + :use: true + :api_key: BING_TOKEN_1 + :key2: BING_TOKEN_2 + :token_expiration: 10 minutes + :subscription_id: BING_SUBSCRIPTION_ID + :account_name: BING_ACC_NAME + :valid_from: 2017-04-07 + :valid_until: 2018-04-07 diff --git a/setup.rb b/setup.rb new file mode 100755 index 0000000..13f315e --- /dev/null +++ b/setup.rb @@ -0,0 +1,83 @@ +#!/usr/bin/env ruby +if (ARGV[0] =~/^-?-h/) || (! File.exists?('setup.yml')) + puts <<-USAGEBANNER + + Usage: + copy the file 'setup-example.yml' to 'setup.yml' + and put there keys for APIs that you have + and creds for your database access. + Then start this script. + If some :use key is 'false' such engine will not be installed. + +USAGEBANNER + exit +end + +require 'yaml' +class Setup + @@cfg = YAML.load_file 'setup.yml' + def setup_core + # Global setup + [:global, :google, :promt, :bing].each do |engine| + vars = "install_#{ engine }_vars.sql" + script = "install_#{ engine }_core.sql" + @@cfg[engine][:script] = + File.read(vars).gsub( ' DBNAME ', " #{ @@cfg[:global][:database] } ") + "\n" + + File.read(script).gsub( ' DBNAME ', " #{ @@cfg[:global][:database] } ") + end + ENV['PGPASSWORD'] = @@cfg[:global][:password] unless @@cfg[:global][:password].nil? + ENV['PGUSER'] = @@cfg[:global][:username] unless @@cfg[:global][:username].nil? + # @psql = File.open '/tmp/setup-script.sql', 'w' + [:global, :google, :promt, :bing].each do |engine| + @psql = IO::popen( [ 'psql', @@cfg[:global][:database] ], 'w' ) + self.send( "setup_#{ engine }" ) if @@cfg[engine][:use] + @psql.close + end + self + end + + def setup_api + puts "\t\t ==== API v2 features" + api_code = File.read('install_api_vars.sql') + File.read('install_api.sql') + @psql = IO::popen( [ 'psql', @@cfg[:global][:database] ], 'w' ) + @psql.write api_code.gsub( /DBNAME/, @@cfg[:global][:database]) + .gsub( /APIUSER-PASSWORD/, @@cfg[:api][:password] ) + .gsub( /API_SCHEMA_NAME/, @@cfg[:api][:schema] ) + .gsub( /API_USERNAME/, @@cfg[:api][:user] ) + .gsub( /CURRENT_API_ENGINE/, @@cfg[:api][:current_engine].to_s ) + @psql.close + self + end + + def setup_global + puts "\t\t ==== Core features" + @psql.write @@cfg[:global][:script] + "\n" + end + + def setup_google + puts "\t\t ==== Setup Google API" + @psql.write @@cfg[:google][:script].gsub( /YOUR_GOOGLE_API_KEY/, @@cfg[:google][:api_key] ) + .gsub( /GOOGLE_VALID_FROM/, @@cfg[:google][:valid_from].to_s ) + .gsub( /GOOGLE_VALID_UNTIL/, @@cfg[:google][:valid_until].to_s ) + "\n" + end + + def setup_promt + puts "\t\t ==== Setup Promt API" + @psql.write @@cfg[:promt][:script].gsub(/YOUR_PROMT_LOGIN/, @@cfg[:promt][:login] ) + .gsub( /YOUR_PROMT_PASSWORD/, @@cfg[:promt][:password] ) + .gsub( /YOUR_PROMT_SERVER_URL/, @@cfg[:promt][:server_url] ) + .gsub( /PROMT_LOGIN_TIMEOUT/, @@cfg[:promt][:login_timeout] ) + .gsub( /PROMT_COOKIE_FILE/, @@cfg[:promt][:cookie_file]) + .gsub( /PROMT_KEY_VALID_FROM/, @@cfg[:promt][:valid_from].to_s ) + .gsub( /PROMT_KEY_VALID_UNTIL/, @@cfg[:promt][:valid_until].to_s ) + "\n" + end + + def setup_bing + puts "\t\t ==== Setup MS Bing API" + @psql.write @@cfg[:bing][:script].gsub( /YOUR_BING_API_KEY/, @@cfg[:bing][:api_key] ) + .gsub( /BING_TOKEN_EXPIRATION/, @@cfg[:bing][:token_expiration] ) + "\n" + end + +end + +Setup.new.setup_core.setup_api diff --git a/uninstall_api.sql b/uninstall_api.sql index 4e964bc..6ea57d4 100644 --- a/uninstall_api.sql +++ b/uninstall_api.sql @@ -14,4 +14,4 @@ DROP FUNCTION IF EXISTS v2.translate_array(CHAR(2), CHAR(2), JSON) -- REVOKE USAGE ON SCHEMA v2 FROM apiuser; -- DROP SCHEMA v2 CASCADE; --- DROP ROLE apiuser; \ No newline at end of file +-- DROP ROLE apiuser; diff --git a/uninstall_core.sql b/uninstall_core.sql index 7a022df..ac43e0c 100644 --- a/uninstall_core.sql +++ b/uninstall_core.sql @@ -1 +1,2 @@ -drop schema google_translate cascade; +DROP SCHEMA IF EXISTS google_translate CASCADE; +DROP SCHEMA IF EXISTS translation_proxy CASCADE;