From be71955469dd19fcc4136ec0e81880fa58cccb22 Mon Sep 17 00:00:00 2001 From: Vamsi Date: Thu, 21 Sep 2023 03:09:42 +0530 Subject: [PATCH 01/11] changed webex meetings to push actioables to webex bot --- webex_UI/webex_meetings/index.html | 15 +- webex_UI/webex_meetings/index.js | 217 ++++++++++++++++++++--------- 2 files changed, 162 insertions(+), 70 deletions(-) diff --git a/webex_UI/webex_meetings/index.html b/webex_UI/webex_meetings/index.html index c6f1fd4..3d99e0e 100644 --- a/webex_UI/webex_meetings/index.html +++ b/webex_UI/webex_meetings/index.html @@ -18,8 +18,19 @@

BLAZE - WebEx Plugin

-

Research edition of WebEx Plugin, powered by BLAZE.

- +

Research edition of WebEx Plugin, powered by BLAZE.

+
+ + + +
+ +
+ +
+
+

Make sure to turn on WebEx Assistant prior to clicking Submit!

+
diff --git a/webex_UI/webex_meetings/index.js b/webex_UI/webex_meetings/index.js index 9895bb0..82cb342 100644 --- a/webex_UI/webex_meetings/index.js +++ b/webex_UI/webex_meetings/index.js @@ -3,7 +3,13 @@ let receiveTranscriptionOption = true; let transcript_final_result = {"transcript":""}; let meetings; let current_meeting; - +let actionables=""; +var ACCESS_TOKEN = ""; +let is_bot = false; +let botEmailID = ""; +let time_interval = 60000; +let interval = 1 +let botIntervalID; function summary() { // WARNING: For POST requests, body is set to null by browsers. @@ -28,13 +34,15 @@ function summary() { let summaryContainer = document.getElementById('summaryContainer') summaryContainer.innerHTML = `
${summary}
` - let actionables = response["result"]["actionables"] + actionables = response["result"]["actionables"] let actionablesContainer = document.getElementById('actionablesContainer') actionablesContainer.innerHTML = `
${actionables}
` let time = response["result"]["agenda"] let timeContainer = document.getElementById('timeContainer') timeContainer.innerHTML = `
${time}
` + + // index.html } }); @@ -45,77 +53,150 @@ function summary() { } +function bot_response() { + // WARNING: For POST requests, body is set to null by browsers. "blazetranscriptionbot@webex.bot" + + console.log("sending actionables to bot") + let data = JSON.stringify({ + "toPersonEmail": botEmailID , + "text": actionables, + + }); + + var xhr = new XMLHttpRequest(); + xhr.withCredentials = false; + + -webex = window.webex = Webex.init({ - config: { - logger: { - level: "debug", - }, - meetings: { - reconnection: { - enabled: true, + xhr.open("POST", "https://webexapis.com/v1/messages"); + xhr.setRequestHeader("Content-Type", "application/json"); + xhr.setRequestHeader('Authorization',`Bearer ${ACCESS_TOKEN}`); + xhr.send(data); + + +} + +// Send function to send keys/ids to the REST API +function submitForm() { + var webexId = document.getElementById("access-token").value; + botEmailID = document.getElementById("bot-email-id").value; + interval = document.getElementById("time-interval").value; + + if (botEmailID !== "") { + is_bot = true + if (interval !== ""){ + time_interval = 60000 * interval + } + } + if(is_bot===true){ + if(!botIntervalID){ + botIntervalID = setInterval(bot_response, time_interval); + } + + } + + // Call big scrip tto use WebexID key to register the mtg + + ACCESS_TOKEN = webexId; + document.getElementById("iniform").style.display = "none"; + registerMeeting(); + + +} + +function registerMeeting() { + + console.log("Entered script, got access token"); + console.log(ACCESS_TOKEN); + + initWebex(); + console.log("Initialized Webex"); + + setTimeout(function() { + register(); + console.log("Register meeting"); + }, 2000); + + +} + +function initWebex(){ + webex = window.webex = Webex.init({ + config: { + logger: { + level: "debug", }, - enableRtx: true, - experimental: { - enableUnifiedMeetings: true, + meetings: { + reconnection: { + enabled: true, + }, + enableRtx: true, + experimental: { + enableUnifiedMeetings: true, + }, }, + // Any other 
sdk config we need + }, + credentials: { + access_token: + ACCESS_TOKEN, }, - // Any other sdk config we need - }, - credentials: { - access_token: - "", - }, -}); - -webex.once("ready", () => { - console.log("Authentication#initWebex() :: Webex Ready"); -}); - -webex.meetings.register().then(() => { - console.log("successful registered"); - webex.meetings - .syncMeetings() - .then( - () => - new Promise((resolve) => { - setTimeout(() => resolve(), 3000); - }) - ) - .then(() => { - console.log( - "MeetingsManagement#collectMeetings() :: successfully collected meetings" - ); - meetings = webex.meetings.getAllMeetings(); - - if (webex.meetings.registered) { - console.log(meetings); - current_meeting = meetings[Object.keys(meetings)[0]]; - console.log(current_meeting); - current_meeting.on( - "meeting:receiveTranscription:started", - (payload) => { - if (payload["type"]=="transcript_final_result"){ - transcript_final_result["transcript"] = transcript_final_result["transcript"] + ", " + payload["transcription"]; + }); + + webex.once("ready", () => { + console.log("Authentication#initWebex() :: Webex Ready"); + }); +} + + + +function register(){ + webex.meetings.register().then(() => { + console.log("successful registered"); + webex.meetings + .syncMeetings() + .then( + () => + new Promise((resolve) => { + setTimeout(() => resolve(), 3000); + }) + ) + .then(() => { + console.log( + "MeetingsManagement#collectMeetings() :: successfully collected meetings" + ); + meetings = webex.meetings.getAllMeetings(); + + if (webex.meetings.registered) { + console.log(meetings); + current_meeting = meetings[Object.keys(meetings)[0]]; + console.log(current_meeting); + current_meeting.on( + "meeting:receiveTranscription:started", + (payload) => { + if (payload["type"]=="transcript_final_result"){ + transcript_final_result["transcript"] = transcript_final_result["transcript"] + ", " + payload["transcription"]; + + } + + console.log(transcript_final_result) } - - 
console.log(transcript_final_result) - - } - ); - } - const joinOptions = { - moveToResource: false, - resourceId: webex.devicemanager._pairedDevice - ? webex.devicemanager._pairedDevice.identity.id - : undefined, - receiveTranscription: receiveTranscriptionOption, - }; - - current_meeting.join(joinOptions); - }); -}); + ); + } + const joinOptions = { + moveToResource: false, + resourceId: webex.devicemanager._pairedDevice + ? webex.devicemanager._pairedDevice.identity.id + : undefined, + receiveTranscription: receiveTranscriptionOption, + }; + + current_meeting.join(joinOptions); + }); + }); +} + -const intervalID = setInterval(summary, 100000); +const intervalID = setInterval(summary, 10000); From 58d597083bc5a3816f494496377c96e890429547 Mon Sep 17 00:00:00 2001 From: Vamsi Date: Thu, 19 Oct 2023 06:39:38 +0530 Subject: [PATCH 02/11] panoptica gpt integration --- backend/config.py | 7 +- backend/datasets/common/Swagger.py | 131 +++++++++++ backend/datasets/common/escherauth.py | 313 ++++++++++++++++++++++++++ backend/models/common/LLM.py | 4 + backend/models/common/OpenAI.py | 163 +++++++++++++- backend/server/core/model_views.py | 65 +++++- backend/server/core/routes.py | 12 +- webex_UI/webex_bot/cmds.py | 20 +- webex_UI/webex_bot/help.py | 180 ++++++++++++++- webex_UI/webex_bot/main.py | 7 +- 10 files changed, 888 insertions(+), 14 deletions(-) create mode 100644 backend/datasets/common/Swagger.py create mode 100644 backend/datasets/common/escherauth.py create mode 100644 backend/models/common/LLM.py diff --git a/backend/config.py b/backend/config.py index 2689e32..d81a05d 100644 --- a/backend/config.py +++ b/backend/config.py @@ -41,11 +41,12 @@ class TestingConfig(Config): PREF_REST_API = "http://0.0.0.0:" DATABASE_URI = 'test' TESTING = True - WEBEX_BOT_TOKEN = os.environ.get('WEBEX_BOT_TOKEN', "") - WEBEX_ACCESS_TOKEN = os.environ.get('WEBEX_ACCESS_TOKEN', "") + WEBEX_BOT_TOKEN = os.environ.get('WEBEX_BOT_TOKEN', 
"NGM5N2U2MDgtMzc4YS00NjY1LWFjN2MtMjBhNTM4MTgzNzAyOWNkMmI3YTYtYjJk_PF84_1eb65fdf-9643-417f-9974-ad72cae0e10f") + WEBEX_ACCESS_TOKEN = os.environ.get('WEBEX_ACCESS_TOKEN', "YTAzMGJmYmQtY2I1Ni00MGRmLWJlNWYtNDJjNjY1NmFjZjljM2RjODhmY2QtY2M4_PF84_1eb65fdf-9643-417f-9974-ad72cae0e10f") - OPENAPI_KEY = os.environ.get('OPENAPI_KEY', "") + OPENAPI_KEY = os.environ.get('OPENAPI_KEY', "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") all_modules = {"openai":"backend.server.utils.openai_utils"} + BOT_EMAIL = 'blazetranscriptionbot@webex.bot' @classmethod diff --git a/backend/datasets/common/Swagger.py b/backend/datasets/common/Swagger.py new file mode 100644 index 0000000..41e9f16 --- /dev/null +++ b/backend/datasets/common/Swagger.py @@ -0,0 +1,131 @@ +import requests +import json +import os.path as path +import datetime +from backend.config import TestingConfig +from backend.datasets.common.escherauth import EscherRequestsAuth + + +class Swagger: + functions_supported = ["search","summarization","functions"] + + def __init__(self): + self.date_format = '%Y%m%dT%H%M%SZ' + self.date_string = datetime.datetime.utcnow().strftime(self.date_format) + self.date = datetime.datetime.strptime(self.date_string, self.date_format) + + self._class_name = 'Swagger' + self._dataset_name = 'Swagger' + + ######## Change according to swagger link + self.swagger_url = "https://appsecurity.cisco.com/api/v2/api-docs" + self.headers = {'X-Escher-Date': self.date_string, + 'host': 'appsecurity.cisco.com', + 'content-type': 'application/json'} + self.auth=EscherRequestsAuth("global/services/portshift_request", + {'current_time': self.date}, + {'api_key': "2a797523-3934-4698-9975-af13de9e15ca", 'api_secret': "kSN59Kje1AiOfFaTe+itdHiPUnFUIxC1bOs4gJ1kCnk="}) + ############## + self.file_name = "webex_transcripts.json" + self.swagger_json = self.fetch_swagger_json() + self.api_info = self.extract_api_information(self.swagger_json) + + def _get_class_name(self): + return self._class_name + + def 
_get_dataset_name(self): + return self._dataset_name + + def fetch_swagger_json(self): + print(self.auth) + response = requests.get(self.swagger_url, + headers=self.headers, + auth=self.auth) + + if response.status_code == 200: + return response.json() + else: + print(f"Failed to fetch Swagger JSON. Status code: {response.status_code}") + return None + + def dereference_schema(self,ref, definitions): + """ + Dereference a $ref pointer to fetch the actual schema from definitions. + + :param ref: The $ref string. + :param definitions: The Swagger definitions section. + :return: The dereferenced schema. + """ + if not ref.startswith("#/definitions/"): + return {} + ref_name = ref.split("/")[2] + return definitions.get(ref_name, {}) + + def extract_param_type(self,param, definitions): + """ + Extract the type of a parameter from Swagger data. + + :param param: The Swagger parameter data. + :param definitions: The Swagger definitions section. + :return: The parameter type and description and item type + to take care of the case where parameter type is array + """ + # Direct type from parameter + param_items = '' + param_type = param.get('type') + param_description = param.get('description', '') + if param_type == 'array': + param_items = param.get('items', '') + # If not found, look inside the schema + if 'schema' in param: + schema = param['schema'] + if '$ref' in schema: + schema = self.dereference_schema(schema['$ref'], definitions) + param_type = schema.get('type', param_type) + param_description = schema.get('description', param_description) + + return param_type or 'unknown', param_description, param_items + + def extract_api_information(self,swagger_data): + api_info = [] + + definitions = swagger_data.get('definitions', {}) + # api_info_description = swagger_data.get('info', {}).get('description', '') + + paths = swagger_data.get('paths', {}) + for path, methods in paths.items(): + for method, details in methods.items(): + function_name = 
details.get('operationId', method + path.replace("/", "_")) + + # Extract parameters, their types, and descriptions + params = details.get('parameters', []) + properties = {} + required = [] + for param in params: + param_type, param_description, param_items = self.extract_param_type(param, definitions) + if param_type == 'array': + properties[param['name']] = {'type': param_type, 'description': param_description, 'items': param_items} + else: + properties[param['name']] = {'type': param_type, 'description': param_description} + if param.get('required'): + required.append(param['name']) + summary = details.get('summary') + # Prioritize the description from the 'info' section + description = details.get('description', '') + tags = details.get('tags', []) + + api_info.append({ + 'name': function_name, + 'description': description if description else summary, + 'parameters': { + 'type': 'object', + 'properties': properties, + 'required': required + }, + 'summary': summary, + 'path': path, + 'tags': tags + }) + + return api_info + diff --git a/backend/datasets/common/escherauth.py b/backend/datasets/common/escherauth.py new file mode 100644 index 0000000..a22c871 --- /dev/null +++ b/backend/datasets/common/escherauth.py @@ -0,0 +1,313 @@ +#code taken from https://github.com/emartech/escher-python and updated to python3 + +import datetime +import hmac +import requests +import urllib.request, urllib.parse, urllib.error +import re + +from hashlib import sha256, sha512 + +try: + from urllib.parse import urlparse, parse_qsl, urljoin + from urllib.parse import quote +except: + from urllib.parse import urlparse, parse_qsl, urljoin, quote + + +class EscherException(Exception): + pass + + +class EscherRequestsAuth(requests.auth.AuthBase): + def __init__(self, credential_scope, options, client): + self.escher = Escher(credential_scope, options) + self.client = client + + def __call__(self, request): + return self.escher.sign(request, self.client) + + +class EscherRequest(): + 
_uri_regex = re.compile('([^?#]*)(\?(.*))?') + + def __init__(self, request): + self.type = type(request) + self.request = request + self.prepare_request_uri() + + def request(self): + return self.request + + def prepare_request_uri(self): + if self.type is requests.models.PreparedRequest: + self.request_uri = self.request.path_url + if self.type is dict: + self.request_uri = self.request['uri'] + match = re.match(self._uri_regex, self.request_uri) + self.uri_path = match.group(1) + self.uri_query = match.group(3) + + def method(self): + if self.type is requests.models.PreparedRequest: + return self.request.method + if self.type is dict: + return self.request['method'] + + def host(self): + if self.type is requests.models.PreparedRequest: + return self.request.host + if self.type is dict: + return self.request['host'] + + def path(self): + return self.uri_path + + def query_parts(self): + return parse_qsl((self.uri_query or '').replace(';', '%3b'), True) + + def headers(self): + if self.type is requests.models.PreparedRequest: + headers = [] + for key, value in self.request.headers.items(): + headers.append([key, value]) + return headers + if self.type is dict: + return self.request['headers'] + + def body(self): + if self.type is requests.models.PreparedRequest: + return self.request.body or '' + if self.type is dict: + return self.request.get('body', '') + + def add_header(self, header, value): + if self.type is requests.models.PreparedRequest: + self.request.headers[header] = value + if self.type is dict: + self.request['headers'].append((header, value)) + + +class AuthParams: + def __init__(self, data, vendor_key): + self._init_data(data, 'X-' + vendor_key + '-') + + def _init_data(self, data, prefix): + self._data = {} + for (k, v) in data: + if k.startswith(prefix): + self._data[k.replace(prefix, '').lower()] = v + + def get(self, name): + if name not in self._data: + raise EscherException('Missing authorization parameter: ' + name) + return self._data[name] 
+ + def get_signed_headers(self): + return self.get('signedheaders').lower().split(';') + + def get_algo_data(self): + data = self.get('algorithm').split('-') + if len(data) != 3: + raise EscherException('Malformed Algorithm parameter') + return data + + def get_algo_prefix(self): + return self.get_algo_data()[0] + + def get_hash_algo(self): + return self.get_algo_data()[2].upper() + + def get_credential_data(self): + data = self.get('credentials').split('/', 2) + if len(data) != 3: + raise EscherException('Malformed Credentials parameter') + return data + + def get_credential_key(self): + return self.get_credential_data()[0] + + def get_credential_date(self): + return datetime.datetime.strptime(self.get_credential_data()[1], '%Y%m%d') + + def get_credential_scope(self): + return self.get_credential_data()[2] + + def get_expires(self): + return int(self.get('expires')) + + def get_request_date(self): + return datetime.datetime.strptime(self.get('date'), '%Y%m%dT%H%M%SZ') + + +class AuthenticationValidator: + def validate_mandatory_signed_headers(self, headers_to_sign): + if 'host' not in headers_to_sign: + raise EscherException('Host header is not signed') + + def validate_hash_algo(self, hash_algo): + if hash_algo not in ('SHA256', 'SHA512'): + raise EscherException('Only SHA256 and SHA512 hash algorithms are allowed') + + def validate_dates(self, current_date, request_date, credential_date, expires, clock_skew): + if request_date.strftime('%Y%m%d') != credential_date.strftime('%Y%m%d'): + raise EscherException('The request date and credential date do not match') + + min_date = current_date - datetime.timedelta(seconds=(clock_skew + expires)) + max_date = current_date + datetime.timedelta(seconds=clock_skew) + if request_date < min_date or request_date > max_date: + raise EscherException('Request date is not within the accepted time interval') + + def validate_credential_scope(self, expected, actual): + if actual != expected: + raise EscherException('Invalid 
credential scope (provided: ' + actual + ', required: ' + expected + ')') + + def validate_signature(self, expected, actual): + if expected != actual: + raise EscherException('The signatures do not match (provided: ' + actual + ', calculated: ' + expected + ')') + + +class Escher: + _normalize_path = re.compile('([^/]+/\.\./?|/\./|//|/\.$|/\.\.$)') + + def __init__(self, credential_scope, options={}): + self.credential_scope = credential_scope + self.algo_prefix = options.get('algo_prefix', 'ESR') + self.vendor_key = options.get('vendor_key', 'Escher') + self.hash_algo = options.get('hash_algo', 'SHA256') + self.current_time = options.get('current_time', datetime.datetime.utcnow()) + self.auth_header_name = options.get('auth_header_name', 'X-Escher-Auth') + self.date_header_name = options.get('date_header_name', 'X-Escher-Date') + self.clock_skew = options.get('clock_skew', 300) + self.algo = self.create_algo() + self.algo_id = self.algo_prefix + '-HMAC-' + self.hash_algo + + def sign(self, r, client, headers_to_sign=[]): + request = EscherRequest(r) + + for header in [self.date_header_name.lower(), 'host']: + if header not in headers_to_sign: + headers_to_sign.append(header) + + signature = self.generate_signature(client['api_secret'], request, headers_to_sign, self.current_time) + request.add_header(self.auth_header_name, ", ".join([ + self.algo_id + ' Credential=' + client['api_key'] + '/' + self.short_date( + self.current_time) + '/' + self.credential_scope, + 'SignedHeaders=' + self.prepare_headers_to_sign(headers_to_sign), + 'Signature=' + signature + ])) + return request.request + + def authenticate(self, r, key_db): + request = EscherRequest(r) + + auth_params = AuthParams(request.query_parts(), self.vendor_key) + validator = AuthenticationValidator() + + validator.validate_mandatory_signed_headers(auth_params.get_signed_headers()) + validator.validate_hash_algo(auth_params.get_hash_algo()) + validator.validate_dates( + self.current_time, + 
auth_params.get_request_date(), + auth_params.get_credential_date(), + auth_params.get_expires(), + self.clock_skew + ) + validator.validate_credential_scope(self.credential_scope, auth_params.get_credential_scope()) + + if auth_params.get_credential_key() not in key_db: + raise EscherException('Invalid Escher key') + + calculated_signature = self.generate_signature( + key_db[auth_params.get_credential_key()], request, + auth_params.get_signed_headers(), + auth_params.get_request_date() + ) + validator.validate_signature(calculated_signature, auth_params.get('signature')) + + return auth_params.get_credential_key() + + def hmac_digest(self, key, message, is_hex=False): + if not isinstance(key, bytes): + key = key.encode('utf-8') + digest = hmac.new(key, message.encode('utf-8'), self.algo) + if is_hex: + return digest.hexdigest() + return digest.digest() + + def generate_signature(self, api_secret, req, headers_to_sign, current_time): + canonicalized_request = self.canonicalize(req, headers_to_sign) + string_to_sign = self.get_string_to_sign(canonicalized_request, current_time) + + signing_key = self.hmac_digest(self.algo_prefix + api_secret, self.short_date(current_time)) + for data in self.credential_scope.split('/'): + signing_key = self.hmac_digest(signing_key, data) + + return self.hmac_digest(signing_key, string_to_sign, True) + + def canonicalize(self, req, headers_to_sign): + return "\n".join([ + req.method(), + self.canonicalize_path(req.path()), + self.canonicalize_query(req.query_parts()), + self.canonicalize_headers(req.headers(), headers_to_sign), + '', + self.prepare_headers_to_sign(headers_to_sign), + self.algo(req.body().encode('utf-8')).hexdigest() + ]) + + def canonicalize_path(self, path): + changes = 1 + while changes > 0: + path, changes = self._normalize_path.subn('/', path, 1) + return path + + def canonicalize_headers(self, headers, headers_to_sign): + headers_list = [] + for key, value in iter(sorted(headers)): + if key.lower() in 
headers_to_sign: + headers_list.append(key.lower() + ':' + self.normalize_white_spaces(value)) + return "\n".join(sorted(headers_list)) + + def normalize_white_spaces(self, value): + index = 0 + value_normalized = [] + pattern = re.compile(r'\s+') + for part in value.split('"'): + if index % 2 == 0: + part = pattern.sub(' ', part) + value_normalized.append(part) + index += 1 + return '"'.join(value_normalized).strip() + + def canonicalize_query(self, query_parts): + safe = "~+!'()*" + query_list = [] + for key, value in query_parts: + if key == 'X-' + self.vendor_key + '-Signature': + continue + query_list.append(quote(key, safe=safe) + '=' + quote(value, safe=safe)) + return "&".join(sorted(query_list)) + + def get_string_to_sign(self, canonicalized_request, current_time): + return "\n".join([ + self.algo_id, + self.long_date(current_time), + self.short_date(current_time) + '/' + self.credential_scope, + self.algo(canonicalized_request.encode('utf-8')).hexdigest() + ]) + + def create_algo(self): + if self.hash_algo == 'SHA256': + return sha256 + if self.hash_algo == 'SHA512': + return sha512 + + def long_date(self, time): + return time.strftime('%Y%m%dT%H%M%SZ') + + def short_date(self, time): + return time.strftime('%Y%m%d') + + def prepare_headers_to_sign(self, headers_to_sign): + return ";".join(sorted(headers_to_sign)) \ No newline at end of file diff --git a/backend/models/common/LLM.py b/backend/models/common/LLM.py new file mode 100644 index 0000000..4a1d64f --- /dev/null +++ b/backend/models/common/LLM.py @@ -0,0 +1,4 @@ + + + +class LLMs(): diff --git a/backend/models/common/OpenAI.py b/backend/models/common/OpenAI.py index f382f7a..1b374ff 100644 --- a/backend/models/common/OpenAI.py +++ b/backend/models/common/OpenAI.py @@ -1,5 +1,26 @@ import openai from flask import current_app +import openai +import os +import inspect +import re +import json +from dotenv import load_dotenv +import requests +#from algebra import add, sub, mul +#from sqlmethods import 
get_top_k_entries, setup_database, prompt_append +# from pylogic import create_problem, add_exact_position_constraint, \ +# add_below_constraint, add_above_constraint, check_option +from typing import Callable, Dict +import datetime +from collections import defaultdict + +# access_key = "2a797523-3934-4698-9975-af13de9e15ca" +# secret_key = "kSN59Kje1AiOfFaTe+itdHiPUnFUIxC1bOs4gJ1kCnk=" +# date_format = '%Y%m%dT%H%M%SZ' +# date_string = datetime.datetime.utcnow().strftime(date_format) +# date = datetime.datetime.strptime(date_string, date_format) +# openai_functions, swagger_data, tag_dict, classifier_tag = get_panoptica_data() def get_openAI_info(): """ @@ -21,9 +42,11 @@ def get_openAI_info(): return model_info class OpenAI(): - tasks_supported = ["actionables","summarization"] + tasks_supported = ["actionables","summarization","chat"] + model = "gpt-3.5-turbo-0613" def __init__(self): + self._info = get_openAI_info() def _get_model_info(self): @@ -77,4 +100,140 @@ def _summarize_text(self, text_to_summarize): def get_actionables(self,text): response = self.gpt_analysis("actionables",text) - return response['choices'][0]['text'] \ No newline at end of file + return response['choices'][0]['text'] + + def parse_docstring(self,function: Callable) -> Dict: + doc = inspect.getdoc(function) + + function_description = re.search(r'(.*?)Parameters', doc, re.DOTALL).group(1).strip() + parameters_description = re.findall(r'(\w+)\s*:\s*([\w\[\], ]+)\n(.*?)(?=\n\w+\s*:\s*|\nReturns|\nExample$)', doc, re.DOTALL) + + returns_description_match = re.search(r'Returns\n(.*?)(?=\n\w+\s*:\s*|$)', doc, re.DOTALL) + returns_description = returns_description_match.group(1).strip() if returns_description_match else None + + example = re.search(r'Example\n(.*?)(?=\n\w+\s*:\s*|$)', doc, re.DOTALL) + example_description = example.group(1).strip() if example else None + + signature_params = list(inspect.signature(function).parameters.keys()) + properties = {} + required = [] + for name, 
type, description in parameters_description: + name = name.strip() + type = type.strip() + description = description.strip() + + required.append(name) + properties[name] = { + "type": type, + "description": description, + } + if len(signature_params) != len(required): + print(f'Signature params : {signature_params}, Required params : {required}') + raise ValueError(f"Number of parameters in function signature ({signature_params}) does not match the number of parameters in docstring ({required})") + for param in signature_params: + if param not in required: + raise ValueError(f"Parameter '{param}' in function signature is missing in the docstring") + + parameters = { + "type": "object", + "properties": properties, + "required": required, + } + function_dict = { + "name": function.__name__, + "description": function_description, + "parameters": parameters, + "returns": returns_description, + # "example": example_description, + } + + return function_dict + + + def run_with_functions(self,messages, function_dicts): + response = '' + print(f"within run_with_functions : {messages} and {function_dicts}") + messages[0]["role"] = "system" + response = openai.ChatCompletion.create( + model=self.model, + messages=messages, + functions=function_dicts, + temperature=0, + ) + + return response + + def get_role_message_dict(role, content=None, fn_name=None, arguments=None, result=None): + message_dict = {"role":role} + if role == "user": + message_dict["content"] = content + elif role == "assistant": + message_dict["content"] = content + message_dict["function_call"] = {} + message_dict["function_call"]["name"] = fn_name + message_dict["function_call"]["arguments"] = arguments + elif role == "function": + message_dict["name"] = fn_name + message_dict["content"] = f'{{"result": {str(result)} }}' + return message_dict + + + def translate_to_openai_functions(self,api_info): + openai_functions = [] + tag_dict = defaultdict(list) + count = 0 + for api in api_info: + if not 
api['description']: + print(api['name']+' does not have a description! and is using summary') + count += 1 + function_info = { + 'name': api['name'], + 'description': api['description'] if api['description'] else api['summary'], + 'parameters': api['parameters'], + 'path': api['path'], + } + openai_functions.append(function_info) + + for tag in api['tags']: + tag_dict[tag].append(function_info) + + print(f'Total number of api endpoints without description is {count}') + return openai_functions, tag_dict + + def translate_swagger_data(self,swagger_dataset,description_text): + api_info = swagger_dataset.api_info + openai_functions, tag_dict = self.translate_to_openai_functions(api_info) + + + description_text = description_text + for index, tmp_dict in enumerate(swagger_dataset.swagger_json['tags']): + description_text += f'{tmp_dict["name"]} is returned when the following description is satisfied {tmp_dict["description"]},' + + description_text = description_text[:-1] + '.' + + classifier_tag = { + 'name': "classifies_the_tag", + 'description': description_text, + 'method': 'get', + 'path': '/', + 'tags': 'classifier' + } + + openai_functions.append(classifier_tag) + + return openai_functions, swagger_dataset.swagger_json, tag_dict, classifier_tag + + def run_with_functions(self,messages,function_dicts): + response = '' + print(f"within run_with_functions : {messages} and {function_dicts}") + # messages[0]["role"] = "system" + openai.api_key = current_app.config.get('OPENAPI_KEY') + response = openai.ChatCompletion.create( + model=self.model, + messages=messages, + functions=function_dicts, + temperature=0, + ) + print(type(response)) + print(response) + return response \ No newline at end of file diff --git a/backend/server/core/model_views.py b/backend/server/core/model_views.py index b094fe5..6d5faba 100644 --- a/backend/server/core/model_views.py +++ b/backend/server/core/model_views.py @@ -6,7 +6,8 @@ from flask_restful import Resource, request from flask 
class GetFunctionsFromSwaggerData(Resource):
    """REST resource that converts a Swagger dataset into OpenAI function specs.

    The result is returned to the caller and also written to
    ``<FILES_DIR>/functions.json``.
    """

    def post(self):
        """Translate the requested dataset and persist the function list.

        Expects a JSON object with ``model``, ``dataset`` and
        ``description_text``. Returns ``("Malformed request", 400)`` when the
        body is not a JSON object or a key is missing.
        """
        # silent=True yields None instead of raising on a non-JSON body, so the
        # check below covers every malformed-input case in one place.
        request_json = request.get_json(silent=True)
        required_params = ('model', 'dataset', 'description_text')
        if not isinstance(request_json, dict) or any(param not in request_json for param in required_params):
            return "Malformed request", 400

        server_config = current_app.config.get("server_config")
        model = get_model_object_from_name(request_json['model'], 'actionables', server_config)
        dataset_obj = get_object_from_name(request_json['dataset'], server_config, 'dataset')
        # NOTE(review): assumes get_object_from_name returns a falsy value for
        # an unknown dataset - confirm against the helper.
        if not dataset_obj:
            return "That dataset doesn't exist", 404

        functions, swagger_data, tag_dict, classifier_tag = model.translate_swagger_data(
            dataset_obj, request_json['description_text'])

        response = {
            "functions": functions,
            "swagger_data": swagger_data,
            "tag_dict": tag_dict,
            "classifier_tag": classifier_tag
        }

        # Persist the result to the file RunWithFunctions loads.
        filepath = path.join(current_app.config.get("FILES_DIR"), "functions.json")
        with open(filepath, "w") as outfile:
            json.dump(response, outfile)

        return response
class RunWithFunctions(Resource):
    """REST resource that runs a chat completion against the stored function specs.

    Only the functions filed under the ``dashboard-controller`` tag in
    ``<FILES_DIR>/functions.json`` are offered to the model.
    """

    def post(self):
        """Run ``model.run_with_functions`` for the posted messages.

        Expects a JSON object with ``model`` and ``messages``. Returns
        ``("Malformed request", 400)`` when the body is not a JSON object or a
        key is missing.
        """
        request_json = request.get_json(silent=True)
        # 'messages' is read below, so it must be validated together with 'model'.
        if not isinstance(request_json, dict) or any(param not in request_json for param in ('model', 'messages')):
            return "Malformed request", 400

        functions_path = path.join(current_app.config.get("FILES_DIR"), "functions.json")
        # Context manager closes the handle; the bare open() leaked one per request.
        with open(functions_path) as functions_file:
            data = json.load(functions_file)

        model = get_model_object_from_name(
            request_json['model'], 'actionables', current_app.config.get("server_config"))
        return model.run_with_functions(request_json['messages'], data["tag_dict"]['dashboard-controller'])
class Panoptica(Command):
    """Bot command registered under the ``function`` keyword.

    Holds the function data given at construction and passes it, with the
    incoming message, to ``RunFunction``.
    """

    def __init__(self, functions):
        command_options = {
            "command_keyword": "function",
            "help_message": "function: write query with argument to run function",
            "card": None,
        }
        super().__init__(**command_options)
        self.functions = functions

    def execute(self, message, attachment_actions, query_info):
        """Run the query in ``message`` and return the outcome formatted as text."""
        outcome = RunFunction(message, self.functions)
        return f"{outcome}"
def panoptica_call_functions(full_url):
    """Issue an Escher-signed GET request to ``full_url`` and return the response.

    Credentials are read from the ``PANOPTICA_ACCESS_KEY`` and
    ``PANOPTICA_SECRET_KEY`` environment variables rather than being embedded
    in source.

    Parameters
    ----------
    full_url : str
        Absolute URL of the endpoint to call.

    Returns
    -------
    requests.Response
        The raw response; the status code is printed but not checked here.

    Raises
    ------
    RuntimeError
        If either credential environment variable is unset or empty.
    """
    # NOTE(review): the variable names are new in this change - align them with
    # the deployment's secret management.
    access_key = os.environ.get("PANOPTICA_ACCESS_KEY")
    secret_key = os.environ.get("PANOPTICA_SECRET_KEY")
    if not access_key or not secret_key:
        raise RuntimeError(
            "PANOPTICA_ACCESS_KEY and PANOPTICA_SECRET_KEY must be set to call the Panoptica API")

    date_format = '%Y%m%dT%H%M%SZ'
    date_string = datetime.datetime.utcnow().strftime(date_format)
    # Round-trip through the header format so the datetime handed to the signer
    # is exactly the second-resolution value sent in X-Escher-Date.
    date = datetime.datetime.strptime(date_string, date_format)
    print(f'the full url is {full_url}')
    response = requests.get(full_url,
                            headers={'X-Escher-Date': date_string,
                                     'host': 'appsecurity.cisco.com',
                                     'content-type': 'application/json'},
                            auth=EscherRequestsAuth("global/services/portshift_request",
                                                    {'current_time': date},
                                                    {'api_key': access_key, 'api_secret': secret_key}))

    print("response.status_code = " + str(response.status_code))
    return response
def run_with_functions(messages):
    """POST ``messages`` to the backend ``/run_function`` endpoint.

    Parameters
    ----------
    messages : list
        Chat messages, sent under the ``"messages"`` key next to
        ``"model": "OpenAI"``.

    Returns
    -------
    Any
        The decoded JSON body of the response.

    Raises
    ------
    requests.HTTPError
        If the backend answers with a 4xx/5xx status.
    """
    # Same base URL as the other helpers in this module. The old literal
    # "http://127.0.0.1:3000//run_function" also carried a doubled slash.
    # NOTE(review): assumes CONSTANTS["webex_api_endpoint"] points at the same
    # server that was previously hard-coded - confirm.
    url = CONSTANTS.get("webex_api_endpoint") + "/run_function"

    payload = json.dumps({
        "model": "OpenAI",
        "messages": messages
    })
    headers = {
        'Content-Type': 'application/json'
    }

    response = requests.request("POST", url, headers=headers, data=payload)
    print(response.text)
    # Fail here with a clear HTTP error rather than returning an error body.
    response.raise_for_status()

    return json.loads(response.text)
function_dict): + # setup_database() + #prompt += prompt_append() + prompt = "You are an expert in Panoptica, which is a tool to give security insights in Kubernetes Clusters, API security. You are not aware of anything else. All queries should either use one of the APIs provided or should ask the user to rephrase the qurey: " + prompt + output = [] + fn_names_dict = {} + + if function_dict: + function_dicts = functions + for fn in functions: + fn_names_dict[fn['name']] = fn + else: + for fn in functions: + fn_names_dict[fn.__name__] = fn + function_dicts = [parse_docstring(fun) for fun in functions] + # print(function_dicts) + messages = [get_role_message_dict("user", content=(prompt))] + + response = run_with_functions(messages) + + if response["choices"][0]["finish_reason"] == "stop": + print("Received STOP signal from GPT.") + print() + print() + + elif response["choices"][0]["finish_reason"] == "function_call": + print("Received FUNCTION_CALL signal from GPT.") + fn_name = response["choices"][0]["message"]["function_call"]["name"] + arguments = response["choices"][0]["message"]["function_call"]["arguments"] + #json_arguments = json.loads(arguments) + #function = fn_names_dict[fn_name] + paths = fn_names_dict[fn_name]['path'] + full_url = base_url + paths + print(f"Running the {fn_name} function locally with args {arguments}") + response = panoptica_call_functions(full_url) + #result = function(**json_arguments) + print(f"Finished running {fn_name}. 
def RunFunction(message,functions):
    """Answer ``message`` using the functions filed under the 'dashboard-controller' tag.

    ``functions`` must contain a ``"tag_dict"`` mapping; the selected entries
    are passed to ``prompt_with_functions`` as ready-made function dicts.
    """
    dashboard_functions = functions["tag_dict"]['dashboard-controller']
    return prompt_with_functions(message, dashboard_functions, function_dict=True)
panoptica functions with webexbot --- backend/datasets/common/Swagger.py | 3 +- backend/models/common/OpenAI.py | 10 - webex_UI/webex_bot/escherauth.py | 313 +++++++++++++++++++++++++++++ webex_UI/webex_bot/main.py | 5 - yaml/07_panoptica_bot.yaml | 10 + 5 files changed, 324 insertions(+), 17 deletions(-) create mode 100644 webex_UI/webex_bot/escherauth.py create mode 100644 yaml/07_panoptica_bot.yaml diff --git a/backend/datasets/common/Swagger.py b/backend/datasets/common/Swagger.py index 41e9f16..4b0b967 100644 --- a/backend/datasets/common/Swagger.py +++ b/backend/datasets/common/Swagger.py @@ -24,9 +24,8 @@ def __init__(self): 'content-type': 'application/json'} self.auth=EscherRequestsAuth("global/services/portshift_request", {'current_time': self.date}, - {'api_key': "2a797523-3934-4698-9975-af13de9e15ca", 'api_secret': "kSN59Kje1AiOfFaTe+itdHiPUnFUIxC1bOs4gJ1kCnk="}) + {'api_key': "", 'api_secret': ""}) ############## - self.file_name = "webex_transcripts.json" self.swagger_json = self.fetch_swagger_json() self.api_info = self.extract_api_information(self.swagger_json) diff --git a/backend/models/common/OpenAI.py b/backend/models/common/OpenAI.py index 1b374ff..cce2b2f 100644 --- a/backend/models/common/OpenAI.py +++ b/backend/models/common/OpenAI.py @@ -7,20 +7,10 @@ import json from dotenv import load_dotenv import requests -#from algebra import add, sub, mul -#from sqlmethods import get_top_k_entries, setup_database, prompt_append -# from pylogic import create_problem, add_exact_position_constraint, \ -# add_below_constraint, add_above_constraint, check_option from typing import Callable, Dict import datetime from collections import defaultdict -# access_key = "2a797523-3934-4698-9975-af13de9e15ca" -# secret_key = "kSN59Kje1AiOfFaTe+itdHiPUnFUIxC1bOs4gJ1kCnk=" -# date_format = '%Y%m%dT%H%M%SZ' -# date_string = datetime.datetime.utcnow().strftime(date_format) -# date = datetime.datetime.strptime(date_string, date_format) -# openai_functions, swagger_data, 
class EscherRequestsAuth(requests.auth.AuthBase):
    """``requests`` auth hook that signs each request with an ``Escher`` signer."""

    def __init__(self, credential_scope, options, client):
        # Build the signer first, then store the client credentials dict.
        signer = Escher(credential_scope, options)
        self.escher = signer
        self.client = client

    def __call__(self, request):
        """Sign ``request`` with the stored client and return the signed request."""
        signed_request = self.escher.sign(request, self.client)
        return signed_request
class AuthParams:
    """Accessor for the ``X-<vendor>-*`` authorization parameters of a request.

    ``data`` is an iterable of ``(key, value)`` pairs. Keys that start with
    ``'X-' + vendor_key + '-'`` are kept, with the prefix removed and the rest
    lower-cased.
    """

    def __init__(self, data, vendor_key):
        self._init_data(data, 'X-' + vendor_key + '-')

    def _init_data(self, data, prefix):
        # Slice off only the leading prefix. str.replace() removed every
        # occurrence, corrupting keys that contain the prefix text again.
        self._data = {
            key[len(prefix):].lower(): value
            for (key, value) in data
            if key.startswith(prefix)
        }

    def get(self, name):
        """Return parameter ``name``; raise EscherException if it is absent."""
        if name not in self._data:
            raise EscherException('Missing authorization parameter: ' + name)
        return self._data[name]

    def get_signed_headers(self):
        """Return the signed header names, lower-cased, as a list."""
        return self.get('signedheaders').lower().split(';')

    def get_algo_data(self):
        """Return the three '-'-separated parts of the algorithm parameter."""
        data = self.get('algorithm').split('-')
        if len(data) != 3:
            raise EscherException('Malformed Algorithm parameter')
        return data

    def get_algo_prefix(self):
        return self.get_algo_data()[0]

    def get_hash_algo(self):
        return self.get_algo_data()[2].upper()

    def get_credential_data(self):
        """Return ``[key, date, scope]``; the scope may itself contain '/'."""
        data = self.get('credentials').split('/', 2)
        if len(data) != 3:
            raise EscherException('Malformed Credentials parameter')
        return data

    def get_credential_key(self):
        return self.get_credential_data()[0]

    def get_credential_date(self):
        return datetime.datetime.strptime(self.get_credential_data()[1], '%Y%m%d')

    def get_credential_scope(self):
        return self.get_credential_data()[2]

    def get_expires(self):
        return int(self.get('expires'))

    def get_request_date(self):
        return datetime.datetime.strptime(self.get('date'), '%Y%m%dT%H%M%SZ')
class AuthenticationValidator:
    """Stateless checks applied to an incoming Escher-signed request.

    Every ``validate_*`` method returns ``None`` on success and raises
    ``EscherException`` on failure.
    """

    def validate_mandatory_signed_headers(self, headers_to_sign):
        """Require that the ``host`` header is among the signed headers."""
        if 'host' not in headers_to_sign:
            raise EscherException('Host header is not signed')

    def validate_hash_algo(self, hash_algo):
        """Accept only the two supported hash algorithm names."""
        if hash_algo not in ('SHA256', 'SHA512'):
            raise EscherException('Only SHA256 and SHA512 hash algorithms are allowed')

    def validate_dates(self, current_date, request_date, credential_date, expires, clock_skew):
        """Check that the request date matches the credential day and is still valid.

        The accepted window is
        ``[current_date - (clock_skew + expires), current_date + clock_skew]``,
        with both offsets in seconds.
        """
        if request_date.strftime('%Y%m%d') != credential_date.strftime('%Y%m%d'):
            raise EscherException('The request date and credential date do not match')

        min_date = current_date - datetime.timedelta(seconds=(clock_skew + expires))
        max_date = current_date + datetime.timedelta(seconds=clock_skew)
        if request_date < min_date or request_date > max_date:
            raise EscherException('Request date is not within the accepted time interval')

    def validate_credential_scope(self, expected, actual):
        """Require the provided credential scope to equal the configured one."""
        if actual != expected:
            raise EscherException('Invalid credential scope (provided: ' + actual + ', required: ' + expected + ')')

    def validate_signature(self, expected, actual):
        """Compare the calculated and provided signatures in constant time."""
        # compare_digest avoids the timing side channel of a short-circuiting
        # `!=`. Encoding to bytes keeps it usable for non-ASCII input, which
        # compare_digest rejects for str arguments.
        if not hmac.compare_digest(expected.encode('utf-8'), actual.encode('utf-8')):
            # NOTE(review): this message includes the calculated signature; make
            # sure it is never echoed back to the client.
            raise EscherException('The signatures do not match (provided: ' + actual + ', calculated: ' + expected + ')')
def sign(self, r, client, headers_to_sign=None):
    """Sign request ``r`` and attach the Escher authorization header.

    Parameters
    ----------
    r : requests.PreparedRequest or dict
        The request to sign; it is wrapped in ``EscherRequest``.
    client : dict
        Must provide ``'api_key'`` and ``'api_secret'``.
    headers_to_sign : list of str, optional
        Extra header names to sign. The date header and ``host`` are always
        added. The caller's list is not modified.

    Returns
    -------
    The underlying request object, with the auth header added.
    """
    request = EscherRequest(r)

    # Work on a private copy. The old mutable default (`headers_to_sign=[]`)
    # was appended to in place, so header names accumulated across calls and
    # leaked into the caller's list.
    headers_to_sign = list(headers_to_sign) if headers_to_sign else []
    for header in (self.date_header_name.lower(), 'host'):
        if header not in headers_to_sign:
            headers_to_sign.append(header)

    signature = self.generate_signature(client['api_secret'], request, headers_to_sign, self.current_time)
    credential = client['api_key'] + '/' + self.short_date(self.current_time) + '/' + self.credential_scope
    request.add_header(self.auth_header_name, ", ".join([
        self.algo_id + ' Credential=' + credential,
        'SignedHeaders=' + self.prepare_headers_to_sign(headers_to_sign),
        'Signature=' + signature
    ]))
    return request.request
def normalize_white_spaces(self, value):
    """Collapse whitespace runs outside double-quoted segments and trim the result.

    The value is split on '"'. Even-numbered pieces (outside quotes) have each
    run of whitespace replaced by a single space; odd-numbered pieces (inside
    quotes) are kept verbatim.
    """
    whitespace_run = re.compile(r'\s+')
    segments = value.split('"')
    normalized = [
        whitespace_run.sub(' ', segment) if position % 2 == 0 else segment
        for position, segment in enumerate(segments)
    ]
    return '"'.join(normalized).strip()
def create_algo(self):
    """Return the hashlib constructor matching ``self.hash_algo``.

    Yields ``sha256`` for 'SHA256', ``sha512`` for 'SHA512', and ``None`` for
    any other value.
    """
    for label, constructor in (('SHA256', sha256), ('SHA512', sha512)):
        if self.hash_algo == label:
            return constructor
    return None
class TestingConfig(Config):
    """Configuration used for testing; secrets come from the environment only."""

    # Directory for user files and generated artefacts.
    FILES_DIR = os.path.join(basedir, "user")

    # Model implementations.
    MODELS_DIR = os.path.join(basedir, "backend/models/")

    # Dataset implementations.
    DATASETS_DIR = os.path.join(basedir, "backend/datasets/")

    # Always an int, whether it comes from the environment (a str) or the default.
    PORT_REST_API = int(os.environ.get('PORT_REST_API', 3000))
    PREF_REST_API = "http://0.0.0.0:"
    DATABASE_URI = 'test'
    TESTING = True

    # Credentials must be supplied through the environment. Real-looking tokens
    # were previously embedded here as fallbacks, i.e. committed to the repo.
    WEBEX_BOT_TOKEN = os.environ.get('WEBEX_BOT_TOKEN', "")
    WEBEX_ACCESS_TOKEN = os.environ.get('WEBEX_ACCESS_TOKEN', "")
    OPENAPI_KEY = os.environ.get('OPENAPI_KEY', "")

    # Module names allowed in YAML configs, mapped to their import paths.
    all_modules = {"openai": "backend.server.utils.openai_utils"}
    BOT_EMAIL = 'blazetranscriptionbot@webex.bot'

    @classmethod
    def public_config(cls):
        """Return the token/key settings as a dict."""
        # NOTE(review): despite the name, this returns credentials; confirm
        # where the result is sent before exposing it to a client.
        return {
            "WEBEX_BOT_TOKEN": cls.WEBEX_BOT_TOKEN,
            "WEBEX_ACCESS_TOKEN": cls.WEBEX_ACCESS_TOKEN,
            "OPENAPI_KEY": cls.OPENAPI_KEY
        }

    @classmethod
    def yaml_allowed_moduls(cls, yaml_defined_modules):
        """Map each YAML-declared module name to its import path.

        Names missing from ``all_modules`` map to ``None``. A ``None`` or empty
        input yields an empty dict.
        """
        return {
            module: cls.all_modules.get(module)
            for module in (yaml_defined_modules or [])
        }
copy.deepcopy(server_config) +frontend_config.update(config_class.public_config()) +tasks_list = server_config['function']['task'] +server_config['model_objs'] = {} +if 'profiling' in server_config['function']: + os.environ['ASKI_PROFILING'] = str( + server_config['function']['profiling']) +else: + # Default + os.environ['ASKI_PROFILING'] = "false" + +for task in tasks_list: + server_config['model_objs'][task] = get_list_objects( + server_config['models_' + task], 'common', 'models') + +if 'datasets' in server_config: + server_config['dataset_objs'] = get_list_objects( + server_config['datasets'], 'common', 'datasets') + server_config['processes'] = {} + +state.state.update( + frontend_config=frontend_config, + server_config=server_config + ) +app = FastAPI() +app.include_router(dataset_views.router) \ No newline at end of file diff --git a/backend/server_fastapi/routers/__init__.py b/backend/server_fastapi/routers/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/backend/server_fastapi/routers/dataset_views.py b/backend/server_fastapi/routers/dataset_views.py new file mode 100644 index 0000000..8025400 --- /dev/null +++ b/backend/server_fastapi/routers/dataset_views.py @@ -0,0 +1,35 @@ +import json +from glob import glob +import os +import os.path as path +import requests +from backend.params.specifications import Specifications +from backend.server.utils.helpers import get_object_from_name +from fastapi import APIRouter, Depends, HTTPException,Response, status, Body +from typing import Any, Dict, AnyStr, List, Union +from fastapi.responses import JSONResponse +from pydantic import BaseModel, Field +import backend.server_fastapi.state as state + +router = APIRouter( + prefix="/datasets", + tags=["Dataset"], + dependencies=[], + responses={404: {"description": "Not found"}}, +) + +@router.get('/') +def datasetsList(): + specs = Specifications(state.state.get("MODELS_DIR"), state.state.get("DATASETS_DIR")) + return {'datasets_summarization': 
specs._list_datasets_summarization, 'datasets_search': specs._list_datasets_search} + +@router.get('/files') +def datasetFilesList(dataset: str): + dataset_name = str(dataset) + dataset_obj = get_object_from_name(dataset_name, state.state.get("server_config"), 'dataset') + if not dataset_obj: + return "That dataset doesn't exist", 404 + + titles = dataset_obj._get_topic_titles() + return {"files": titles} + diff --git a/backend/server_fastapi/routers/model_views.py b/backend/server_fastapi/routers/model_views.py new file mode 100644 index 0000000..9867829 --- /dev/null +++ b/backend/server_fastapi/routers/model_views.py @@ -0,0 +1,19 @@ +from fastapi import APIRouter + +router = APIRouter() + + + +@router.get("/users/", tags=["users"]) +async def read_users(): + return [{"username": "Rick"}, {"username": "Morty"}] + + +@router.get("/users/me", tags=["users"]) +async def read_user_me(): + return {"username": "fakecurrentuser"} + + +@router.get("/users/{username}", tags=["users"]) +async def read_user(username: str): + return {"username": username} \ No newline at end of file diff --git a/backend/server_fastapi/state.py b/backend/server_fastapi/state.py new file mode 100644 index 0000000..f877c5d --- /dev/null +++ b/backend/server_fastapi/state.py @@ -0,0 +1,3 @@ +state = { + "count":1 +} From 54361796cf7d7fb6bfbd22dc1a3468e341d23fd3 Mon Sep 17 00:00:00 2001 From: Vamsi Date: Wed, 31 Jan 2024 20:13:37 +0530 Subject: [PATCH 05/11] added LLAMA2 and openai search --- backend/models/common/ElasticBERT.py | 8 +- backend/models/common/LLAMA2.py | 82 +++++++++++++++++++++ backend/models/common/LLM.py | 1 + backend/models/common/OpenAI.py | 26 ++++++- backend/models/interfaces/model_search.py | 28 +++---- backend/server_fastapi/main.py | 5 +- backend/server_fastapi/routers/ws_views.py | 86 ++++++++++++++++++++++ yaml/05_search_summary_webex.yaml | 5 +- 8 files changed, 218 insertions(+), 23 deletions(-) create mode 100644 backend/models/common/LLAMA2.py create mode 100644 
backend/server_fastapi/routers/ws_views.py diff --git a/backend/models/common/ElasticBERT.py b/backend/models/common/ElasticBERT.py index e616ea7..2ff1f99 100644 --- a/backend/models/common/ElasticBERT.py +++ b/backend/models/common/ElasticBERT.py @@ -79,7 +79,7 @@ def load_model(self, file_name, file_content): for doc in self.docs: index_into_elasticsearch(doc) - def file_search(self, search_term): + def file_search(self, search_term, context=None): result = sum_docs = orig_w_h = new_candidate_docs = [] t_s = time.time() @@ -87,7 +87,8 @@ def file_search(self, search_term): hits = res['hits']['hits'] for i in range(len(hits)): - c_d = hits[i]['_source']['text'] + c_d = hits[i]['_source']['tex "context": "Beyonc\u00e9 attended St. Mary's Elementary School in Fredericksburg, Texas, where she enrolled in dance classes. Her singing talent was discovered when dance instructor Darlette Johnson began humming a song and she finished it, able to hit the high-pitched notes. Beyonc\u00e9's interest in music and performing continued after winning a school talent show at age seven, singing John Lennon's \"Imagine\" to beat 15/16-year-olds. In fall of 1990, Beyonc\u00e9 enrolled in Parker Elementary School, a music magnet school in Houston, where she would perform with the school's choir. She also attended the High School for the Performing and Visual Arts and later Alief Elsik High School. Beyonc\u00e9 was also a member of the choir at St. John's United Methodist Church as a soloist for two years." 
+t'] r, start, end = answer_question( search_term, c_d, self.model, self.tokenizer) @@ -100,8 +101,9 @@ def file_search(self, search_term): sum_docs = ['']*len(new_candidate_docs) res = [{'res': r, 'sum': s, 'orig': o, 'orig_w_h': o_h} for r, s, o, o_h in zip(result, sum_docs, new_candidate_docs, orig_w_h)] + t_e = time.time() t_search = t_e - t_s - return res, t_search + return res[0]["res"], t_search diff --git a/backend/models/common/LLAMA2.py b/backend/models/common/LLAMA2.py new file mode 100644 index 0000000..2f69d8f --- /dev/null +++ b/backend/models/common/LLAMA2.py @@ -0,0 +1,82 @@ +import openai +from flask import current_app +from llama_cpp import Llama +import os +import inspect +import re +import json +from dotenv import load_dotenv +import requests +from typing import Callable, Dict +import datetime +from collections import defaultdict +import time + +def get_LLAMA2_info(): + """ + Function to return a dictionnary containing the name, class name, + description, paper link and GitHub repo link of the BART model. It is used + throughout the code to get various information about the model. + + Returns + ------- + model_info : a dictionnary + A dictionnary containing the name, class name, + description, paper link and GitHub repo link of the T5 model + """ + model_info = { + 'name': "LLAMA2", + 'class_name': 'LLAMA2', + 'desc': "LLAMA2", + } + return model_info + +class LLAMA2(): + tasks_supported = ["actionables","summarization","chat"] + model = None + + def __init__(self): + + self._info = get_LLAMA2_info() + + def load_model(self,*args): + self.model = Llama(model_path="/home/vamsi/projects/llama2/llama2-webui/models/llama-2-7b-chat.Q4_0.gguf", n_ctx=2048) + + def _get_model_info(self): + pass + + def _get_name(self): + return self._info['name'] + + def _get_class_name(self): + return self._info['class_name'] + + def file_search_prompt_format(self,search_term,context): + return f""" + Given story and question below. 
return appropriate answer in a word or two. + + ### Story: + {context} + + ### Question: + Q:{search_term} + + ### Answer: + + \n + """ + + def file_search(self,search_term,context): + # prompt = "You are a helpful assistant answering questions based on the context provided.Reply with value only, no other text." + # message = f"{prompt}\n{context}\nQuestion:{search_term}" + t_s = time.time() + output = self.model(self.file_search_prompt_format(search_term,context), # Prompt + max_tokens=32, # Generate up to 32 tokens + echo=False # Echo the prompt back in the output + ) + res = output["choices"][0]["text"] + print(res) + t_e = time.time() + t_search = t_e - t_s + + return res, t_search diff --git a/backend/models/common/LLM.py b/backend/models/common/LLM.py index 4a1d64f..e8a70c1 100644 --- a/backend/models/common/LLM.py +++ b/backend/models/common/LLM.py @@ -2,3 +2,4 @@ class LLMs(): + pass diff --git a/backend/models/common/OpenAI.py b/backend/models/common/OpenAI.py index cce2b2f..f78ac34 100644 --- a/backend/models/common/OpenAI.py +++ b/backend/models/common/OpenAI.py @@ -10,7 +10,7 @@ from typing import Callable, Dict import datetime from collections import defaultdict - +import time def get_openAI_info(): """ @@ -39,6 +39,9 @@ def __init__(self): self._info = get_openAI_info() + def load_model(self,backend/models/common/LLM.py*args): + openai.api_key = current_app.config.get('OPENAPI_KEY') + def _get_model_info(self): pass @@ -226,4 +229,23 @@ def run_with_functions(self,messages,function_dicts): ) print(type(response)) print(response) - return response \ No newline at end of file + return response + + def file_search(self, search_term,context): + prompt = "You are a helpful assistant answering questions based on the context provided.Reply with value only, no other text." 
+ message = f"{prompt}\n{context}\nQuestion:{search_term}" + t_s = time.time() + response = openai.Completion.create( + model="text-davinci-003", + prompt=message, + temperature=0.7, + max_tokens=892, + top_p=1, + frequency_penalty=0, + presence_penalty=0 + ) + res = response['choices'][0]['text'] + t_e = time.time() + t_search = t_e - t_s + + return res, t_search diff --git a/backend/models/interfaces/model_search.py b/backend/models/interfaces/model_search.py index f3425bc..578025b 100644 --- a/backend/models/interfaces/model_search.py +++ b/backend/models/interfaces/model_search.py @@ -483,7 +483,7 @@ def segment_documents(docs, max_doc_length=300): } -def squad_benchmarkV2(file_name,model_obj,sio=None,channel=None): +def squad_benchmarkV2(file_name,model_obj, websocket_response:bool): dataset = Squad.Squad() # Load all questions/files for associated dataset (SQUAD) # socketio.emit("response","emit is working here too") @@ -525,6 +525,7 @@ def squad_benchmarkV2(file_name,model_obj,sio=None,channel=None): # TODO: currently retriever has little work to do # Start iterating through all answerable questions + incorrect_d = [] for question in questions: if(len(question["answers"]["text"])!=0): @@ -538,15 +539,15 @@ def squad_benchmarkV2(file_name,model_obj,sio=None,channel=None): break print(f"(squad_benchmark) > Question: {q_text}") - print(f"(squad_benchmark) > Valid ans: {q_ansl}") + # print(f"(squad_benchmark) > Valid ans: {q_ansl}") - res, time = model_obj.file_search(q_text) - m_ans = res[0]['res'] + res, time = model_obj.file_search(q_text,question['context']) + m_ans = res valid = was_correct(m_ans, q_ansl) - print(f"(squad_benchmark) > Time Taken: {time}") - print(f"(squad_benchmark) > Corect?: {valid}") + # print(f"(squad_benchmark) > Time Taken: {time}") + # print(f"(squad_benchmark) > Corect?: {valid}") results["questions"]["tot_qs"] = results["questions"]["tot_qs"] + 1 results["times"]["all_ts"].append(time) @@ -563,8 +564,8 @@ def 
squad_benchmarkV2(file_name,model_obj,sio=None,channel=None): "context":question['context'] }) - if sio: - sio_response = { + if websocket_response: + response = { 'percent_questions_correct':round(100 * np.mean(results["metrics"]["correct_arr"]), 2), 'number_of_questions_correct':results["metrics"]["correct_arr"].count(1), 'number_of_questions_total':results["questions"]["num_qs"], @@ -572,9 +573,9 @@ def squad_benchmarkV2(file_name,model_obj,sio=None,channel=None): 'progress':round(100.0 * results["metrics"]["correct_arr"].count(1) / (results["questions"]["num_qs"]+0.001), 2), 'incorrect':incorrect_d } - sio.emit(channel,sio_response) - sio.sleep(1) - except: + yield response + except Exception as e: + print(e) print(f"(squad_benchmark) > Exited prematurely, skipping question.") results["times"]["avg_ts"] = np.mean(results["times"]["all_ts"]) @@ -583,14 +584,15 @@ def squad_benchmarkV2(file_name,model_obj,sio=None,channel=None): results["metrics"]["accuracy_prc"] = np.mean( results["metrics"]["correct_arr"]) - if sio: - sio_response = { + if websocket_response: + response = { 'percent_questions_correct':round(100 * np.mean(results["metrics"]["correct_arr"]), 2), 'number_of_questions_correct':results["metrics"]["correct_arr"].count(1), 'number_of_questions_total':results["questions"]["num_qs"], 'average_time_per_question':round(np.mean(results["times"]["all_ts"])), 'progress':round(100.0 * results["metrics"]["correct_arr"].count(1) / (results["questions"]["num_qs"]+0.001), 2) } + yield response diff --git a/backend/server_fastapi/main.py b/backend/server_fastapi/main.py index 3c01fb4..bb67c07 100644 --- a/backend/server_fastapi/main.py +++ b/backend/server_fastapi/main.py @@ -7,7 +7,7 @@ import argparse from backend.server_fastapi.config import TestingConfig,ProductionConfig,DevelopmentConfig from backend.server.utils.helpers import get_list_objects -from backend.server_fastapi.routers import dataset_views, model_views +from backend.server_fastapi.routers import 
dataset_views, model_views, ws_views from werkzeug.utils import import_string def from_object(state, obj): @@ -54,4 +54,5 @@ def from_object(state, obj): server_config=server_config ) app = FastAPI() -app.include_router(dataset_views.router) \ No newline at end of file +app.include_router(dataset_views.router) +app.include_router(ws_views.router) \ No newline at end of file diff --git a/backend/server_fastapi/routers/ws_views.py b/backend/server_fastapi/routers/ws_views.py new file mode 100644 index 0000000..bb4c68d --- /dev/null +++ b/backend/server_fastapi/routers/ws_views.py @@ -0,0 +1,86 @@ +from fastapi import WebSocket, APIRouter +from fastapi.responses import HTMLResponse +import backend.server_fastapi.state as state +from backend.models.interfaces.model_search import squad_benchmarkV2 +import json +import asyncio + +count = 0 +router = APIRouter( + prefix="/ws", + tags=["WebSockets"], + dependencies=[], + responses={404: {"description": "Not found"}}, +) + +html = """ + + + + Chat + + +

WebSocket Chat

+
+ + +
+
    +
+ + + +""" + +@router.get("/") +async def get(): + return HTMLResponse(html) + +def posCount(websocket): + count = count+1 + websocket.send_text(f"Message text was: {count}") + +import time +@router.websocket("/benchmark") +async def benchmark(websocket: WebSocket): + await websocket.accept() + count = 0 + print(state.state) + while True: + data = await websocket.receive_text() + file = data + model_obj = state.state.get("server_config")["model_objs"]["search"][0] + for res in squad_benchmarkV2(file_name=file,model_obj=model_obj,websocket_response=True): + # print(res) + await websocket.send_text(json.dumps(res)) + await asyncio.sleep(0.1) + +@router.websocket("/benchmark2") +async def benchmark(websocket: WebSocket): + await websocket.accept() + count = 0 + print(state.state) + while True: + data = await websocket.receive_text() + file = data + model_obj = state.state.get("server_config")["model_objs"]["search"][1] + for res in squad_benchmarkV2(file_name=file,model_obj=model_obj,websocket_response=True): + # print(res) + await websocket.send_text(json.dumps(res)) + await asyncio.sleep(0.1) + \ No newline at end of file diff --git a/yaml/05_search_summary_webex.yaml b/yaml/05_search_summary_webex.yaml index c9bd1bc..b5e4073 100644 --- a/yaml/05_search_summary_webex.yaml +++ b/yaml/05_search_summary_webex.yaml @@ -4,11 +4,10 @@ function: custom: true metrics: {} datasets: -- WebEx +- Squad models_search: -- ElasticBERT +- LLAMA2 models_summarization: -- Bart - OpenAI models_actionables: - OpenAI From d1b682c3edd3bad4137c528eb36a0fddc47d7131 Mon Sep 17 00:00:00 2001 From: Vamsi Date: Mon, 12 Feb 2024 21:00:53 +0530 Subject: [PATCH 06/11] added type and schemas for fastapi server --- backend/models/common/ElasticBERT.py | 3 +- backend/params/specifications.py | 2 +- backend/server_fastapi/config.py | 6 +- .../{routers => core/models}/__init__.py | 0 .../server_fastapi/core/schemas/__init__.py | 0 .../core/schemas/dataset_schema.py | 18 ++ 
.../core/schemas/general_schema.py | 4 + backend/server_fastapi/core/views/__init__.py | 0 .../core/views/dataset_views.py | 167 ++++++++++++++++++ .../server_fastapi/core/views/model_views.py | 0 .../{routers => core/views}/ws_views.py | 0 backend/server_fastapi/main.py | 3 +- .../server_fastapi/routers/dataset_views.py | 35 ---- backend/server_fastapi/routers/model_views.py | 19 -- backend/server_fastapi/state.py | 1 - 15 files changed, 198 insertions(+), 60 deletions(-) rename backend/server_fastapi/{routers => core/models}/__init__.py (100%) create mode 100644 backend/server_fastapi/core/schemas/__init__.py create mode 100644 backend/server_fastapi/core/schemas/dataset_schema.py create mode 100644 backend/server_fastapi/core/schemas/general_schema.py create mode 100644 backend/server_fastapi/core/views/__init__.py create mode 100644 backend/server_fastapi/core/views/dataset_views.py create mode 100644 backend/server_fastapi/core/views/model_views.py rename backend/server_fastapi/{routers => core/views}/ws_views.py (100%) delete mode 100644 backend/server_fastapi/routers/dataset_views.py delete mode 100644 backend/server_fastapi/routers/model_views.py diff --git a/backend/models/common/ElasticBERT.py b/backend/models/common/ElasticBERT.py index 2ff1f99..ab368bb 100644 --- a/backend/models/common/ElasticBERT.py +++ b/backend/models/common/ElasticBERT.py @@ -87,8 +87,7 @@ def file_search(self, search_term, context=None): hits = res['hits']['hits'] for i in range(len(hits)): - c_d = hits[i]['_source']['tex "context": "Beyonc\u00e9 attended St. Mary's Elementary School in Fredericksburg, Texas, where she enrolled in dance classes. Her singing talent was discovered when dance instructor Darlette Johnson began humming a song and she finished it, able to hit the high-pitched notes. Beyonc\u00e9's interest in music and performing continued after winning a school talent show at age seven, singing John Lennon's \"Imagine\" to beat 15/16-year-olds. 
In fall of 1990, Beyonc\u00e9 enrolled in Parker Elementary School, a music magnet school in Houston, where she would perform with the school's choir. She also attended the High School for the Performing and Visual Arts and later Alief Elsik High School. Beyonc\u00e9 was also a member of the choir at St. John's United Methodist Church as a soloist for two years." -t'] + c_d = hits[i]['_source']['text'] r, start, end = answer_question( search_term, c_d, self.model, self.tokenizer) diff --git a/backend/params/specifications.py b/backend/params/specifications.py index 69db8f6..793e559 100644 --- a/backend/params/specifications.py +++ b/backend/params/specifications.py @@ -85,7 +85,7 @@ def parse_objects(folder, task): class Specifications: - def __init__(self, model_path='backend/models', datasets_path='backend/datasets'): + def __init__(self, model_path:str ='backend/models', datasets_path:str ='backend/datasets'): self._list_datasets_summarization = parse_objects( datasets_path, '/common/') diff --git a/backend/server_fastapi/config.py b/backend/server_fastapi/config.py index 8f2dc22..81108d7 100644 --- a/backend/server_fastapi/config.py +++ b/backend/server_fastapi/config.py @@ -2,7 +2,11 @@ import os.path as path from werkzeug.utils import import_string -basedir = os.path.normpath(os.path.join(os.path.dirname(__file__), "..")) +#fastapi +basedir = os.path.normpath(os.path.join(os.path.dirname(__file__), "../..")) + +#flask +# basedir = os.path.normpath(os.path.join(os.path.dirname(__file__), "../..")) class Config(object): diff --git a/backend/server_fastapi/routers/__init__.py b/backend/server_fastapi/core/models/__init__.py similarity index 100% rename from backend/server_fastapi/routers/__init__.py rename to backend/server_fastapi/core/models/__init__.py diff --git a/backend/server_fastapi/core/schemas/__init__.py b/backend/server_fastapi/core/schemas/__init__.py new file mode 100644 index 0000000..e69de29 diff --git 
a/backend/server_fastapi/core/schemas/dataset_schema.py b/backend/server_fastapi/core/schemas/dataset_schema.py new file mode 100644 index 0000000..6f8c475 --- /dev/null +++ b/backend/server_fastapi/core/schemas/dataset_schema.py @@ -0,0 +1,18 @@ +from pydantic import BaseModel +from typing import Union + +class DatasetFilesUpload(BaseModel): + file: str + content: Union[str, None] = None + + +class DatasetFileList(BaseModel): + files: list[str] + +class UploadedFile(BaseModel): + fileName:str + content:str + +class File(BaseModel): + filename:str + fileClass:str \ No newline at end of file diff --git a/backend/server_fastapi/core/schemas/general_schema.py b/backend/server_fastapi/core/schemas/general_schema.py new file mode 100644 index 0000000..fe1b99d --- /dev/null +++ b/backend/server_fastapi/core/schemas/general_schema.py @@ -0,0 +1,4 @@ +from pydantic import BaseModel + +class Error404Message(BaseModel): + message: str diff --git a/backend/server_fastapi/core/views/__init__.py b/backend/server_fastapi/core/views/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/backend/server_fastapi/core/views/dataset_views.py b/backend/server_fastapi/core/views/dataset_views.py new file mode 100644 index 0000000..88f60e8 --- /dev/null +++ b/backend/server_fastapi/core/views/dataset_views.py @@ -0,0 +1,167 @@ +import json +from glob import glob +import os +import os.path as path +import requests +from backend.server.utils.helpers import get_object_from_name +from fastapi import APIRouter, Depends, HTTPException, Response, status, Body +from typing import Any, Dict, AnyStr, List, Union, Annotated +from fastapi.responses import JSONResponse +from pydantic import BaseModel, Field +import backend.server_fastapi.state as state +from backend.params.specifications import Specifications +from backend.server_fastapi.core.schemas.dataset_schema import DatasetFileList, UploadedFile, File +from backend.server_fastapi.core.schemas.general_schema import Error404Message 
+from fastapi.responses import JSONResponse + +router = APIRouter( + prefix="/datasets", + tags=["Dataset"], + dependencies=[], + responses={404: {"description": "Not found"}}, +) + + +@router.get('/') +def datasets(): + """ + List all datasets + """ + specs = Specifications() + return {'datasets_summarization': specs._list_datasets_summarization, 'datasets_search': specs._list_datasets_search} + + +@router.get('/files', + response_model=DatasetFileList, + responses={ + 404: {"model": Error404Message, "description": "Files not found for dataset"}, + 200: { + "description": "Dataset files requested by Datasetname", + "content": { + "application/json": { + "files": ["Beyonce", "The Nightangle"] + } + }, + }, + },) +def dataset_files_list(dataset: str): + """ + List the files from given dataset name + """ + dataset_name = str(dataset) + dataset_obj = get_object_from_name( + dataset_name, state.state.get("server_config"), 'dataset') + if not dataset_obj: + return JSONResponse(status_code=404, content={"message": "Files not found"}) + + titles = dataset_obj._get_topic_titles() + return {"files": titles} + + +@router.post('/files', response_model=UploadedFile, + responses={ + 404: {"model": Error404Message, "description": "Files not found for dataset"}, + 200: { + "description": "Returns uploaded if uploaded successful", + "content": { + "application/json": { + "message": "Uploaded" + } + }, + }, + },) +def dataset_file_Upload(file: UploadedFile): + """ + Create file using the filename and content + """ + + if not any(file.fileName.endswith(ext) for ext in ['.txt', '.pdf']): + return JSONResponse(status_code=404, content={"message": "Please use either .txt or .pdf filename"}) + + print(state.state) + filepath = path.join(state.state.get("FILES_DIR"), file.fileName) + isBytes = "" if file.fileName.endswith('.txt') else 'b' + with open(filepath, f'w{isBytes}') as f: + f.write(file.content) + + for dataset_obj in state.state.get("server_config")['dataset_objs']: + if 
dataset_obj._dataset_name == "User": + dataset_obj._update_file(file.fileName) + break + + return {"message": "Uploaded"}, 200 + + +@router.post('/datasets/files/detail', + response_model=File, + responses={ + 404: {"model": Error404Message, "description": "File not found"}, + 200: { + "description": "Details of files", + "content": { + "application/json": { + "content": "test", + "size": "1" + } + }, + }, + },) +def dataset_file_details(fileName: Annotated[str, Body()], fileClass: Annotated[str, Body()]) -> Union[Dict[str, str], None]: + if fileClass == 'User' or fileName == "WebEx": + filepaths = glob( + path.join(state.state.get("FILES_DIR"), '**', fileName), recursive=True) + + if len(filepaths) > 0: + filepath = filepaths[0] + + with open(filepath, 'r') as f: + if filepath.endswith(".json"): + content = json.loads(f.read()) + else: + content = f.read() + size = os.path.getsize(filepath) / 1000 + else: + return JSONResponse(status_code=404, content={"message": "File does not exist"}) + else: + + dataset_obj = get_object_from_name( + fileClass, state.state.get("server_config"), 'dataset') + print(dataset_obj) + if 'search' in dataset_obj.functions_supported: + content = dataset_obj._get_title_story(fileName) + content = ' '.join(sentence for sentence in content) + size = "N/A" + elif ("summarization" in dataset_obj.functions_supported) and ("search" not in dataset_obj): + content = None + size = None + else: + return JSONResponse(status_code=404, content={"message": "File does not exist"}) + + response_data = {} + response_data['content'] = content + response_data['size'] = size + + return response_data + + +@router.post('/list_webex_meeting_transcripts', + response_model=File, + responses={ + 404: {"model": Error404Message, "description": "File not found"}, + 200: { + "description": "List meeting transcripts", + "content": { + "application/json": { + "response": "[]", + "recordings": "[]" + } + }, + }, + },) +def list_meeting_transcripts(): + dataset_obj = 
get_object_from_name( + "WebEx", state.state.get("server_config"), 'dataset') + if not dataset_obj: + return JSONResponse(status_code=404, content={"message": "That dataset doesn't exist"}) + + return {"response": dataset_obj.list_meetings(), "recordings": dataset_obj.recordings} diff --git a/backend/server_fastapi/core/views/model_views.py b/backend/server_fastapi/core/views/model_views.py new file mode 100644 index 0000000..e69de29 diff --git a/backend/server_fastapi/routers/ws_views.py b/backend/server_fastapi/core/views/ws_views.py similarity index 100% rename from backend/server_fastapi/routers/ws_views.py rename to backend/server_fastapi/core/views/ws_views.py diff --git a/backend/server_fastapi/main.py b/backend/server_fastapi/main.py index bb67c07..21d94aa 100644 --- a/backend/server_fastapi/main.py +++ b/backend/server_fastapi/main.py @@ -2,12 +2,13 @@ import os import copy import yaml +from backend.server_fastapi.core.views import dataset_views, model_views import backend.server_fastapi.state as state from fastapi import FastAPI import argparse from backend.server_fastapi.config import TestingConfig,ProductionConfig,DevelopmentConfig from backend.server.utils.helpers import get_list_objects -from backend.server_fastapi.routers import dataset_views, model_views, ws_views +from backend.server_fastapi.core.views import ws_views from werkzeug.utils import import_string def from_object(state, obj): diff --git a/backend/server_fastapi/routers/dataset_views.py b/backend/server_fastapi/routers/dataset_views.py deleted file mode 100644 index 8025400..0000000 --- a/backend/server_fastapi/routers/dataset_views.py +++ /dev/null @@ -1,35 +0,0 @@ -import json -from glob import glob -import os -import os.path as path -import requests -from backend.params.specifications import Specifications -from backend.server.utils.helpers import get_object_from_name -from fastapi import APIRouter, Depends, HTTPException,Response, status, Body -from typing import Any, Dict, AnyStr, 
List, Union -from fastapi.responses import JSONResponse -from pydantic import BaseModel, Field -import backend.server_fastapi.state as state - -router = APIRouter( - prefix="/datasets", - tags=["Dataset"], - dependencies=[], - responses={404: {"description": "Not found"}}, -) - -@router.get('/') -def datasetsList(): - specs = Specifications(state.state.get("MODELS_DIR"), state.state.get("DATASETS_DIR")) - return {'datasets_summarization': specs._list_datasets_summarization, 'datasets_search': specs._list_datasets_search} - -@router.get('/files') -def datasetFilesList(dataset: str): - dataset_name = str(dataset) - dataset_obj = get_object_from_name(dataset_name, state.state.get("server_config"), 'dataset') - if not dataset_obj: - return "That dataset doesn't exist", 404 - - titles = dataset_obj._get_topic_titles() - return {"files": titles} - diff --git a/backend/server_fastapi/routers/model_views.py b/backend/server_fastapi/routers/model_views.py deleted file mode 100644 index 9867829..0000000 --- a/backend/server_fastapi/routers/model_views.py +++ /dev/null @@ -1,19 +0,0 @@ -from fastapi import APIRouter - -router = APIRouter() - - - -@router.get("/users/", tags=["users"]) -async def read_users(): - return [{"username": "Rick"}, {"username": "Morty"}] - - -@router.get("/users/me", tags=["users"]) -async def read_user_me(): - return {"username": "fakecurrentuser"} - - -@router.get("/users/{username}", tags=["users"]) -async def read_user(username: str): - return {"username": username} \ No newline at end of file diff --git a/backend/server_fastapi/state.py b/backend/server_fastapi/state.py index f877c5d..1b6a870 100644 --- a/backend/server_fastapi/state.py +++ b/backend/server_fastapi/state.py @@ -1,3 +1,2 @@ state = { - "count":1 } From e2c8717cdd804eb1378da4670131606715269ef8 Mon Sep 17 00:00:00 2001 From: Vamsi Date: Mon, 12 Feb 2024 21:11:23 +0530 Subject: [PATCH 07/11] removed unused code from wsviews --- backend/server_fastapi/core/views/ws_views.py | 44 
+------------------ 1 file changed, 1 insertion(+), 43 deletions(-) diff --git a/backend/server_fastapi/core/views/ws_views.py b/backend/server_fastapi/core/views/ws_views.py index bb4c68d..33e9d91 100644 --- a/backend/server_fastapi/core/views/ws_views.py +++ b/backend/server_fastapi/core/views/ws_views.py @@ -4,8 +4,8 @@ from backend.models.interfaces.model_search import squad_benchmarkV2 import json import asyncio +import time -count = 0 router = APIRouter( prefix="/ws", tags=["WebSockets"], @@ -13,49 +13,7 @@ responses={404: {"description": "Not found"}}, ) -html = """ - - - - Chat - - -

WebSocket Chat

-
- - -
-
    -
- - - -""" - -@router.get("/") -async def get(): - return HTMLResponse(html) -def posCount(websocket): - count = count+1 - websocket.send_text(f"Message text was: {count}") - -import time @router.websocket("/benchmark") async def benchmark(websocket: WebSocket): await websocket.accept() From 81ad3ce5940cdd358f81d3661b8a3c1d992b72bc Mon Sep 17 00:00:00 2001 From: Vamsi Krishna Date: Mon, 12 Feb 2024 21:14:12 +0530 Subject: [PATCH 08/11] Delete backend/config.py --- backend/config.py | 77 ----------------------------------------------- 1 file changed, 77 deletions(-) delete mode 100644 backend/config.py diff --git a/backend/config.py b/backend/config.py deleted file mode 100644 index fad51f4..0000000 --- a/backend/config.py +++ /dev/null @@ -1,77 +0,0 @@ -import os -import os.path as path -from tinydb import TinyDB, Query -basedir = os.path.normpath(os.path.join(os.path.dirname(__file__), "..")) - - -class Config(object): - TESTING = False - - -class ProductionConfig(Config): - DATABASE_URI = 'test' - - -class DevelopmentConfig(Config): - # ASKI/user - FILES_DIR = os.path.join(basedir, "user") - DB_CONFIG_FILE = os.path.join(basedir, "config.json") - # /ASKI/aski/models - MODELS_DIR = os.path.join(basedir, "backend/models/") - - # /ASKI/aski/datasets - DATASETS_DIR = os.path.join(basedir, "backend/datasets/") - - PORT_REST_API = 3000 - PREF_REST_API = "http://0.0.0.0:" - DATABASE_URI = 'test' - TESTING = True - - -class TestingConfig(Config): - # ASKI/user - # ASKI/user - FILES_DIR = os.path.join(basedir, "user") - - ############DB Config############### - DB_CONFIG_FILE = os.path.join(basedir, "config.json") - db = TinyDB(DB_CONFIG_FILE) - DBConfig = Query() - # /ASKI/aski/models - MODELS_DIR = os.path.join(basedir, "backend/models/") - CONFIG_DIR = os.path.join(basedir) - - # /ASKI/aski/datasets - DATASETS_DIR = os.path.join(basedir, "backend/datasets/") - PORT_REST_API = os.environ.get('PORT_REST_API', 3000) - PREF_REST_API = "http://0.0.0.0:" - DATABASE_URI = 
'test' - TESTING = True - WEBEX_BOT_TOKEN = os.environ.get('WEBEX_BOT_TOKEN', "NGM5N2U2MDgtMzc4YS00NjY1LWFjN2MtMjBhNTM4MTgzNzAyOWNkMmI3YTYtYjJk_PF84_1eb65fdf-9643-417f-9974-ad72cae0e10f") - WEBEX_ACCESS_TOKEN = os.environ.get('WEBEX_ACCESS_TOKEN', "YTAzMGJmYmQtY2I1Ni00MGRmLWJlNWYtNDJjNjY1NmFjZjljM2RjODhmY2QtY2M4_PF84_1eb65fdf-9643-417f-9974-ad72cae0e10f") - - OPENAPI_KEY = os.environ.get('OPENAPI_KEY', "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") - all_modules = {"openai":"backend.server.utils.openai_utils"} - BOT_EMAIL = 'blazetranscriptionbot@webex.bot' - - SLACK_BOT_TOKEN = os.environ.get('SLACK_BOT_TOKEN', "") - SLACK_APP_TOKEN = os.environ.get('SLACK_APP_TOKEN', "") - - - @classmethod - def public_config(self): - return { - "WEBEX_BOT_TOKEN": self.WEBEX_BOT_TOKEN, - "WEBEX_ACCESS_TOKEN":self.WEBEX_ACCESS_TOKEN, - "OPENAPI_KEY":self.OPENAPI_KEY, - "SLACK_APP_TOKEN": self.SLACK_APP_TOKEN, - "SLACK_BOT_TOKEN": self.SLACK_BOT_TOKEN - } - - @classmethod - def yaml_allowed_moduls(cls,yaml_defined_modules): - allowed_modules = {} - for module in yaml_defined_modules: - allowed_modules[module] = cls.all_modules.get(module) - - return allowed_modules From bc1055813fd27e895c4c63fde8c77d13dcba1791 Mon Sep 17 00:00:00 2001 From: Vamsi Krishna Date: Mon, 12 Feb 2024 21:14:35 +0530 Subject: [PATCH 09/11] Delete backend/server_fastapi/config.py --- backend/server_fastapi/config.py | 73 -------------------------------- 1 file changed, 73 deletions(-) delete mode 100644 backend/server_fastapi/config.py diff --git a/backend/server_fastapi/config.py b/backend/server_fastapi/config.py deleted file mode 100644 index 81108d7..0000000 --- a/backend/server_fastapi/config.py +++ /dev/null @@ -1,73 +0,0 @@ -import os -import os.path as path -from werkzeug.utils import import_string - -#fastapi -basedir = os.path.normpath(os.path.join(os.path.dirname(__file__), "../..")) - -#flask -# basedir = os.path.normpath(os.path.join(os.path.dirname(__file__), "../..")) - - -class 
Config(object): - TESTING = False - - - - -class ProductionConfig(Config): - DATABASE_URI = 'test' - - -class DevelopmentConfig(Config): - # ASKI/user - FILES_DIR = os.path.join(basedir, "user") - - # /ASKI/aski/models - MODELS_DIR = os.path.join(basedir, "backend/models/") - - # /ASKI/aski/datasets - DATASETS_DIR = os.path.join(basedir, "backend/datasets/") - - PORT_REST_API = 3000 - PREF_REST_API = "http://0.0.0.0:" - DATABASE_URI = 'test' - TESTING = True - - -class TestingConfig(Config): - # ASKI/user - # ASKI/user - FILES_DIR = os.path.join(basedir, "user") - - # /ASKI/aski/models - MODELS_DIR = os.path.join(basedir, "backend/models/") - - # /ASKI/aski/datasets - DATASETS_DIR = os.path.join(basedir, "backend/datasets/") - PORT_REST_API = os.environ.get('PORT_REST_API', 3000) - PREF_REST_API = "http://0.0.0.0:" - DATABASE_URI = 'test' - TESTING = True - WEBEX_BOT_TOKEN = os.environ.get('WEBEX_BOT_TOKEN', "NGM5N2U2MDgtMzc4YS00NjY1LWFjN2MtMjBhNTM4MTgzNzAyOWNkMmI3YTYtYjJk_PF84_1eb65fdf-9643-417f-9974-ad72cae0e10f") - WEBEX_ACCESS_TOKEN = os.environ.get('WEBEX_ACCESS_TOKEN', "YTAzMGJmYmQtY2I1Ni00MGRmLWJlNWYtNDJjNjY1NmFjZjljM2RjODhmY2QtY2M4_PF84_1eb65fdf-9643-417f-9974-ad72cae0e10f") - - OPENAPI_KEY = os.environ.get('OPENAPI_KEY', "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") - all_modules = {"openai":"backend.server.utils.openai_utils"} - BOT_EMAIL = 'blazetranscriptionbot@webex.bot' - - - @classmethod - def public_config(self): - return { - "WEBEX_BOT_TOKEN": self.WEBEX_BOT_TOKEN, - "WEBEX_ACCESS_TOKEN":self.WEBEX_ACCESS_TOKEN, - "OPENAPI_KEY":self.OPENAPI_KEY - } - @classmethod - def yaml_allowed_moduls(cls,yaml_defined_modules): - allowed_modules = {} - for module in yaml_defined_modules: - allowed_modules[module] = cls.all_modules.get(module) - - return allowed_modules From bdbcc53fbfbf8ed936ce912ea91c5e6eec430d63 Mon Sep 17 00:00:00 2001 From: Vamsi Krishna Date: Mon, 12 Feb 2024 21:16:21 +0530 Subject: [PATCH 10/11] Delete 
external_apps/webex_bot/escherauth.py --- external_apps/webex_bot/escherauth.py | 313 -------------------------- 1 file changed, 313 deletions(-) delete mode 100644 external_apps/webex_bot/escherauth.py diff --git a/external_apps/webex_bot/escherauth.py b/external_apps/webex_bot/escherauth.py deleted file mode 100644 index a22c871..0000000 --- a/external_apps/webex_bot/escherauth.py +++ /dev/null @@ -1,313 +0,0 @@ -#code taken from https://github.com/emartech/escher-python and updated to python3 - -import datetime -import hmac -import requests -import urllib.request, urllib.parse, urllib.error -import re - -from hashlib import sha256, sha512 - -try: - from urllib.parse import urlparse, parse_qsl, urljoin - from urllib.parse import quote -except: - from urllib.parse import urlparse, parse_qsl, urljoin, quote - - -class EscherException(Exception): - pass - - -class EscherRequestsAuth(requests.auth.AuthBase): - def __init__(self, credential_scope, options, client): - self.escher = Escher(credential_scope, options) - self.client = client - - def __call__(self, request): - return self.escher.sign(request, self.client) - - -class EscherRequest(): - _uri_regex = re.compile('([^?#]*)(\?(.*))?') - - def __init__(self, request): - self.type = type(request) - self.request = request - self.prepare_request_uri() - - def request(self): - return self.request - - def prepare_request_uri(self): - if self.type is requests.models.PreparedRequest: - self.request_uri = self.request.path_url - if self.type is dict: - self.request_uri = self.request['uri'] - match = re.match(self._uri_regex, self.request_uri) - self.uri_path = match.group(1) - self.uri_query = match.group(3) - - def method(self): - if self.type is requests.models.PreparedRequest: - return self.request.method - if self.type is dict: - return self.request['method'] - - def host(self): - if self.type is requests.models.PreparedRequest: - return self.request.host - if self.type is dict: - return self.request['host'] - - def 
path(self): - return self.uri_path - - def query_parts(self): - return parse_qsl((self.uri_query or '').replace(';', '%3b'), True) - - def headers(self): - if self.type is requests.models.PreparedRequest: - headers = [] - for key, value in self.request.headers.items(): - headers.append([key, value]) - return headers - if self.type is dict: - return self.request['headers'] - - def body(self): - if self.type is requests.models.PreparedRequest: - return self.request.body or '' - if self.type is dict: - return self.request.get('body', '') - - def add_header(self, header, value): - if self.type is requests.models.PreparedRequest: - self.request.headers[header] = value - if self.type is dict: - self.request['headers'].append((header, value)) - - -class AuthParams: - def __init__(self, data, vendor_key): - self._init_data(data, 'X-' + vendor_key + '-') - - def _init_data(self, data, prefix): - self._data = {} - for (k, v) in data: - if k.startswith(prefix): - self._data[k.replace(prefix, '').lower()] = v - - def get(self, name): - if name not in self._data: - raise EscherException('Missing authorization parameter: ' + name) - return self._data[name] - - def get_signed_headers(self): - return self.get('signedheaders').lower().split(';') - - def get_algo_data(self): - data = self.get('algorithm').split('-') - if len(data) != 3: - raise EscherException('Malformed Algorithm parameter') - return data - - def get_algo_prefix(self): - return self.get_algo_data()[0] - - def get_hash_algo(self): - return self.get_algo_data()[2].upper() - - def get_credential_data(self): - data = self.get('credentials').split('/', 2) - if len(data) != 3: - raise EscherException('Malformed Credentials parameter') - return data - - def get_credential_key(self): - return self.get_credential_data()[0] - - def get_credential_date(self): - return datetime.datetime.strptime(self.get_credential_data()[1], '%Y%m%d') - - def get_credential_scope(self): - return self.get_credential_data()[2] - - def 
get_expires(self): - return int(self.get('expires')) - - def get_request_date(self): - return datetime.datetime.strptime(self.get('date'), '%Y%m%dT%H%M%SZ') - - -class AuthenticationValidator: - def validate_mandatory_signed_headers(self, headers_to_sign): - if 'host' not in headers_to_sign: - raise EscherException('Host header is not signed') - - def validate_hash_algo(self, hash_algo): - if hash_algo not in ('SHA256', 'SHA512'): - raise EscherException('Only SHA256 and SHA512 hash algorithms are allowed') - - def validate_dates(self, current_date, request_date, credential_date, expires, clock_skew): - if request_date.strftime('%Y%m%d') != credential_date.strftime('%Y%m%d'): - raise EscherException('The request date and credential date do not match') - - min_date = current_date - datetime.timedelta(seconds=(clock_skew + expires)) - max_date = current_date + datetime.timedelta(seconds=clock_skew) - if request_date < min_date or request_date > max_date: - raise EscherException('Request date is not within the accepted time interval') - - def validate_credential_scope(self, expected, actual): - if actual != expected: - raise EscherException('Invalid credential scope (provided: ' + actual + ', required: ' + expected + ')') - - def validate_signature(self, expected, actual): - if expected != actual: - raise EscherException('The signatures do not match (provided: ' + actual + ', calculated: ' + expected + ')') - - -class Escher: - _normalize_path = re.compile('([^/]+/\.\./?|/\./|//|/\.$|/\.\.$)') - - def __init__(self, credential_scope, options={}): - self.credential_scope = credential_scope - self.algo_prefix = options.get('algo_prefix', 'ESR') - self.vendor_key = options.get('vendor_key', 'Escher') - self.hash_algo = options.get('hash_algo', 'SHA256') - self.current_time = options.get('current_time', datetime.datetime.utcnow()) - self.auth_header_name = options.get('auth_header_name', 'X-Escher-Auth') - self.date_header_name = options.get('date_header_name', 
'X-Escher-Date') - self.clock_skew = options.get('clock_skew', 300) - self.algo = self.create_algo() - self.algo_id = self.algo_prefix + '-HMAC-' + self.hash_algo - - def sign(self, r, client, headers_to_sign=[]): - request = EscherRequest(r) - - for header in [self.date_header_name.lower(), 'host']: - if header not in headers_to_sign: - headers_to_sign.append(header) - - signature = self.generate_signature(client['api_secret'], request, headers_to_sign, self.current_time) - request.add_header(self.auth_header_name, ", ".join([ - self.algo_id + ' Credential=' + client['api_key'] + '/' + self.short_date( - self.current_time) + '/' + self.credential_scope, - 'SignedHeaders=' + self.prepare_headers_to_sign(headers_to_sign), - 'Signature=' + signature - ])) - return request.request - - def authenticate(self, r, key_db): - request = EscherRequest(r) - - auth_params = AuthParams(request.query_parts(), self.vendor_key) - validator = AuthenticationValidator() - - validator.validate_mandatory_signed_headers(auth_params.get_signed_headers()) - validator.validate_hash_algo(auth_params.get_hash_algo()) - validator.validate_dates( - self.current_time, - auth_params.get_request_date(), - auth_params.get_credential_date(), - auth_params.get_expires(), - self.clock_skew - ) - validator.validate_credential_scope(self.credential_scope, auth_params.get_credential_scope()) - - if auth_params.get_credential_key() not in key_db: - raise EscherException('Invalid Escher key') - - calculated_signature = self.generate_signature( - key_db[auth_params.get_credential_key()], request, - auth_params.get_signed_headers(), - auth_params.get_request_date() - ) - validator.validate_signature(calculated_signature, auth_params.get('signature')) - - return auth_params.get_credential_key() - - def hmac_digest(self, key, message, is_hex=False): - if not isinstance(key, bytes): - key = key.encode('utf-8') - digest = hmac.new(key, message.encode('utf-8'), self.algo) - if is_hex: - return digest.hexdigest() 
- return digest.digest() - - def generate_signature(self, api_secret, req, headers_to_sign, current_time): - canonicalized_request = self.canonicalize(req, headers_to_sign) - string_to_sign = self.get_string_to_sign(canonicalized_request, current_time) - - signing_key = self.hmac_digest(self.algo_prefix + api_secret, self.short_date(current_time)) - for data in self.credential_scope.split('/'): - signing_key = self.hmac_digest(signing_key, data) - - return self.hmac_digest(signing_key, string_to_sign, True) - - def canonicalize(self, req, headers_to_sign): - return "\n".join([ - req.method(), - self.canonicalize_path(req.path()), - self.canonicalize_query(req.query_parts()), - self.canonicalize_headers(req.headers(), headers_to_sign), - '', - self.prepare_headers_to_sign(headers_to_sign), - self.algo(req.body().encode('utf-8')).hexdigest() - ]) - - def canonicalize_path(self, path): - changes = 1 - while changes > 0: - path, changes = self._normalize_path.subn('/', path, 1) - return path - - def canonicalize_headers(self, headers, headers_to_sign): - headers_list = [] - for key, value in iter(sorted(headers)): - if key.lower() in headers_to_sign: - headers_list.append(key.lower() + ':' + self.normalize_white_spaces(value)) - return "\n".join(sorted(headers_list)) - - def normalize_white_spaces(self, value): - index = 0 - value_normalized = [] - pattern = re.compile(r'\s+') - for part in value.split('"'): - if index % 2 == 0: - part = pattern.sub(' ', part) - value_normalized.append(part) - index += 1 - return '"'.join(value_normalized).strip() - - def canonicalize_query(self, query_parts): - safe = "~+!'()*" - query_list = [] - for key, value in query_parts: - if key == 'X-' + self.vendor_key + '-Signature': - continue - query_list.append(quote(key, safe=safe) + '=' + quote(value, safe=safe)) - return "&".join(sorted(query_list)) - - def get_string_to_sign(self, canonicalized_request, current_time): - return "\n".join([ - self.algo_id, - 
self.long_date(current_time), - self.short_date(current_time) + '/' + self.credential_scope, - self.algo(canonicalized_request.encode('utf-8')).hexdigest() - ]) - - def create_algo(self): - if self.hash_algo == 'SHA256': - return sha256 - if self.hash_algo == 'SHA512': - return sha512 - - def long_date(self, time): - return time.strftime('%Y%m%dT%H%M%SZ') - - def short_date(self, time): - return time.strftime('%Y%m%d') - - def prepare_headers_to_sign(self, headers_to_sign): - return ";".join(sorted(headers_to_sign)) \ No newline at end of file From 715113c7e1f007eaf5750f8d7564da4e1af67bc9 Mon Sep 17 00:00:00 2001 From: Vamsi Date: Wed, 21 Feb 2024 21:11:21 +0530 Subject: [PATCH 11/11] fixed basedir --- backend/config.py | 6 +++--- backend/server_fastapi/config.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/backend/config.py b/backend/config.py index fad51f4..1262773 100644 --- a/backend/config.py +++ b/backend/config.py @@ -47,10 +47,10 @@ class TestingConfig(Config): PREF_REST_API = "http://0.0.0.0:" DATABASE_URI = 'test' TESTING = True - WEBEX_BOT_TOKEN = os.environ.get('WEBEX_BOT_TOKEN', "NGM5N2U2MDgtMzc4YS00NjY1LWFjN2MtMjBhNTM4MTgzNzAyOWNkMmI3YTYtYjJk_PF84_1eb65fdf-9643-417f-9974-ad72cae0e10f") - WEBEX_ACCESS_TOKEN = os.environ.get('WEBEX_ACCESS_TOKEN', "YTAzMGJmYmQtY2I1Ni00MGRmLWJlNWYtNDJjNjY1NmFjZjljM2RjODhmY2QtY2M4_PF84_1eb65fdf-9643-417f-9974-ad72cae0e10f") + WEBEX_BOT_TOKEN = os.environ.get('WEBEX_BOT_TOKEN', "") + WEBEX_ACCESS_TOKEN = os.environ.get('WEBEX_ACCESS_TOKEN', "") - OPENAPI_KEY = os.environ.get('OPENAPI_KEY', "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") + OPENAPI_KEY = os.environ.get('OPENAPI_KEY', "") all_modules = {"openai":"backend.server.utils.openai_utils"} BOT_EMAIL = 'blazetranscriptionbot@webex.bot' diff --git a/backend/server_fastapi/config.py b/backend/server_fastapi/config.py index 81108d7..5fcf418 100644 --- a/backend/server_fastapi/config.py +++ b/backend/server_fastapi/config.py @@ -49,10 
+49,10 @@ class TestingConfig(Config): PREF_REST_API = "http://0.0.0.0:" DATABASE_URI = 'test' TESTING = True - WEBEX_BOT_TOKEN = os.environ.get('WEBEX_BOT_TOKEN', "NGM5N2U2MDgtMzc4YS00NjY1LWFjN2MtMjBhNTM4MTgzNzAyOWNkMmI3YTYtYjJk_PF84_1eb65fdf-9643-417f-9974-ad72cae0e10f") - WEBEX_ACCESS_TOKEN = os.environ.get('WEBEX_ACCESS_TOKEN', "YTAzMGJmYmQtY2I1Ni00MGRmLWJlNWYtNDJjNjY1NmFjZjljM2RjODhmY2QtY2M4_PF84_1eb65fdf-9643-417f-9974-ad72cae0e10f") + WEBEX_BOT_TOKEN = os.environ.get('WEBEX_BOT_TOKEN', "") + WEBEX_ACCESS_TOKEN = os.environ.get('WEBEX_ACCESS_TOKEN', "") - OPENAPI_KEY = os.environ.get('OPENAPI_KEY', "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") + OPENAPI_KEY = os.environ.get('OPENAPI_KEY', "") all_modules = {"openai":"backend.server.utils.openai_utils"} BOT_EMAIL = 'blazetranscriptionbot@webex.bot'