-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'main' into chat-wrapper
- Loading branch information
Showing
12 changed files
with
5,123 additions
and
29 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
import pandas as pd | ||
|
||
from config import POSTGRES_ENGINE | ||
from sentence_transformers import SentenceTransformer | ||
|
||
def embedding(dataframe, column, model=None, model_name='multi-qa-MiniLM-L6-cos-v1'):
    """
    Creates embeddings for text in the passed column.

    Args:
        dataframe: pandas DataFrame holding the text to embed. It is modified
            in place: an 'embedding' column is added (or overwritten).
        column: Name of the column whose values are passed to the model.
        model: Optional encoder object exposing ``encode(list_of_texts)`` whose
            result has a ``tolist()`` method. When omitted, a
            SentenceTransformer is created from ``model_name``.
        model_name: Name handed to SentenceTransformer when ``model`` is not
            supplied.

    Returns:
        The same DataFrame object that was passed in, now carrying the
        'embedding' column.

    Raises:
        KeyError: If ``column`` is not present in ``dataframe``.
    """
    if model is None:
        # Callers that embed several frames can build the model once and pass
        # it in, instead of constructing a new one on every call.
        model = SentenceTransformer(model_name)

    model_embeddings = model.encode(dataframe[column].to_list())

    # Store one plain Python list per row.
    dataframe['embedding'] = model_embeddings.tolist()

    return dataframe
|
||
|
||
def create_table():
    """
    Creates the datausa_tables.cubes table if it does not already exist.

    The table has the columns table_name (text), table_description (text) and
    embedding (vector(384)).

    NOTE(review): assumes POSTGRES_ENGINE is a SQLAlchemy Engine (1.4 or
    newer) and that the `vector` column type is available in the target
    database -- confirm against config.
    """
    # Engine.execute() no longer exists in SQLAlchemy 2.0. begin() opens a
    # connection and commits when the block exits without an error.
    with POSTGRES_ENGINE.begin() as connection:
        connection.exec_driver_sql(
            "CREATE TABLE IF NOT EXISTS datausa_tables.cubes "
            "(table_name text, table_description text, embedding vector(384))"
        )
|
||
|
||
def load_data_to_db(df):
    """
    Embeds the 'table_description' column of df and appends the rows to the
    datausa_tables.cubes table.

    Args:
        df: DataFrame with a 'table_description' column. It gains an
            'embedding' column as a side effect of the embedding step.
    """
    # Show the first rows of what is about to be loaded.
    print(df.head())

    embedded = embedding(df, 'table_description')
    embedded.to_sql(
        'cubes',
        con=POSTGRES_ENGINE,
        schema='datausa_tables',
        if_exists='append',
        index=False,
    )
|
||
|
||
# Single seed row: the table name and the description that gets embedded.
df = pd.DataFrame(
    {
        "table_name": ["Data_USA_House_election"],
        "table_description": [
            "Table 'Data_USA_House_election' contains House election data, including number of votes by candidate, party and state."
        ],
    }
)

# Make sure the destination table exists, then embed and append the row.
create_table()
load_data_to_db(df)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
import pandas as pd | ||
import requests | ||
import urllib.parse | ||
|
||
from config import POSTGRES_ENGINE | ||
from sentence_transformers import SentenceTransformer | ||
|
||
def embedding(dataframe, column, model=None, model_name='multi-qa-MiniLM-L6-cos-v1'):
    """
    Creates embeddings for text in the passed column.

    Args:
        dataframe: pandas DataFrame holding the text to embed. It is modified
            in place: an 'embedding' column is added (or overwritten).
        column: Name of the column whose values are passed to the model.
        model: Optional encoder object exposing ``encode(list_of_texts)`` whose
            result has a ``tolist()`` method. When omitted, a
            SentenceTransformer is created from ``model_name``.
        model_name: Name handed to SentenceTransformer when ``model`` is not
            supplied.

    Returns:
        The same DataFrame object that was passed in, now carrying the
        'embedding' column.

    Raises:
        KeyError: If ``column`` is not present in ``dataframe``.
    """
    if model is None:
        # Callers that embed several frames can build the model once and pass
        # it in, instead of constructing a new one on every call.
        model = SentenceTransformer(model_name)

    model_embeddings = model.encode(dataframe[column].to_list())

    # Store one plain Python list per row.
    dataframe['embedding'] = model_embeddings.tolist()

    return dataframe
|
||
|
||
def create_table():
    """
    Creates the datausa_drilldowns.drilldowns table if it does not already
    exist.

    The table has the text columns product_id, product_name, cube_name and
    drilldown, plus an embedding column of type vector(384).

    NOTE(review): assumes POSTGRES_ENGINE is a SQLAlchemy Engine (1.4 or
    newer) and that the `vector` column type is available in the target
    database -- confirm against config.
    """
    # Engine.execute() no longer exists in SQLAlchemy 2.0. begin() opens a
    # connection and commits when the block exits without an error.
    with POSTGRES_ENGINE.begin() as connection:
        connection.exec_driver_sql(
            "CREATE TABLE IF NOT EXISTS datausa_drilldowns.drilldowns "
            "(product_id text, product_name text, cube_name text, drilldown text, embedding vector(384))"
        )
|
||
|
||
def get_data_from_api(api_url, timeout=30):
    """
    Downloads the JSON payload at api_url and returns its 'data' entry as a
    DataFrame.

    Args:
        api_url: URL to request with HTTP GET.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A pandas DataFrame built from the 'data' entry of the JSON response.

    Raises:
        ValueError: If the request fails, the server answers with an error
            status, the body is not JSON, or the payload has no usable 'data'
            entry. The underlying exception is attached as ``__cause__``.
    """
    try:
        response = requests.get(api_url, timeout=timeout)
        # Turn 4xx/5xx answers into an error here rather than trying to read
        # a 'data' key out of an error body.
        response.raise_for_status()
        df = pd.DataFrame.from_dict(response.json()['data'])
    except (requests.RequestException, ValueError, KeyError, TypeError) as err:
        # Only expected failures are translated; KeyboardInterrupt and
        # SystemExit propagate untouched.
        raise ValueError(f'Invalid API url: {api_url}') from err

    return df
|
||
|
||
def get_api_params(api_url):
    """
    Extracts the cube name and the drilldown from an API url's query string.

    Args:
        api_url: URL whose query string may carry 'cube' and 'drilldowns'
            parameters.

    Returns:
        A (cube_name, drilldown) tuple of strings. A missing parameter yields
        an empty string; when a parameter is repeated the first value wins.
    """
    query_string = urllib.parse.urlparse(api_url).query

    # parse_qs already turns '+' into a space and decodes %XX escapes, so no
    # further replacement is needed. Replacing '+' again would wrongly turn a
    # literal plus (sent as %2B) into a space.
    query_params = urllib.parse.parse_qs(query_string)

    cube_name = query_params.get('cube', [''])[0]
    drilldown = query_params.get('drilldowns', [''])[0]

    return cube_name, drilldown
|
||
|
||
def load_data_to_db(api_url, measure_name):
    """
    Fetches the rows behind api_url, reshapes them into drilldown records and
    prints a preview.

    The drilldown column and its ' ID' companion are renamed to
    'drilldown_name' and 'drilldown_id', the cube and drilldown taken from the
    url are added as constant columns, and the measure column is removed.

    Args:
        api_url: API url; its 'cube' and 'drilldowns' query parameters label
            the rows.
        measure_name: Name of the measure column to drop from the result.
    """
    cube_name, drilldown = get_api_params(api_url)
    members = get_data_from_api(api_url=api_url)

    column_map = {drilldown: "drilldown_name", f"{drilldown} ID": "drilldown_id"}
    members = members.rename(columns=column_map)

    members['cube_name'] = cube_name
    members['drilldown'] = drilldown
    members = members.drop(columns=[str(measure_name)])

    # When the API returned no '<drilldown> ID' column, fall back to the
    # 'drilldown' column for the id.
    if 'drilldown_id' not in members.columns:
        members['drilldown_id'] = members['drilldown']

    # Treat empty strings as missing, then discard rows that have neither a
    # name nor an id.
    members = members.replace('', pd.NA)
    members = members.dropna(subset=['drilldown_name', 'drilldown_id'], how='all')

    print(members.head())

    # TODO: the embedding + to_sql step is disabled. The disabled code embedded
    # a 'product_name' column and appended to datausa_drilldowns.drilldowns.
|
||
|
||
# Each prompt is printed on its own line; the answer is read from stdin.
print("Enter API url: ")
api_url = input()
print("Enter measure name: ")
measure_name = input()

# NOTE: create_table() is not called here.
load_data_to_db(api_url=api_url, measure_name=measure_name)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.