Skip to content

Commit

Permalink
WIP twitter retriever
Browse files Browse the repository at this point in the history
  • Loading branch information
shatanikmahanty committed Oct 23, 2024
1 parent 4dd0d65 commit db3a918
Show file tree
Hide file tree
Showing 5 changed files with 206 additions and 3 deletions.
13 changes: 13 additions & 0 deletions application/api/user/routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -479,6 +479,19 @@ def get(self):
"retriever": "brave_search",
}
)

if "twitter_search" in settings.RETRIEVERS_ENABLED:
data.append(

Check warning on line 484 in application/api/user/routes.py

View check run for this annotation

Codecov / codecov/patch

application/api/user/routes.py#L483-L484

Added lines #L483 - L484 were not covered by tests
{
"name": "Twitter Search",
"language": "en",
"date": "twitter_search",
"model": settings.EMBEDDINGS_NAME,
"location": "custom",
"tokens": "",
"retriever": "twitter_search",
}
)
except Exception as err:
return make_response(jsonify({"success": False, "error": str(err)}), 400)

Expand Down
5 changes: 3 additions & 2 deletions application/core/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ class Settings(BaseSettings):
MODEL_TOKEN_LIMITS: dict = {"gpt-3.5-turbo": 4096, "claude-2": 1e5}
UPLOAD_FOLDER: str = "inputs"
VECTOR_STORE: str = "faiss" # "faiss" or "elasticsearch" or "qdrant" or "milvus" or "lancedb"
RETRIEVERS_ENABLED: list = ["classic_rag", "duckduck_search"] # also brave_search
RETRIEVERS_ENABLED: list = ["classic_rag", "duckduck_search", "twitter_search"] # also brave_search

# LLM Cache
CACHE_REDIS_URL: str = "redis://localhost:6379/2"
Expand Down Expand Up @@ -74,7 +74,8 @@ class Settings(BaseSettings):
LANCEDB_PATH: str = "/tmp/lancedb" # Path where LanceDB stores its local data
LANCEDB_TABLE_NAME: Optional[str] = "docsgpts" # Name of the table to use for storing vectors
BRAVE_SEARCH_API_KEY: Optional[str] = None

TWITTER_API_KEY: Optional[str] = None
TWITTER_API_KEY_SECRET: Optional[str] = None
FLASK_DEBUG_MODE: bool = False


Expand Down
1 change: 1 addition & 0 deletions application/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ redis==5.0.1
referencing==0.30.2
regex==2024.9.11
requests==2.32.3
requests-oauthlib==2.0.0
retry==0.9.2
sentence-transformers==3.0.1
tiktoken==0.7.0
Expand Down
3 changes: 2 additions & 1 deletion application/retriever/retriever_creator.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
from application.retriever.classic_rag import ClassicRAG
from application.retriever.duckduck_search import DuckDuckSearch
from application.retriever.brave_search import BraveRetSearch

from application.retriever.twitter_search import TwitterRetSearch


class RetrieverCreator:
retrievers = {
'classic': ClassicRAG,
'duckduck_search': DuckDuckSearch,
'brave_search': BraveRetSearch,
'twitter_search': TwitterRetSearch,
'default': ClassicRAG
}

Expand Down
187 changes: 187 additions & 0 deletions application/retriever/twitter_search.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
import base64
import logging
import sys

import requests

from application.core.settings import settings
from application.llm.llm_creator import LLMCreator
from application.retriever.base import BaseRetriever
from application.retriever.classic_rag import ClassicRAG
from application.utils import num_tokens_from_string


class TwitterRetSearch(BaseRetriever):
    """Retriever that uses recent tweets as the context for an answer.

    An LLM first turns the user's question into a search term, the term is
    sent to the X (Twitter) API v2 recent-search endpoint, and the text of
    the returned tweets is substituted for ``{summaries}`` in the prompt.
    """

    _TOKEN_URL = "https://api.x.com/oauth2/token"
    _SEARCH_URL = "https://api.twitter.com/2/tweets/search/recent"
    # Seconds to wait for the X API; without a timeout a stalled connection
    # would block the request handler indefinitely.
    _REQUEST_TIMEOUT = 10
    # Range the recent-search endpoint accepts for ``max_results``.
    _MIN_RESULTS = 10
    _MAX_RESULTS = 100

    _logger = logging.getLogger(__name__)

    def __init__(
        self,
        question,
        source,
        chat_history,
        prompt,
        chunks=2,
        token_limit=150,
        gpt_model="docsgpt",
        user_api_key=None,
    ):
        """Store the request parameters.

        ``token_limit`` is capped at the limit configured for ``gpt_model``
        in ``settings.MODEL_TOKEN_LIMITS`` (falling back to
        ``settings.DEFAULT_MAX_HISTORY`` for unknown models).
        """
        self.question = question
        self.source = source
        self.chat_history = chat_history
        self.prompt = prompt
        self.chunks = chunks
        self.gpt_model = gpt_model
        model_limit = settings.MODEL_TOKEN_LIMITS.get(
            self.gpt_model, settings.DEFAULT_MAX_HISTORY
        )
        self.token_limit = token_limit if token_limit < model_limit else model_limit
        self.user_api_key = user_api_key

    def _get_data(self):
        """Search X for tweets relevant to the question.

        Returns:
            A list of at most ``chunks`` dicts with the keys ``text``,
            ``title`` and ``link``; empty when ``chunks`` is zero or nothing
            matched.
        """
        limit = int(self.chunks)
        if limit <= 0:
            return []

        # Let the LLM condense the question into a search term first.
        llm_query = f"Generate a search term for the Twitter API based on: {self.question}. Provide single or multiple words without quotes."
        messages_combine = [{"role": "user", "content": llm_query}]
        llm = LLMCreator.create_llm(
            settings.LLM_NAME, api_key=settings.API_KEY, user_api_key=self.user_api_key
        )
        completion = llm.gen_stream(model=self.gpt_model, messages=messages_combine)
        twitter_search_query = "".join(str(line) for line in completion).strip()
        if not twitter_search_query:
            # An empty query cannot be searched; fall back to the raw question.
            twitter_search_query = str(self.question)

        tweets = self.search_tweets(twitter_search_query, count=limit)

        docs = []
        for tweet in tweets:
            if not isinstance(tweet, dict):
                continue
            text = tweet.get("text")
            tweet_id = tweet.get("id")
            if not text or not tweet_id:
                # Skip malformed entries instead of failing the whole request.
                continue
            docs.append(
                {
                    "text": text,
                    "title": f"Tweet {tweet_id}",
                    "link": f"https://x.com/i/web/status/{tweet_id}",
                }
            )

        if settings.LLM_NAME == "llama.cpp":
            # Keep a single document for llama.cpp; slicing (rather than
            # indexing) also copes with an empty result list.
            docs = docs[:1]

        return docs

    def gen(self):
        """Yield the source documents, then the streamed LLM answer.

        Yields:
            ``{"source": doc}`` for every retrieved tweet, followed by
            ``{"answer": chunk}`` for every chunk streamed by the LLM.
        """
        docs = self._get_data()

        # Join the text of all documents with newlines to build the context.
        docs_together = "\n".join([doc["text"] for doc in docs])
        p_chat_combine = self.prompt.replace("{summaries}", docs_together)
        messages_combine = [{"role": "system", "content": p_chat_combine}]
        for doc in docs:
            yield {"source": doc}

        if len(self.chat_history) > 1:
            tokens_current_history = 0
            # Walk the history from the newest entry backwards. reversed()
            # is used instead of list.reverse() so the caller's list is not
            # flipped in place on every call.
            for entry in reversed(self.chat_history):
                if "prompt" in entry and "response" in entry:
                    tokens_batch = num_tokens_from_string(
                        entry["prompt"]
                    ) + num_tokens_from_string(entry["response"])
                    # Only include exchanges that still fit the token budget.
                    if tokens_current_history + tokens_batch < self.token_limit:
                        tokens_current_history += tokens_batch
                        messages_combine.append(
                            {"role": "user", "content": entry["prompt"]}
                        )
                        messages_combine.append(
                            {"role": "system", "content": entry["response"]}
                        )
        messages_combine.append({"role": "user", "content": self.question})

        llm = LLMCreator.create_llm(
            settings.LLM_NAME, api_key=settings.API_KEY, user_api_key=self.user_api_key
        )

        completion = llm.gen_stream(model=self.gpt_model, messages=messages_combine)
        for line in completion:
            yield {"answer": str(line)}

    def search(self):
        """Return the retrieved documents without generating an answer."""
        return self._get_data()

    def get_params(self):
        """Return the parameters this retriever was configured with."""
        return {
            "question": self.question,
            "source": self.source,
            "chat_history": self.chat_history,
            "prompt": self.prompt,
            "chunks": self.chunks,
            "token_limit": self.token_limit,
            "gpt_model": self.gpt_model,
            "user_api_key": self.user_api_key,
        }

    def get_bearer_token(self, consumer_key, consumer_secret):
        """Exchange the consumer key/secret for an app-only bearer token.

        Args:
            consumer_key: API key of the X application.
            consumer_secret: API key secret of the X application.

        Returns:
            The ``access_token`` string from the token endpoint.

        Raises:
            ValueError: if either credential is missing.
            Exception: if the endpoint does not answer 200 or the response
                contains no ``access_token``.
        """
        if not consumer_key or not consumer_secret:
            # Without this check the literal string "None:None" would be
            # sent as credentials and fail with an opaque 403.
            raise ValueError(
                "Twitter consumer key and secret are required "
                "(set TWITTER_API_KEY and TWITTER_API_KEY_SECRET)"
            )

        # Step 1: Concatenate with a colon
        bearer_token_credentials = f"{consumer_key}:{consumer_secret}"

        # Step 2: Base64 encode the concatenated string
        base64_encoded_credentials = base64.b64encode(
            bearer_token_credentials.encode('utf-8')
        ).decode('utf-8')

        # Step 3: Obtain Bearer Token
        headers = {
            'Authorization': f'Basic {base64_encoded_credentials}',
            'Content-Type': 'application/x-www-form-urlencoded;charset=UTF-8'
        }
        data = {
            'grant_type': 'client_credentials'
        }
        response = requests.post(
            self._TOKEN_URL, headers=headers, data=data, timeout=self._REQUEST_TIMEOUT
        )

        if response.status_code != 200:
            raise Exception(f"Failed to get bearer token: {response.status_code}, {response.text}")

        access_token = response.json().get('access_token')
        if not access_token:
            raise Exception("Failed to get bearer token: response contained no access_token")
        return access_token

    def search_tweets(self, search_term, count=10):
        """Search recent tweets with the X API v2 recent-search endpoint.

        Args:
            search_term: Query string passed as the ``query`` parameter.
            count: Maximum number of tweets to return.

        Returns:
            A list of at most ``count`` tweet objects taken from the
            ``data`` array of the response; empty when nothing matched.

        Raises:
            Exception: if authentication fails or the search request does
                not answer 200.
        """
        count = int(count)
        if count <= 0:
            return []

        oauth2_token = self.get_bearer_token(
            settings.TWITTER_API_KEY, settings.TWITTER_API_KEY_SECRET
        )
        # NOTE: the token is a secret and is deliberately never logged.

        params = {
            'query': search_term,
            # The endpoint only accepts 10..100 here, so request at least the
            # minimum and trim the surplus locally.
            'max_results': min(max(count, self._MIN_RESULTS), self._MAX_RESULTS),
        }
        headers = {
            'Authorization': f'Bearer {oauth2_token}'
        }
        response = requests.get(
            self._SEARCH_URL, headers=headers, params=params, timeout=self._REQUEST_TIMEOUT
        )
        self._logger.debug("X recent search answered HTTP %s", response.status_code)

        if response.status_code != 200:
            raise Exception(f"Request failed: {response.status_code} {response.text}")

        tweet_data = response.json()

        # API v2 puts the tweets under "data" and omits the key entirely
        # when there are no matches.
        return (tweet_data.get('data') or [])[:count]

Check warning on line 186 in application/retriever/twitter_search.py

View check run for this annotation

Codecov / codecov/patch

application/retriever/twitter_search.py#L186

Added line #L186 was not covered by tests

0 comments on commit db3a918

Please sign in to comment.