Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP twitter retriever #1373

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions application/api/user/routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -479,6 +479,19 @@
"retriever": "brave_search",
}
)

if "twitter_search" in settings.RETRIEVERS_ENABLED:
data.append(

Check warning on line 484 in application/api/user/routes.py

View check run for this annotation

Codecov / codecov/patch

application/api/user/routes.py#L483-L484

Added lines #L483 - L484 were not covered by tests
{
"name": "Twitter Search",
"language": "en",
"date": "twitter_search",
"model": settings.EMBEDDINGS_NAME,
"location": "custom",
"tokens": "",
"retriever": "twitter_search",
}
)
except Exception as err:
return make_response(jsonify({"success": False, "error": str(err)}), 400)

Expand Down
5 changes: 3 additions & 2 deletions application/core/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ class Settings(BaseSettings):
MODEL_TOKEN_LIMITS: dict = {"gpt-3.5-turbo": 4096, "claude-2": 1e5}
UPLOAD_FOLDER: str = "inputs"
VECTOR_STORE: str = "faiss" # "faiss" or "elasticsearch" or "qdrant" or "milvus" or "lancedb"
RETRIEVERS_ENABLED: list = ["classic_rag", "duckduck_search"] # also brave_search
RETRIEVERS_ENABLED: list = ["classic_rag", "duckduck_search", "twitter_search"] # also brave_search

# LLM Cache
CACHE_REDIS_URL: str = "redis://localhost:6379/2"
Expand Down Expand Up @@ -74,7 +74,8 @@ class Settings(BaseSettings):
LANCEDB_PATH: str = "/tmp/lancedb" # Path where LanceDB stores its local data
LANCEDB_TABLE_NAME: Optional[str] = "docsgpts" # Name of the table to use for storing vectors
BRAVE_SEARCH_API_KEY: Optional[str] = None

TWITTER_API_KEY: Optional[str] = None
TWITTER_API_KEY_SECRET: Optional[str] = None
FLASK_DEBUG_MODE: bool = False


Expand Down
1 change: 1 addition & 0 deletions application/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ redis==5.0.1
referencing==0.30.2
regex==2024.9.11
requests==2.32.3
requests-oauthlib==2.0.0
retry==0.9.2
sentence-transformers==3.0.1
tiktoken==0.7.0
Expand Down
3 changes: 2 additions & 1 deletion application/retriever/retriever_creator.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
from application.retriever.classic_rag import ClassicRAG
from application.retriever.duckduck_search import DuckDuckSearch
from application.retriever.brave_search import BraveRetSearch

from application.retriever.twitter_search import TwitterRetSearch


class RetrieverCreator:
retrievers = {
'classic': ClassicRAG,
'duckduck_search': DuckDuckSearch,
'brave_search': BraveRetSearch,
'twitter_search': TwitterRetSearch,
'default': ClassicRAG
}

Expand Down
187 changes: 187 additions & 0 deletions application/retriever/twitter_search.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
import sys
from application.retriever.base import BaseRetriever
from application.retriever.classic_rag import ClassicRAG
from application.core.settings import settings
from application.llm.llm_creator import LLMCreator
from application.utils import num_tokens_from_string
import requests
import base64


class TwitterRetSearch(BaseRetriever):
    """Retriever that uses recent tweets as the context for answering a question.

    The flow is: ask the configured LLM to turn the user's question into a
    search term, run that term against the X (Twitter) API v2 recent-search
    endpoint, and feed the matching tweet texts into the answer prompt in
    place of ``{summaries}``.

    Credentials are read from ``settings.TWITTER_API_KEY`` and
    ``settings.TWITTER_API_KEY_SECRET`` and exchanged for an app-only bearer
    token on every search.
    """

    # Seconds to wait on a single HTTP call to the X API before giving up;
    # without a timeout a stalled connection would block the request forever.
    REQUEST_TIMEOUT = 10
    # Range the recent-search endpoint accepts for ``max_results``.
    MIN_RESULTS = 10
    MAX_RESULTS = 100

    def __init__(
        self,
        question,
        source,
        chat_history,
        prompt,
        chunks=2,
        token_limit=150,
        gpt_model="docsgpt",
        user_api_key=None,
    ):
        """Store the request parameters and cap ``token_limit`` at the model limit.

        Args:
            question: The user's question.
            source: Source descriptor; stored and echoed by ``get_params`` only.
            chat_history: List of dicts; entries carrying both ``"prompt"`` and
                ``"response"`` keys are replayed to the LLM in ``gen``.
            prompt: Prompt template containing a ``{summaries}`` placeholder.
            chunks: Number of tweets to retrieve; ``0`` disables retrieval.
            token_limit: Token budget for replayed chat history.
            gpt_model: Model name passed to the LLM and used to look up the
                per-model token limit.
            user_api_key: Optional per-user key forwarded to ``LLMCreator``.
        """
        self.question = question
        self.source = source
        self.chat_history = chat_history
        self.prompt = prompt
        self.chunks = chunks
        self.gpt_model = gpt_model
        model_limit = settings.MODEL_TOKEN_LIMITS.get(
            self.gpt_model, settings.DEFAULT_MAX_HISTORY
        )
        # Never allow a history budget larger than the model can take.
        self.token_limit = min(model_limit, token_limit)
        self.user_api_key = user_api_key

    def _get_data(self):
        """Return tweets relevant to the question as a list of doc dicts.

        Returns:
            A list of ``{"text", "title", "link"}`` dicts, one per tweet; empty
            when ``chunks`` is zero, the LLM produced no search term, or the
            search matched nothing.

        Raises:
            Exception: Propagated from ``search_tweets`` when the X API call
                fails or credentials are missing.
        """
        chunks = int(self.chunks)
        if chunks <= 0:
            return []

        # Let the LLM turn the natural-language question into a search term.
        llm_query = f"Generate a search term for the Twitter API based on: {self.question}. Provide single or multiple words without quotes."
        messages_combine = [{"role": "user", "content": llm_query}]
        llm = LLMCreator.create_llm(
            settings.LLM_NAME, api_key=settings.API_KEY, user_api_key=self.user_api_key
        )
        completion = llm.gen_stream(model=self.gpt_model, messages=messages_combine)
        twitter_search_query = "".join(str(line) for line in completion).strip()
        if not twitter_search_query:
            # An empty query would only earn a 400 from the API.
            return []

        results = self.search_tweets(twitter_search_query, count=chunks)

        docs = []
        for tweet in results:
            # v2 tweet objects carry "id" and "text" by default; skip anything
            # that does not look like a tweet instead of failing the request.
            text = tweet.get("text")
            tweet_id = tweet.get("id")
            if not text or not tweet_id:
                continue
            docs.append(
                {
                    "text": text,
                    "title": f"Tweet {tweet_id}",
                    "link": f"https://twitter.com/i/web/status/{tweet_id}",
                }
            )

        if settings.LLM_NAME == "llama.cpp":
            # Keep at most one doc for llama.cpp; slicing is safe when empty.
            docs = docs[:1]

        return docs

    def gen(self):
        """Stream sources and then the LLM answer.

        Yields:
            ``{"source": doc}`` for every retrieved tweet, followed by
            ``{"answer": chunk}`` for every chunk streamed by the LLM.
        """
        docs = self._get_data()

        # Join all tweet texts with newlines and splice them into the prompt.
        docs_together = "\n".join(doc["text"] for doc in docs)
        p_chat_combine = self.prompt.replace("{summaries}", docs_together)
        messages_combine = [{"role": "system", "content": p_chat_combine}]
        for doc in docs:
            yield {"source": doc}

        if len(self.chat_history) > 1:
            tokens_current_history = 0
            # Walk the history newest-first so the most recent exchanges win
            # the token budget. reversed() is used instead of list.reverse()
            # so the caller's history list is not mutated.
            for entry in reversed(self.chat_history):
                if "prompt" in entry and "response" in entry:
                    tokens_batch = num_tokens_from_string(
                        entry["prompt"]
                    ) + num_tokens_from_string(entry["response"])
                    if tokens_current_history + tokens_batch < self.token_limit:
                        tokens_current_history += tokens_batch
                        messages_combine.append(
                            {"role": "user", "content": entry["prompt"]}
                        )
                        messages_combine.append(
                            {"role": "system", "content": entry["response"]}
                        )
        messages_combine.append({"role": "user", "content": self.question})

        llm = LLMCreator.create_llm(
            settings.LLM_NAME, api_key=settings.API_KEY, user_api_key=self.user_api_key
        )

        completion = llm.gen_stream(model=self.gpt_model, messages=messages_combine)
        for line in completion:
            yield {"answer": str(line)}

    def search(self):
        """Return the retrieved tweet docs without generating an answer."""
        return self._get_data()

    def get_params(self):
        """Return the parameters this retriever was constructed with."""
        return {
            "question": self.question,
            "source": self.source,
            "chat_history": self.chat_history,
            "prompt": self.prompt,
            "chunks": self.chunks,
            "token_limit": self.token_limit,
            "gpt_model": self.gpt_model,
            "user_api_key": self.user_api_key,
        }

    def get_bearer_token(self, consumer_key, consumer_secret):
        """Exchange an API key/secret pair for an app-only bearer token.

        Args:
            consumer_key: The application's API key.
            consumer_secret: The application's API key secret.

        Returns:
            The ``access_token`` string from the token endpoint.

        Raises:
            Exception: If the endpoint does not answer 200 or the response
                carries no ``access_token``.
        """
        # Step 1: Concatenate with a colon
        bearer_token_credentials = f"{consumer_key}:{consumer_secret}"

        # Step 2: Base64 encode the concatenated string
        base64_encoded_credentials = base64.b64encode(
            bearer_token_credentials.encode("utf-8")
        ).decode("utf-8")

        # Step 3: Obtain Bearer Token
        url = 'https://api.x.com/oauth2/token'
        headers = {
            'Authorization': f'Basic {base64_encoded_credentials}',
            'Content-Type': 'application/x-www-form-urlencoded;charset=UTF-8',
        }
        data = {'grant_type': 'client_credentials'}

        response = requests.post(
            url, headers=headers, data=data, timeout=self.REQUEST_TIMEOUT
        )

        if response.status_code != 200:
            raise Exception(f"Failed to get bearer token: {response.status_code}, {response.text}")

        access_token = response.json().get('access_token')
        if not access_token:
            # A 200 without a token would otherwise surface later as an
            # opaque "Bearer None" authorization failure.
            raise Exception("Failed to get bearer token: response contained no access_token")
        return access_token

    def search_tweets(self, search_term, count=10):
        """Search recent tweets through the X API v2 recent-search endpoint.

        Args:
            search_term: Query string sent as the ``query`` parameter.
            count: Maximum number of tweets to return. The endpoint only
                accepts ``max_results`` between 10 and 100, so the request is
                clamped to that range and the result trimmed to ``count``.

        Returns:
            A list of tweet dicts taken from the response's ``data`` key;
            empty when nothing matched or ``count`` is not positive.

        Raises:
            Exception: If credentials are not configured, the bearer token
                cannot be obtained, or the search request does not answer 200.
        """
        count = int(count)
        if count <= 0:
            return []

        if not settings.TWITTER_API_KEY or not settings.TWITTER_API_KEY_SECRET:
            raise Exception(
                "TWITTER_API_KEY and TWITTER_API_KEY_SECRET must be set to use twitter_search"
            )

        # The token is a credential: it must never be printed or logged.
        oauth2_token = self.get_bearer_token(
            settings.TWITTER_API_KEY, settings.TWITTER_API_KEY_SECRET
        )

        params = {
            'query': search_term,
            'max_results': min(max(count, self.MIN_RESULTS), self.MAX_RESULTS),
        }

        SEARCH_URL = "https://api.twitter.com/2/tweets/search/recent"
        headers = {'Authorization': f'Bearer {oauth2_token}'}
        response = requests.get(
            SEARCH_URL, headers=headers, params=params, timeout=self.REQUEST_TIMEOUT
        )

        print(response.status_code, file=sys.stderr)

        if response.status_code != 200:
            print(response.text, file=sys.stderr)
            raise Exception(f"Request failed: {response.status_code} {response.text}")

        tweet_data = response.json()

        # v2 puts matches under "data" and omits the key entirely when there
        # are none ("statuses" was the v1.1 name and is never present here).
        return tweet_data.get('data', [])[:count]

Check warning on line 186 in application/retriever/twitter_search.py

View check run for this annotation

Codecov / codecov/patch

application/retriever/twitter_search.py#L186

Added line #L186 was not covered by tests

Loading