Research agent #8

Merged: 25 commits into main from research-agent on Apr 28, 2024
Commits (25)
e3050d3 - Support passing BasePromptTemplates as prompts into LLMTool (ZmeiGorynych, Apr 20, 2024)
d3bab13 - Start of blog post draft example. (ZmeiGorynych, Apr 20, 2024)
54596f5 - Introduce Kuzu graph store (whimo, Apr 25, 2024)
4df5afc - First cut of question insertion tool (ZmeiGorynych, Apr 25, 2024)
8e609dc - Merge remote-tracking branch 'origin/research-agent' into research_as… (ZmeiGorynych, Apr 25, 2024)
dc0c4c9 - Graph store implementation draft (whimo, Apr 25, 2024)
e6ff0a1 - Remove redundant inheritance (whimo, Apr 25, 2024)
5efcf7c - Question generator and inserter tools (ZmeiGorynych, Apr 26, 2024)
43cd69c - Merge branch 'research-agent' of https://github.com/ShoggothAI/motley… (ZmeiGorynych, Apr 26, 2024)
bb691e8 - Research agent directory in examples (whimo, Apr 26, 2024)
068f5f4 - WIP: research agent orchestrator (whimo, Apr 26, 2024)
75071a4 - Kuzu graph store entity creation fix (whimo, Apr 26, 2024)
6d33b69 - Question prioritizer first cut (ZmeiGorynych, Apr 26, 2024)
330e692 - Question prioritizer first cut (ZmeiGorynych, Apr 26, 2024)
efd1697 - Merge branch 'research-agent' of https://github.com/ShoggothAI/motley… (ZmeiGorynych, Apr 26, 2024)
1876740 - Knowledge gaining orchestrator working implementation (whimo, Apr 26, 2024)
e3a4562 - Draft question answerer (ZmeiGorynych, Apr 26, 2024)
4be06ed - Merge branch 'research-agent' of https://github.com/ShoggothAI/motley… (ZmeiGorynych, Apr 26, 2024)
e8deaec - tweak update_properties (ZmeiGorynych, Apr 26, 2024)
4df1983 - Ingestion sort of works (ZmeiGorynych, Apr 26, 2024)
e0b5ea4 - WIP question answerer (whimo, Apr 26, 2024)
d219c0c - Wrap ingestion as MotleyTool (ZmeiGorynych, Apr 26, 2024)
dcec705 - Research agent: first working implementation (whimo, Apr 27, 2024)
1dc5f48 - Increase MAX_ITER in research agent example (whimo, Apr 27, 2024)
aa023a3 - Merge branch 'main' into research-agent (whimo, Apr 28, 2024)
138 changes: 138 additions & 0 deletions examples/blog_post/blog_post.py
@@ -0,0 +1,138 @@
from typing import Union, Sequence, List

from dotenv import load_dotenv

from llama_index.graph_stores.kuzu import KuzuGraphStore
from langchain.schema import AIMessage, HumanMessage, SystemMessage, BaseMessage
from langchain_core.prompts.chat import ChatPromptTemplate
from motleycrew.agent.langchain.react import ReactMotleyAgent

from motleycrew.tool.llm_tool import LLMTool
from motleycrew import MotleyCrew, Task

from .blog_post_input import text

load_dotenv()

# TODO: switch example to using URL instead of fixed text?
# from langchain.document_loaders import UnstructuredURLLoader
# from langchain.text_splitter import TokenTextSplitter
# def urls_to_messages(urls: Union[str, Sequence[str]]) -> List[HumanMessage]:
# if isinstance(urls, str):
# urls = [urls]
# loader = UnstructuredURLLoader(urls=urls)
# data = loader.load()
# text_splitter = TokenTextSplitter(chunk_size=1000, chunk_overlap=0)
# texts = text_splitter.split_documents(data)
# return [HumanMessage(content=d.page_content) for d in texts]


max_words = 500
min_words = 450

editor_prompt = ChatPromptTemplate.from_messages(
[
SystemMessage(
content="You are an experienced online blog post editor with 10 years of experience."
),
HumanMessage(
content="""Review the blog post draft below (delimited by triple backticks)
and provide a critique and use specific examples from the text on what
should be done to improve the draft, with data professionals as the intended audience.
Also, suggest a catchy title for the story.
```{input}```
"""
),
]
)

illustrator_prompt = ChatPromptTemplate.from_messages(
[
SystemMessage(
content="You are a professional illustrator with 10 years of experience."
),
HumanMessage(
content="You are given the following draft story, delimited by triple back quotes: ```{second_draft}```"
),
HumanMessage(
content="""Your task is to specify the illustrations that would fit this story.
Make sure the illustrations are varied in style, eye-catching, and some of them humorous.
Describe each illustration in a way suitable for entering in a Midjourney prompt.
Each description should be detailed and verbose. Don't explain the purpose of the
illustrations; just describe in great detail what each illustration should show,
in a way suitable for a generative image prompt.
There should be at most 5 and at least 3 illustrations.
Return the illustration descriptions as a list in the format
["...", "...", ..., "..."]
"""
),
]
)

seo_prompt = ChatPromptTemplate.from_messages(
[
SystemMessage(
content="""Act as an SEO expert with 10 years of experience but ensure to
explain any SEO jargon for clarity when using it."""
),
HumanMessage(
content="""Review the blog post below (delimited by triple back quotes) and provide specific
examples from the text where to optimize its SEO content.
Recommend SEO-friendly titles and subtitles that could be used.
```{second_draft}```
"""
),
]
)

editor = LLMTool(
name="editor",
description="An editor providing constructive suggestions to improve the blog post submitted to it",
prompt=editor_prompt,
)

# TODO: Turn it into an agent that calls the DALL-E tool
# and returns a dict {image_description: image_url}
illustrator = LLMTool(
name="illustrator",
description="An illustrator providing detailed descriptions of illustrations for a story",
prompt=illustrator_prompt,
)

seo_expert = LLMTool(
name="seo_expert",
description="An SEO expert providing SEO optimization suggestions",
prompt=seo_prompt,
)


writer = ReactMotleyAgent(
prompt="You are a professional freelance copywriter with 10 years of experience.",
tools=[editor, illustrator, seo_expert],
)

# Create tasks for your agents
crew = MotleyCrew()
task1 = Task(
crew=crew,
name="Write a blog post from the provided information",
description=f"""Write a blog post of at most {max_words} words and at least {min_words}
words based on the information provided. Keep the tone suitable for an audience of
data professionals, avoid superlatives and an overly excitable tone.
Don't discuss installation or testing.
The summary will be provided in one or multiple chunks, followed by <END>.

Proceed as follows: first, write a draft blog post as described above.
Then, submit it in turn to the editor, illustrator, and SEO expert for feedback.
In the case of the illustrator, insert the illustration descriptions it provides in
square brackets into the appropriate places in the draft.
In each case, revise the draft as per the response of the expert and submit it to the next expert.

After you have implemented each expert's recommendations, return the final blog post in markdown format.

Information begins: {text} <END>""",
agent=writer,
)

crew.run(verbose=2)
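
For orientation, a minimal sketch (not part of this PR) of exercising one of the LLMTool experts on its own. It assumes LLMTool follows the LangChain-style tool interface with invoke(), and that the editor prompt's only template variable is {input}; both are assumptions, since the example above only uses the tools through the agent.

# Hedged sketch: call the editor tool directly, outside the crew.
# Assumes LLMTool exposes a LangChain-style invoke() and that "input"
# is the editor prompt's single template variable.
draft = "A first draft about WisePizza and automated segment analysis..."
feedback = editor.invoke({"input": draft})
print(feedback)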
102 changes: 102 additions & 0 deletions examples/blog_post/blog_post_input.py
@@ -0,0 +1,102 @@
text = """
Wise Pizza: a library for automatically finding the most unusual segments
WisePizza is a library for finding and visualising the most interesting slices in multidimensional data, based on Lasso and LP solvers. It provides functions to find segments whose average is most different from the global one, or segments most useful in explaining the difference between two datasets.

The approach
WisePizza assumes you have a dataset with a number of discrete dimensions (such as currency or region). For each combination of dimensions, the dataset must have a total value (the total of the metric over that segment, for example the total volume in that region and currency) and an optional size value (set to 1 if not specified), which could for example be the total number of customers for that region and currency. The average value of the outcome for the segment is defined as total divided by size; in this example it would be the average volume per customer.

explain_levels takes such a dataset and looks for a small number of 'simple' segments (each only constraining a small number of dimensions) that between them explain most of the variation in the averages; you could also think of them as the segments whose size-weighted deviation from the overall dataset average is the largest. This trades off unusual averages (which will naturally occur more for smaller segments) against segment size.

Yet another way of looking at it is that we look for segments which, if their average was reset to the overall dataset average, would move the overall total the most.

explain_changes_in_totals and explain_changes_in_average take two datasets of the kind described above, with the same column names, and apply the same kind of logic to find the segments that contribute most to the difference (in total or average, respectively) between the two datasets, optionally splitting that into contributions from changes in segment size and changes in segment total.

Sometimes, rather than explaining the change in totals from one period to the next, one wishes to explain a change in averages. The analytics of this are a little different: for example, while (as long as all weights and totals are positive) increasing a segment's size, other things remaining equal, always increases the overall total, it can increase or decrease the overall average, depending on whether the average value of that segment is below or above the overall average.

Table of Contents
What can this do for you?
Find interesting slices
Comparison between two datasets
Installation
Quick Start
For Developers
Tests
What can this do for you?
The automated search for interesting segments can give you the following:

1. Better information about segments and subsegments in your data
By using WisePizza and defining initial segments, you can find a segment which maximizes a specific outcome, such as adoption rates.

2. Understanding differences in two time periods or two dataframes
If you have two time periods or two datasets, you can find the segments that experienced the largest change in totals from the previous period/dataset.

Installation
You can always get the newest wise_pizza release using pip: https://pypi.org/project/wise-pizza/

pip install wise-pizza
From the command line (another way):

pip install git+https://github.com/transferwise/wise-pizza.git
From Jupyter notebook (another way):

!pip install git+https://github.com/transferwise/wise-pizza.git
Or you can clone and run from source, in which case you should run pip install -r requirements.txt before running.

Quick Start
The wise_pizza package can be used for finding segments with an unusual average:

sf = explain_levels(
df=data,
dims=dims,
total_name=totals,
size_name=size,
max_depth=2,
min_segments=20,
solver="lasso"
)
[plot image]

Or for finding changes between two datasets in totals:

sf1 = explain_changes_in_totals(
df1=pre_data,
df2=data,
dims=dims,
total_name=totals,
size_name=size,
max_depth=2,
min_segments=20,
how="totals",
solver="lasso"
)
[plot image]

Or for finding changes between two datasets in average:

sf1 = explain_changes_in_average(
df1=pre_data,
df2=data,
dims=dims,
total_name=totals,
size_name=size,
max_depth=2,
min_segments=20,
how="totals",
solver="lasso"
)
[plot image]

And then you can visualize differences:

sf.plot()
And check segments:

sf.segments
Please see the full example here

For Developers
Testing
We use PyTest for testing. If you want to contribute code, make sure that the tests in tests/ run without errors.

Wise-pizza is open sourced and maintained by Wise Plc. Copyright 2023 Wise Plc.
"""
4 changes: 3 additions & 1 deletion examples/image_generation_crewai.py
@@ -1,3 +1,5 @@
import os.path

from dotenv import load_dotenv

from motleycrew import MotleyCrew, Task
@@ -8,7 +10,7 @@
load_dotenv()
configure_logging(verbose=True)

- image_generator_tool = DallEImageGeneratorTool()
+ image_generator_tool = DallEImageGeneratorTool(os.path.realpath("./images"))
# Alternatively, pass the images directory as a keyword argument:
# image_generator_tool = DallEImageGeneratorTool(images_directory="images")

68 changes: 68 additions & 0 deletions examples/research_agent/answer_orchestrator.py
@@ -0,0 +1,68 @@
import logging
import kuzu

from motleycrew.storage import MotleyGraphStore

from question_struct import Question
from question_answerer import AnswerSubQuestionTool


class AnswerOrchestrator:
def __init__(self, storage: MotleyGraphStore, answer_length: int):
self.storage = storage
self.question_answering_tool = AnswerSubQuestionTool(graph=self.storage, answer_length=answer_length)

def get_unanswered_available_questions(self) -> list[Question]:
# Select questions that already have context but no answer, and whose
# linked sub-questions (if any) are not themselves unanswered with context,
# i.e. nothing downstream still blocks answering them.
query = (
"MATCH (n1:{}) "
"WHERE n1.answer IS NULL AND n1.context IS NOT NULL "
"AND NOT EXISTS {{MATCH (n1)-[]->(n2:{}) "
"WHERE n2.answer IS NULL AND n2.context IS NOT NULL}} "
"RETURN n1"
).format(self.storage.node_table_name, self.storage.node_table_name)

query_result = self.storage.run_cypher_query(query)
return [Question.deserialize(row[0]) for row in query_result]

def __call__(self) -> Question | None:
last_question = None

while True:
questions = self.get_unanswered_available_questions()
logging.info("Available questions: %s", questions)

if not questions:
logging.info("All questions answered!")
break
else:
last_question = questions[0]
logging.info("Running answerer for question %s", last_question)
self.question_answering_tool.invoke({"question": last_question})

if not last_question:
logging.warning("Nothing to answer!")
return

return Question.deserialize(self.storage.get_entity(last_question.id))


if __name__ == "__main__":
from pathlib import Path
from dotenv import load_dotenv
from motleycrew.storage import MotleyKuzuGraphStore
from motleycrew.common.utils import configure_logging

load_dotenv()
configure_logging(verbose=True)

here = Path(__file__).parent
db_path = here / "research_db"

db = kuzu.Database(db_path)
storage = MotleyKuzuGraphStore(
db, node_table_schema={"question": "STRING", "answer": "STRING", "context": "STRING"}
)

orchestrator = AnswerOrchestrator(storage=storage, answer_length=30)
result = orchestrator()
print(result)
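
For orientation, a hypothetical sketch of the Question structure this orchestrator relies on. The real class lives in question_struct.py, which is not part of this diff; the fields below are inferred from the node_table_schema above and the .id access in __call__, so treat them as assumptions.

from dataclasses import dataclass
from typing import Optional

@dataclass
class Question:
    # Fields inferred from node_table_schema {"question", "answer", "context"}
    # plus the id used to fetch entities from the graph store.
    id: int
    question: str
    answer: Optional[str] = None
    context: Optional[str] = None

    @staticmethod
    def deserialize(data: dict) -> "Question":
        # Assumed to rebuild a Question from a stored node's properties.
        return Question(**data)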