Skip to content

Commit

Permalink
Adjusted selenium to monitor for chat box being enabled to determine LLM completion. More robust. Added test for search from vector store
Browse files Browse the repository at this point in the history
  • Loading branch information
dividor committed Jul 10, 2024
1 parent 8a9fa3d commit 12439e6
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 24 deletions.
46 changes: 22 additions & 24 deletions flows/chainlit-ui-evaluation/call_assistant.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,9 +70,10 @@ def check_element_exists(element, by, value):
return False


def poll_page(element_name=MARKDOWN_BODY_CLASS):
def poll_page(element_id=CHAT_INPUT_CLASS):
"""
Polls the page for new messages until a new message appears or a timeout occurs.
It does this by monitoring the chat box to see when it becomes enabled.
Args:
driver: The WebDriver instance used to interact with the web page.
Expand All @@ -84,20 +85,20 @@ def poll_page(element_name=MARKDOWN_BODY_CLASS):
Returns:
None
"""
markdown_body_elements = driver.find_elements(By.CLASS_NAME, element_name)
chats = len(markdown_body_elements)

# Loop waiting for the new message to appear, or timeout
chat_box = driver.find_element(By.ID, element_id)
is_disabled = chat_box.get_attribute("disabled")

# Loop waiting for the chat box to become enabled, indicating agent output complete
tot_time = 0
while len(markdown_body_elements) == chats:
while is_disabled is not None:
print(f" ... {tot_time} s")
time.sleep(POLL_TIME)
markdown_body_elements = driver.find_elements(By.CLASS_NAME, element_name)
chat_box = driver.find_element(By.ID, element_id)
is_disabled = chat_box.get_attribute("disabled")
tot_time += POLL_TIME
if tot_time > TIMEOUT_TIME:
print(
f"ERROR: Timed out waiting for new message to appear in {element_name}"
)
print(f"ERROR: Timed out waiting for chat box {element_id} to re-enable ")
return False

return True
Expand Down Expand Up @@ -154,11 +155,8 @@ def send_message(message, num_tries=0, tot_tries=3):
print(f"Failed to send message after {tot_tries} tries")
return ["ERROR: TIMED OUT SENDING MESSAGE"]

# Poll for the new message to appear, or timeout
status = poll_page(MARKDOWN_BODY_CLASS)
if status is False:
print("Failed to get response")
return ["ERROR: TIMED OUT WAITING FOR RESPONSE"]
# Poll page waiting for output to complete
poll_page(CHAT_INPUT_CLASS)

history = get_history()
len_history_new = len(history)
Expand Down Expand Up @@ -310,18 +308,18 @@ def call_assistant(query, chat_history):

if __name__ == "__main__":

chat_history = [
"Hello! How can I assist you today?",
"What is the total population of Mali",
"plot a line chart of fatalities by month for Chad using HDX data as an image",
"Plot population pyramids for Nigeria",
"How many rows does the population table have for Nigeria",
"Plot f{x}=10",
]

# chat_history = [
# "Hello! How can I assist you today?",
# "What is the total population of Mali",
# "plot a line chart of fatalities by month for Chad using HDX data as an image",
# "Plot population pyramids for Nigeria",
# "How many rows does the population table have for Nigeria",
# "Plot f{x}=10",
# ]
# user_input = chat_history[4]
# print(user_input)
# user_input="Plot f{x}=10"

# user_input="Is your data updated in real time?"
# call_assistant(user_input, "[]")
# sys.exit()

Expand Down
1 change: 1 addition & 0 deletions flows/chainlit-ui-evaluation/data.jsonl
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@
{"test_scenario": "Image answer from recipe", "query": "Plot population pyramids for Nigeria", "chat_history": "[]", "context": "The answer is:\n\n Image cksum: 7940162caf0e79eba9caae30c2955a6e\nImage description: {'content': \"The image is a population pyramid for Nigeria (NGA). It is a bar chart that displays the distribution of various age groups in the population, divided by gender. The x-axis represents the population in millions, with males on the left side (in blue) and females on the right side (in pink). The y-axis represents the age range, divided into 5-year intervals from 0-4 up to 80+.\\n\\nKey features of the population pyramid:\\n- The base of the pyramid (0-4 age range) is the widest, indicating a high number of young children.\\n- As the age range increases, the width of the bars decreases, showing a tapering effect typical of a youthful population.\\n- The population decreases steadily with age, with the smallest population in the 80+ age range.\\n- The pyramid shows a relatively balanced distribution between males and females across most age groups.\\n\\nThis image is relevant to a user query related to demographic analysis, population studies, or understanding the age and gender distribution of Nigeria's population.\"}\n \n\n Metadata for the answer:\n {'params': {'adm0_code': 'NGA'}, 'attribution': 'https://data.humdata.org/dataset/a7c3de5e-ff27-4746-99cd-05f2ad9b1066', 'data_url': 'https://data.humdata.org/dataset/a7c3de5e-ff27-4746-99cd-05f2ad9b1066/resource/562e7757-0683-4d61-87bd-a7c94af2ee38/download/nga_admpop_adm2_2020.csv', 'time_period': {'start': '2020-01-01', 'end': '2020-12-31T23:59:59'}}", "output": "['*AN IMAGE WAS OUTPUT, HERE IS ITS LLM-GENERATED DESCRIPTION* ... The image is a population pyramid for Nigeria (NGA). It displays the population distribution by age and gender. The horizontal axis represents the population in millions, with males on the left (in blue) and females on the right (in pink). 
The vertical axis represents different age ranges, starting from 0-4 at the bottom to 80+ at the top.\\n\\nKey observations:\\n1. The pyramid has a broad base, indicating a large number of young people (0-4 age range).\\n2. The population decreases steadily with increasing age, forming a typical pyramid shape.\\n3. There are more males than females in the younger age groups, but the difference is not very pronounced.\\n4. The population of both genders decreases significantly in the older age groups (60+).', '\u2705 A human approved this data recipe; Source; Raw data; 2020-01-01 to 2020-12-31']"}
{"test_scenario": "Assistant on-the-fly SQL, text answer", "query": "How many rows does the population table have for Nigeria", "chat_history": "[]", "context": "There are **43,794** rows of data in the population table for Nigeria.", "output": "['The population table has 43,794 rows for Nigeria.']"}
{"test_scenario": "Assistant created image (simple)", "query": "Plot f{x}=10", "chat_history": "[]", "context": "Image cksum: 3f4dafc66e68dc03e3ef6d2f02a85bc7\nImage description: {'content': 'The image is a plot of the function \\\\( f(x) = 10 \\\\). Here are the details of the plot:\\n\\n- The title of the plot is \"Plot of f(x) = 10\".\\n- The x-axis ranges from -10 to 10.\\n- The y-axis ranges from 0 to 10.\\n- The function \\\\( f(x) = 10 \\\\) is represented by a horizontal orange line at \\\\( y = 10 \\\\).\\n- There is a legend in the plot that labels the orange line as \"f(x) = 10\".\\n- The x-axis is labeled \"x\" and the y-axis is labeled \"f(x)\".\\n- The plot has grid lines for better readability.\\n\\nThe plot is relevant if the user query is about visualizing or understanding the function \\\\( f(x) = 10 \\\\), which is a constant function.'}", "output": "['*AN IMAGE WAS OUTPUT, HERE IS ITS LLM-GENERATED DESCRIPTION* ... The image is a plot of the function \\\\( f(x) = 10 \\\\). The graph shows a horizontal line at \\\\( f(x) = 10 \\\\) across the range of \\\\( x \\\\) values from -10 to 10. The x-axis is labeled \"x\" and the y-axis is labeled \"f(x)\". There is a legend in the plot indicating the line represents \\\\( f(x) = 10 \\\\). The plot title is \"Plot of f(x) = 10\".']"}
{"test_scenario": "Assistant answers using vector store", "query": "Is your data updated in real time?", "output": "['The data is not updated in real-time. For data sources configured as API data sources, the system will call them on-demand to pull in the latest data from the remote system. However, for data sources where data is ingested, like HAPI, the frequency of updates depends on how often the ingestion process is run, which is controlled by the user of the humanitarian AI assistant[0].','Caution: LLM Analysis; Sources: [0] HDIP FAQs (External) .pdf']"}

0 comments on commit 12439e6

Please sign in to comment.