From 7ff39d18576b208636a8ccf68a008a0eae3aa6f2 Mon Sep 17 00:00:00 2001 From: Jeff Tang Date: Sun, 8 Dec 2024 12:47:49 -0800 Subject: [PATCH 1/8] initial version working --- .../gmail_agent/functions_prompt.py | 360 ++++++++++ .../zero_to_hero_guide/gmail_agent/gmagent.py | 624 ++++++++++++++++++ docs/zero_to_hero_guide/gmail_agent/main.py | 116 ++++ .../gmail_agent/requirements.txt | 10 + 4 files changed, 1110 insertions(+) create mode 100644 docs/zero_to_hero_guide/gmail_agent/functions_prompt.py create mode 100644 docs/zero_to_hero_guide/gmail_agent/gmagent.py create mode 100644 docs/zero_to_hero_guide/gmail_agent/main.py create mode 100644 docs/zero_to_hero_guide/gmail_agent/requirements.txt diff --git a/docs/zero_to_hero_guide/gmail_agent/functions_prompt.py b/docs/zero_to_hero_guide/gmail_agent/functions_prompt.py new file mode 100644 index 0000000000..11e3abc3a7 --- /dev/null +++ b/docs/zero_to_hero_guide/gmail_agent/functions_prompt.py @@ -0,0 +1,360 @@ +from typing import List, Dict, Any +from llama_stack_client.types.tool_param_definition_param import ToolParamDefinitionParam +from llama_stack_client.types import CompletionMessage, ToolResponseMessage +from llama_stack_client.lib.agents.custom_tool import CustomTool +from gmagent import * + +class ListEmailsTool(CustomTool): + """Custom tool for List Emails.""" + + def get_name(self) -> str: + return "list_emails" + + def get_description(self) -> str: + return "Return a list of emails matching an optionally specified query." + + def get_params_definition(self) -> Dict[str, ToolParamDefinitionParam]: + return { + "maxResults": ToolParamDefinitionParam( + param_type="int", + description="The default maximum number of emails to return is 100; the maximum allowed value for this field is 500.", + required=False + ), + "query": ToolParamDefinitionParam( + param_type="str", + description="One or more keywords in the email subject and body, or one or more filters. 
There can be 6 types of filters: 1) Field-specific Filters: from, to, cc, bcc, subject; 2) Date Filters: before, after, older than, newer than); 3) Status Filters: read, unread, starred, importatant; 4) Attachment Filters: has, filename or type; 5) Size Filters: larger, smaller; 6) logical operators (or, and, not).", + required=False + ) + } + async def run(self, messages: List[CompletionMessage]) -> List[ToolResponseMessage]: + assert len(messages) == 1, "Expected single message" + + message = messages[0] + + tool_call = message.tool_calls[0] + try: + response = await self.run_impl(**tool_call.arguments) + response_str = json.dumps(response, ensure_ascii=False) + except Exception as e: + response_str = f"Error when running tool: {e}" + + message = ToolResponseMessage( + call_id=tool_call.call_id, + tool_name=tool_call.tool_name, + content=response_str, + role="ipython", + ) + return [message] + + async def run_impl(self, query: str, maxResults: int = 100) -> Dict[str, Any]: + """Query to get a list of emails matching the query.""" + emails = list_emails(query) + print(emails) + return emails + + +class GetEmailTool(CustomTool): + """Custom tool for Get Email Detail.""" + + def get_name(self) -> str: + return "get_email" + + def get_description(self) -> str: + return "Get detailed info about a specific email" + + def get_params_definition(self) -> Dict[str, ToolParamDefinitionParam]: + return { + "detail": ToolParamDefinitionParam( + param_type="string", + description="what detail the user wants to know about - two possible values: body or attachment", + required=False + ), + "query": ToolParamDefinitionParam( + param_type="str", + description="One or more keywords in the email subject and body, or one or more filters. 
There can be 6 types of filters: 1) Field-specific Filters: from, to, cc, bcc, subject; 2) Date Filters: before, after, older than, newer than); 3) Status Filters: read, unread, starred, importatant; 4) Attachment Filters: has, filename or type; 5) Size Filters: larger, smaller; 6) logical operators (or, and, not).", + required=False + ) + } + async def run(self, messages: List[CompletionMessage]) -> List[ToolResponseMessage]: + assert len(messages) == 1, "Expected single message" + + message = messages[0] + + tool_call = message.tool_calls[0] + try: + response = await self.run_impl(**tool_call.arguments) + response_str = json.dumps(response, ensure_ascii=False) + except Exception as e: + response_str = f"Error when running tool: {e}" + + message = ToolResponseMessage( + call_id=tool_call.call_id, + tool_name=tool_call.tool_name, + content=response_str, + role="ipython", + ) + return [message] + + async def run_impl(self, query: str, maxResults: int = 100) -> Dict[str, Any]: + """Query to get a list of emails matching the query.""" + + emails = [] + return emails + + + +class SendEmailTool(CustomTool): + """Compose, reply, or forward email.""" + + def get_name(self) -> str: + return "send_email" + + def get_description(self) -> str: + return "Compose, reply, or forward email" + + def get_params_definition(self) -> Dict[str, ToolParamDefinitionParam]: + return { + "action": ToolParamDefinitionParam( + param_type="string", + description="Whether to compose, reply, or forward an email", + required=True + ), + "to": ToolParamDefinitionParam( + param_type="str", + description="The recipient of the email", + required=True + ), + "subject": ToolParamDefinitionParam( + param_type="str", + description="The subject of the email", + required=True + ), + "body": ToolParamDefinitionParam( + param_type="str", + description="The content of the email", + required=True + ), + "email_id": ToolParamDefinitionParam( + param_type="str", + description="The email id to reply or forward 
to", + required=False + ) + } + + async def run(self, messages: List[CompletionMessage]) -> List[ToolResponseMessage]: + assert len(messages) == 1, "Expected single message" + + message = messages[0] + + tool_call = message.tool_calls[0] + try: + response = await self.run_impl(**tool_call.arguments) + response_str = json.dumps(response, ensure_ascii=False) + except Exception as e: + response_str = f"Error when running tool: {e}" + + message = ToolResponseMessage( + call_id=tool_call.call_id, + tool_name=tool_call.tool_name, + content=response_str, + role="ipython", + ) + return [message] + + async def run_impl(self, query: str, maxResults: int = 100) -> Dict[str, Any]: + """Query to get a list of emails matching the query.""" + + emails = [] + return emails + + +class GetPDFSummaryTool(CustomTool): + """Get a summary of a PDF attachment.""" + + def get_name(self) -> str: + return "get_pdf_summary" + + def get_description(self) -> str: + return "Get a summary of a PDF attachment" + + def get_params_definition(self) -> Dict[str, ToolParamDefinitionParam]: + return { + "file_name": ToolParamDefinitionParam( + param_type="string", + description="The name of the PDF file", + required=True + ) + } + + async def run(self, messages: List[CompletionMessage]) -> List[ToolResponseMessage]: + assert len(messages) == 1, "Expected single message" + + message = messages[0] + + tool_call = message.tool_calls[0] + try: + response = await self.run_impl(**tool_call.arguments) + response_str = json.dumps(response, ensure_ascii=False) + except Exception as e: + response_str = f"Error when running tool: {e}" + + message = ToolResponseMessage( + call_id=tool_call.call_id, + tool_name=tool_call.tool_name, + content=response_str, + role="ipython", + ) + return [message] + + async def run_impl(self, query: str, maxResults: int = 100) -> Dict[str, Any]: + """Query to get a list of emails matching the query.""" + + emails = [] + return emails + + +class CreateDraftTool(CustomTool): + 
"""Create a new, reply, or forward email draft.""" + + def get_name(self) -> str: + return "create_draft" + + def get_description(self) -> str: + return "Create a new, reply, or forward email draft" + + def get_params_definition(self) -> Dict[str, ToolParamDefinitionParam]: + return { + "action": ToolParamDefinitionParam( + param_type="string", + description="Whether to compose, reply, or forward an email", + required=True + ), + "to": ToolParamDefinitionParam( + param_type="str", + description="The recipient of the email", + required=True + ), + "subject": ToolParamDefinitionParam( + param_type="str", + description="The subject of the email", + required=True + ), + "body": ToolParamDefinitionParam( + param_type="str", + description="The content of the email", + required=True + ), + "email_id": ToolParamDefinitionParam( + param_type="str", + description="The email id to reply or forward to, or empty if draft a new email.", + required=True + ) + } + + async def run(self, messages: List[CompletionMessage]) -> List[ToolResponseMessage]: + assert len(messages) == 1, "Expected single message" + + message = messages[0] + + tool_call = message.tool_calls[0] + try: + response = await self.run_impl(**tool_call.arguments) + response_str = json.dumps(response, ensure_ascii=False) + except Exception as e: + response_str = f"Error when running tool: {e}" + + message = ToolResponseMessage( + call_id=tool_call.call_id, + tool_name=tool_call.tool_name, + content=response_str, + role="ipython", + ) + return [message] + + async def run_impl(self, query: str, maxResults: int = 100) -> Dict[str, Any]: + """Query to get a list of emails matching the query.""" + + emails = [] + return emails + + +class SendDraftTool(CustomTool): + """Send a draft email.""" + + def get_name(self) -> str: + return "send_draft" + + def get_description(self) -> str: + return "Send a draft email" + + def get_params_definition(self) -> Dict[str, ToolParamDefinitionParam]: + return { + "id": 
ToolParamDefinitionParam( + param_type="str", + description="The email draft id.", + required=True + ) + } + + async def run(self, messages: List[CompletionMessage]) -> List[ToolResponseMessage]: + assert len(messages) == 1, "Expected single message" + + message = messages[0] + + tool_call = message.tool_calls[0] + try: + response = await self.run_impl(**tool_call.arguments) + response_str = json.dumps(response, ensure_ascii=False) + except Exception as e: + response_str = f"Error when running tool: {e}" + + message = ToolResponseMessage( + call_id=tool_call.call_id, + tool_name=tool_call.tool_name, + content=response_str, + role="ipython", + ) + return [message] + + async def run_impl(self, query: str, maxResults: int = 100) -> Dict[str, Any]: + """Query to get a list of emails matching the query.""" + + emails = [] + return emails + + +examples = """ +{"name": "list_emails", "parameters": {"query": "has:attachment larger:5mb"}} +{"name": "list_emails", "parameters": {"query": "has:attachment"}} +{"name": "list_emails", "parameters": {"query": "newer_than:1d"}} +{"name": "list_emails", "parameters": {"query": "older_than:1d"}} +{"name": "list_emails", "parameters": {"query": "is:unread"}} +{"name": "list_emails", "parameters": {"query": " is:unread"}} +{"name": "list_emails", "parameters": {"query": " is:read"}} +{"name": "get_email_detail", "parameters": {"detail": "body", "which": "first"}} +{"name": "get_email_detail", "parameters": {"detail": "body", "which": "last"}} +{"name": "get_email_detail", "parameters": {"detail": "body", "which": "second"}} +{"name": "get_email_detail", "parameters": {"detail": "body", "which": "subject "}} +{"name": "get_email_detail", "parameters": {"detail": "attachment", "which": "from "}} +{"name": "get_email_detail", "parameters": {"detail": "attachment", "which": "first"}} +{"name": "get_email_detail", "parameters": {"detail": "attachment", "which": "last"}} +{"name": "get_email_detail", "parameters": {"detail": "attachment", 
"which": ""}} +{"name": "send_email", "parameters": {"action": "compose", "to": "jeffxtang@meta.com", "subject": "xxxxx", "body": "xxxxx"}} +{"name": "send_email", "parameters": {"action": "reply", "to": "", "subject": "xxxxx", "body": "xxxxx", "email_id": "xxxxx"}} +{"name": "send_email", "parameters": {"action": "forward", "to": "jeffxtang@meta.com", "subject": "xxxxx", "body": "xxxxx", "email_id": "xxxxx"}} +{"name": "create_draft", "parameters": {"action": "new", "to": "jeffxtang@meta.com", "subject": "xxxxx", "body": "xxxxx", "email_id": ""}} +{"name": "create_draft", "parameters": {"action": "reply", "to": "", "subject": "xxxxx", "body": "xxxxx", "email_id": "xxxxx"}} +{"name": "create_draft", "parameters": {"action": "forward", "to": "jeffxtang@meta.com", "subject": "xxxxx", "body": "xxxxx", "email_id": "xxxxx"}} +{"name": "send_draft", "parameters": {"id": "..."}} +{"name": "get_pdf_summary", "parameters": {"file_name": "..."}} +""" + +system_prompt = f""" +Your name is Gmagent, an assistant that can perform all Gmail related tasks for your user. +Respond to the user's ask by making use of the following functions if needed. +If no available functions can be used, just say "I don't know" and don't make up facts. 
+ +Example responses: +{examples} + +""" diff --git a/docs/zero_to_hero_guide/gmail_agent/gmagent.py b/docs/zero_to_hero_guide/gmail_agent/gmagent.py new file mode 100644 index 0000000000..4184ae1a4b --- /dev/null +++ b/docs/zero_to_hero_guide/gmail_agent/gmagent.py @@ -0,0 +1,624 @@ +from google.auth.transport.requests import Request +from google_auth_oauthlib.flow import InstalledAppFlow +from googleapiclient.discovery import build +from email.mime.text import MIMEText +from email.mime.multipart import MIMEMultipart +from email.mime.base import MIMEBase +from email import encoders + +from bs4 import BeautifulSoup +import os +import pytz +import base64 +import pickle +from datetime import datetime, timezone +import json +import ollama +from pypdf import PdfReader +from pathlib import Path + +SCOPES = ['https://www.googleapis.com/auth/gmail.readonly', 'https://www.googleapis.com/auth/gmail.compose'] + +def authenticate_gmail(user_email): + creds = None + token_file = f'token_{user_email}.pickle' # Unique token file for each user + + # Load the user's token if it exists + if os.path.exists(token_file): + with open(token_file, 'rb') as token: + creds = pickle.load(token) + + # If no valid credentials, prompt the user to log in + if not creds or not creds.valid: + if creds and creds.expired and creds.refresh_token: + creds.refresh(Request()) + else: + flow = InstalledAppFlow.from_client_secrets_file('credentials.json', SCOPES) + creds = flow.run_console() + + # Save the new credentials to a user-specific token file + with open(token_file, 'wb') as token: + pickle.dump(creds, token) + + # Build the Gmail API service + service = build('gmail', 'v1', credentials=creds) + return service + + +def num_of_emails(query=''): + response = service.users().messages().list( + userId='me', + q=query, + maxResults=1).execute() + return response.get('resultSizeEstimate', 0) + + +def list_emails(query='', max_results=100): + emails = [] + next_page_token = None + + while True: + 
response = service.users().messages().list( + userId=user_id, + maxResults=max_results, + pageToken=next_page_token, + q=query + ).execute() + + if 'messages' in response: + for msg in response['messages']: + sender, subject, received_time = get_email_info(msg['id']) + emails.append( + { + "message_id": msg['id'], + "sender": sender, + "subject": subject, + "received_time": received_time + } + ) + + next_page_token = response.get('nextPageToken') + + if not next_page_token: + break + + return emails + +def get_email_detail(detail, which=''): + if detail == 'body': + return get_email_body(which) + elif detail == 'attachment': + return get_email_attachments(which) + + +def get_email_body(message_id): + try: + message = service.users().messages().get( + userId=user_id, + id=message_id, + format='full').execute() + + # Recursively extract the text of the first part (depth-first) that yields any text + def extract_parts(payload): + text_body = "" + if 'parts' in payload: + part_texts = (extract_parts(part) for part in payload['parts']) # lazy: stops at the first hit + return next((text for text in part_texts if text), text_body) # previously returned parts[0] unconditionally + else: + mime_type = payload.get('mimeType') + body = payload.get('body', {}).get('data') + if mime_type == 'text/html': + decoded_body = base64.urlsafe_b64decode(body).decode('utf-8') + soup = BeautifulSoup(decoded_body, 'html.parser') + text_body = soup.get_text().strip() + elif mime_type == 'text/plain': + decoded_body = base64.urlsafe_b64decode(body).decode('utf-8') + text_body = decoded_body + + return text_body + + return extract_parts(message['payload']) + + except Exception as e: + print(f"An error occurred: {e}") + return None + + +def parse_message(message): + payload = message['payload'] + headers = payload.get("headers") + + subject = None + sender = None + for header in headers: + if header['name'] == 'Subject': + subject = header['value'] + elif header['name'] == 'From': + sender = header['value'] + + internal_date = message.get('internalDate') + utc_time = datetime.fromtimestamp(int(internal_date) / 1000, tz=timezone.utc) + + # Convert UTC to the specified timezone +
local_timezone = pytz.timezone("America/Los_Angeles") + local_time = utc_time.astimezone(local_timezone) + + # Format the local time as a string + received_time = local_time.strftime('%Y-%m-%d %H:%M:%S %Z') + + # Check if the email is plain text or multipart + if 'parts' in payload: + # Multipart message - find the text/plain or text/html part + for part in payload['parts']: + if part['mimeType'] == 'text/plain' or part['mimeType'] == 'text/html': # You can also look for 'text/html' + data = part['body']['data'] + body = base64.urlsafe_b64decode(data).decode('utf-8') + return sender, subject, received_time, body + elif part['mimeType'] in ['multipart/related', 'multipart/mixed', 'multipart/alternative']: + return sender, subject, received_time, get_email_body(message.get('id')) + else: + # Single part message + data = payload['body']['data'] + body = base64.urlsafe_b64decode(data).decode('utf-8') + return sender, subject, received_time, body + + +def get_email_info(msg_id): + message = service.users().messages().get( + userId=user_id, + id=msg_id, + format='full').execute() + + sender, subject, received_time, body = parse_message(message) + + return sender, subject, received_time + + +def reply_email(message_id, reply_text): + # Fetch the original message + original_message = service.users().messages().get( + userId=user_id, + id=message_id, + format='full').execute() + + # Get headers + headers = original_message['payload']['headers'] + subject = None + to = None + for header in headers: + if header['name'] == 'Subject': + subject = header['value'] + if header['name'] == 'From': + to = header['value'] + + # Create the reply subject + if not subject.startswith("Re: "): + subject = "Re: " + subject + + # Compose the reply message + reply_message = MIMEText(reply_text) + reply_message['to'] = to + reply_message['from'] = user_id + reply_message['subject'] = subject + reply_message['In-Reply-To'] = message_id + + # Encode and send the message + raw_message = 
base64.urlsafe_b64encode(reply_message.as_bytes()).decode("utf-8") + body = {'raw': raw_message, + 'threadId': original_message['threadId']} + sent_message = service.users().messages().send( + userId=user_id, + body=body).execute() + print("Reply sent. Message ID:", sent_message['id']) + + +def forward_email(message_id, forward_to, email_body=None): + """ + Forwards an email, preserving the original MIME type, including multipart/related. + """ + # Get the original message in 'full' format + original_message = service.users().messages().get( + userId=user_id, + id=message_id, + format='full').execute() + + # Extract the payload and headers + payload = original_message.get('payload', {}) + headers = payload.get('headers', []) + parts = payload.get('parts', []) + # Get the Subject + subject = next((header['value'] for header in headers if header['name'].lower() == 'subject'), 'No Subject') + + # Create a new MIME message for forwarding + mime_message = MIMEMultipart(payload.get('mimeType', 'mixed').split('/')[-1]) + mime_message['To'] = forward_to + mime_message['Subject'] = f"Fwd: {subject}" + + # Add the optional custom email body + if email_body: + mime_message.attach(MIMEText(email_body, 'plain')) + + # Function to fetch attachment data by attachmentId + def fetch_attachment_data(attachment_id, message_id): + attachment = service.users().messages().attachments().get( + userId=user_id, messageId=message_id, id=attachment_id + ).execute() + return base64.urlsafe_b64decode(attachment['data']) + + # Rebuild MIME structure + def rebuild_parts(parts): + """ + Recursively rebuild MIME parts. 
+ """ + if not parts: + return None + + for part in parts: + part_mime_type = part.get('mimeType', 'text/plain') + part_body = part.get('body', {}) + part_data = part_body.get('data', '') + part_parts = part.get('parts', []) # Sub-parts for multipart types + filename = part.get('filename') + attachment_id = part_body.get('attachmentId') + + if part_mime_type.startswith('multipart/'): + # Rebuild nested multipart + sub_multipart = MIMEMultipart(part_mime_type.split('/')[-1]) + sub_parts = rebuild_parts(part_parts) + if sub_parts: + for sub_part in sub_parts: + sub_multipart.attach(sub_part) + yield sub_multipart + elif filename and attachment_id: + # Handle attachments + decoded_data = fetch_attachment_data(attachment_id, message_id) + attachment = MIMEBase(*part_mime_type.split('/')) + attachment.set_payload(decoded_data) + encoders.encode_base64(attachment) + attachment.add_header('Content-Disposition', f'attachment; filename="{filename}"') + yield attachment + else: + if part_data: + # Decode and attach non-multipart parts + decoded_data = base64.urlsafe_b64decode(part_data) + + if part_mime_type == 'text/plain': + yield MIMEText(decoded_data.decode('utf-8'), 'plain') + elif part_mime_type == 'text/html': + yield MIMEText(decoded_data.decode('utf-8'), 'html') + + # Rebuild the main MIME structure + rebuilt_parts = rebuild_parts(parts) + if rebuilt_parts: + for rebuilt_part in rebuilt_parts: + mime_message.attach(rebuilt_part) + + # Encode the MIME message to base64 + raw = base64.urlsafe_b64encode(mime_message.as_bytes()).decode('utf-8') + + # Send the email + forward_body = {'raw': raw} + sent_message = service.users().messages().send(userId=user_id, body=forward_body).execute() + + print(f"Message forwarded successfully! 
Message ID: {sent_message['id']}") + + +def send_email(action, to, subject, body="", email_id=""): + if action == "compose": + message = MIMEText(body) + message['to'] = to + message['from'] = user_id + message['subject'] = subject + + # Encode and send the message + raw_message = base64.urlsafe_b64encode(message.as_bytes()).decode("utf-8") + body = {'raw': raw_message} + sent_message = service.users().messages().send( + userId=user_id, + body=body).execute() + return sent_message['id'] + elif action == "reply": # reply or forward; a message id is needed + reply_email(email_id, body) + elif action == "forward": + forward_email(email_id, to, body) + + +def create_draft(action, to, subject, body="", email_id=""): + if action == "new": + message = MIMEText(body) + message['to'] = to + message['from'] = user_id + message['subject'] = subject + + encoded_message = base64.urlsafe_b64encode(message.as_bytes()).decode() + draft_body = {'message': {'raw': encoded_message}} + draft = service.users().drafts().create( + userId=user_id, + body=draft_body).execute() + print(f"Draft created with ID: {draft['id']}") + return draft['id'] + elif action == "reply": + return create_reply_draft(email_id, body) + elif action == "forward": + return create_forward_draft(email_id, to, body) + else: + return + + + +def create_reply_draft(message_id, reply_text): + # Fetch the original message + original_message = service.users().messages().get( + userId=user_id, + id=message_id, + format='full').execute() + + # Get headers + headers = original_message['payload']['headers'] + subject = None + to = None + for header in headers: + if header['name'] == 'Subject': + subject = header['value'] + if header['name'] == 'From': + to = header['value'] + + # Create the reply subject + if not subject.startswith("Re: "): + subject = "Re: " + subject + + # Compose the reply message + reply_message = MIMEText(reply_text) + reply_message['to'] = to + reply_message['from'] = user_id + reply_message['subject'] 
= subject + reply_message['In-Reply-To'] = message_id + + encoded_message = base64.urlsafe_b64encode(reply_message.as_bytes()).decode() + draft_body = {'message': {'raw': encoded_message, 'threadId': original_message['threadId']}} + draft = service.users().drafts().create(userId=user_id, body=draft_body).execute() + return draft['id'] + + +def create_forward_draft(message_id, recipient_email, custom_message=None): + # Get the original message + original_message = service.users().messages().get( + userId=user_id, + id=message_id, + format='raw').execute() + + # Decode the raw message + raw_message = base64.urlsafe_b64decode(original_message['raw'].encode('utf-8')) + + # Prepare the forward header and optional custom message + forward_header = f"----- Forwarded message -----\nFrom: {recipient_email}\n\n" + if custom_message: + forward_header += f"{custom_message}\n\n" + + # Combine the forward header with the original message + new_message = forward_header + raw_message.decode('utf-8') + + # Encode the combined message into base64 format + encoded_message = base64.urlsafe_b64encode(new_message.encode('utf-8')).decode('utf-8') + + draft_body = {'message': {'raw': encoded_message, 'threadId': original_message['threadId']}} + draft = service.users().drafts().create(userId=user_id, body=draft_body).execute() + print(f"Forward draft created with ID: {draft['id']}") + return draft['id'] + + +def send_draft(id): + sent_message = service.users().drafts().send( + userId=user_id, + body={'id': id} + ).execute() + return f"Draft sent with email ID: {sent_message['id']}" + + +def get_pdf_summary(file_name): + text = pdf2text(file_name) + print("Calling Llama to generate a summary...") + response = llama31(text, "Generate a summary of the input text in 5 sentences.") + return response + + +def get_email_attachments(message_id, mime_type='application/pdf'): + attachments = [] + + # Helper function to process email parts + def process_parts(parts): + for part in parts: + if 
part['mimeType'] in ['multipart/related', 'multipart/mixed', 'multipart/alternative']: + # Recursively process nested parts + if 'parts' in part: + process_parts(part['parts']) + elif 'filename' in part and part['filename']: + if part['mimeType'] == mime_type: # Check for the desired MIME type + attachment_id = part['body'].get('attachmentId') + if attachment_id: + # Get the attachment data + attachment = service.users().messages().attachments().get( + userId=user_id, + messageId=message_id, + id=attachment_id + ).execute() + + # Decode the attachment content + file_data = base64.urlsafe_b64decode(attachment['data'].encode('UTF-8')) + + with open(os.path.basename(part['filename']), "wb") as f: # filename comes from the email (untrusted): drop any directory components + f.write(file_data) + + # Save the attachment information + attachments.append( + {'filename': part['filename'], + 'data': file_data, + 'size': attachment.get('size', 0) + }) + + # Retrieve the email message + message = service.users().messages().get( + userId=user_id, + id=message_id, + format='full').execute() + payload = message['payload'] + + # Start processing the parts + if 'parts' in payload: + process_parts(payload['parts']) + + rslt = "" + for a in attachments: + rslt += f"{a['filename']} - {a['size']} bytes\n" + return rslt #attachments + + +def pdf2text(file): + text = '' + try: + with Path(file).open("rb") as f: + reader = PdfReader(f) + text = "\n\n".join([page.extract_text() for page in reader.pages]) + except Exception as e: + raise RuntimeError(f"Error reading the PDF file: {str(e)}") from e # raising a bare str is itself a TypeError and hides this message + + print(f"\nPDF text length: {len(text)}\n") + + return text + + +user_email = None +service = None +user_id = 'me' + +def set_email_service(gmail): + global user_email + global service + + user_email = gmail + service = authenticate_gmail(user_email) + +# class Agent: +# def __init__(self, system_prompt=""): +# self.system_prompt = system_prompt +# self.messages = [] +# +# # Gmagent-specific short term memory, used to answer follow up questions AFTER a list of emails is found matching user's query +#
self.emails = [] +# self.draft_id = None +# +# if self.system_prompt: +# self.messages.append({"role": "system", "content": system_prompt}) +# +# def __call__(self, user_prompt_or_tool_result, is_tool_call=False): +# # if it's tool call result, use "ipython" instead of "user" for the role +# self.messages.append({"role": ("ipython" if is_tool_call else "user"), "content": user_prompt_or_tool_result}) +# result = self.llama() +# print(f"\nLlama returned: {result}.") +# if type(result) == dict: # result is a dict only if it's a tool call spec +# function_name = result["function_name"] +# func = globals()[function_name] +# parameters = result["parameters"] +# if function_name == "get_email_detail": +# # TODO: parse which - valid values are first, second, +# # third, fourth, last, from xxx +# if 'id' in parameters.keys(): +# parameters['which'] = parameters['id'] +# del parameters['id'] # per the function spec +# elif 'which' in parameters.keys(): +# if 'from ' in parameters['which']: +# sender = parameters['which'].split('from ')[-1] +# for email in self.emails: +# if email['sender'].find(sender) != -1: +# parameters['which'] = email['message_id'] +# break +# if 'subject ' in parameters['which']: +# subject = parameters['which'].split('subject ')[-1] +# # exact match beats substring +# for email in self.emails: +# if email['subject'].upper() == subject.upper(): +# parameters['which'] = email['message_id'] +# break +# elif email['subject'].upper().find(subject.upper()) != -1: +# parameters['which'] = email['message_id'] +# +# elif 'id_' in parameters['which']: +# parameters['which'] = parameters['which'].split('id_')[-1] +# else: +# parameters['which'] = self.emails[-1]['message_id'] +# elif function_name == "send_draft": +# parameters['id'] = self.draft_id +# +# print(f"\nCalling tool to access Gmail API: {function_name}, {parameters}...") +# result = func(**parameters) +# print(f"\nTool calling returned: {result}") +# +# # convert function calling result to concise 
summary, offering interactive follow ups, +# # for smooth and user friendly experience +# if function_name == 'list_emails': +# self.emails = result +# num = len(result) +# if num == 0: +# output = "I couldn't find any such emails. What else would you like to do?" +# elif num <= 5: +# output = f"I found {num} email{'s' if num > 1 else ''} matching your query:\n" +# for i, email in enumerate(result, start=1): +# output += f"{i}. From: {email['sender']}, Subject: {email['subject']}, Received on: {email['received_time']}\n" +# else: +# output = f"I found {num} emails matching your query. Here are the first 5 emails:\n" +# for i in range(1, 6): +# output += f"{i}. From: {result[i-1]['sender']}, Subject: {result[i-1]['subject']}, Received on: {result[i-1]['received_time']}\n" +# elif function_name == "get_email_detail": +# output = result +# elif function_name == "get_pdf_summary": +# output = result +# elif function_name == "send_email": +# output = "Email sent." +# elif function_name == "create_draft": +# output = "Draft created." +# self.draft_id = result +# elif function_name == "send_draft": +# output = result +# +# print(f"\n-------------------------\n\nGmagent: {output}\n") +# else: +# output = result # direct text, not JSON, response by Llama +# +# # adding this may cause Llama to hallucinate when answering +# # follow up questions. e.g. "do i have emails with attachments +# # larger than 20mb" got right tool calling response, then +# # follow up "larger than 10mb" got hallucinated response. 
+# # self.messages.append({"role": "assistant", "content": output}) +# +# # this mitigates the hallucination +# self.messages.append({"role": "assistant", "content": str(result)}) +# +# return output +# +# def llama(self): +# response = ollama.chat(model='llama3.1', +# messages = self.messages, +# options = { +# "temperature": 0.0 +# } +# ) +# result = response['message']['content'] +# +# try: +# res = json.loads(result.split("<|python_tag|>")[-1]) +# function_name = res['name'] +# parameters = res['parameters'] +# return {"function_name": function_name, +# "parameters": parameters} +# except: +# return result +# +# +def llama31(user_prompt: str, system_prompt = ""): + response = ollama.chat(model='llama3.1', + messages=[ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": user_prompt}, + ], + ) + return response['message']['content'] diff --git a/docs/zero_to_hero_guide/gmail_agent/main.py b/docs/zero_to_hero_guide/gmail_agent/main.py new file mode 100644 index 0000000000..f3d557b677 --- /dev/null +++ b/docs/zero_to_hero_guide/gmail_agent/main.py @@ -0,0 +1,116 @@ +import argparse +import gmagent +import asyncio +from gmagent import * +from functions_prompt import * #system_prompt + +from llama_stack_client import LlamaStackClient +from llama_stack_client.lib.agents.agent import Agent +from llama_stack_client.lib.agents.event_logger import EventLogger +from llama_stack_client.types.agent_create_params import ( + AgentConfig, +) + +LLAMA_STACK_API_TOGETHER_URL="https://llama-stack.together.ai" +LLAMA31_8B_INSTRUCT = "Llama3.1-8B-Instruct" + +async def create_gmail_agent(client: LlamaStackClient) -> Agent: + """Create an agent with gmail tool capabilities.""" + + listEmailsTool = ListEmailsTool() + getEmailTool = GetEmailTool() + sendEmailTool = SendEmailTool() + getPDFSummaryTool = GetPDFSummaryTool() + createDraftTool = CreateDraftTool() + sendDraftTool = SendDraftTool() + + agent_config = AgentConfig( + model=LLAMA31_8B_INSTRUCT, + 
instructions=system_prompt, + sampling_params={ + "strategy": "greedy", + "temperature": 0.0, + "top_p": 0.9, + }, + tools=[ + listEmailsTool.get_tool_definition(), + getEmailTool.get_tool_definition(), + sendEmailTool.get_tool_definition(), + getPDFSummaryTool.get_tool_definition(), + createDraftTool.get_tool_definition(), + sendDraftTool.get_tool_definition(), + + ], + tool_choice="auto", + tool_prompt_format="json", + input_shields=[], + output_shields=[], + enable_session_persistence=True + ) + + agent = Agent( + client=client, + agent_config=agent_config, + custom_tools=[listEmailsTool, + getEmailTool, + sendEmailTool, + getPDFSummaryTool, + createDraftTool, + sendDraftTool] + ) + + return agent + + + + + +async def main(): + parser = argparse.ArgumentParser(description="Set email address") + parser.add_argument("--gmail", type=str, required=True, help="Your Gmail address") + args = parser.parse_args() + + gmagent.set_email_service(args.gmail) + + greeting = llama31("hello", "Your name is Gmagent, an assistant that can perform all Gmail related tasks for your user.") + agent_response = f"{greeting}\n\nYour ask: " + #agent = Agent(system_prompt) + + while True: + ask = input(agent_response) + if ask == "bye": + print(llama31("bye")) + break + print("\n-------------------------\nCalling Llama...") + # agent(ask) + # agent_response = "Your ask: " + + + client = LlamaStackClient(base_url=LLAMA_STACK_API_TOGETHER_URL) + agent = await create_gmail_agent(client) + session_id = agent.create_session("email-session") + + queries = [ + "do i have any emails with attachments?", + "what's the content of the email from LangSmith", + ] + + for query in queries: + print(f"\nQuery: {query}") + print("-" * 50) + + response = agent.create_turn( + messages=[{"role": "user", "content": query}], + session_id=session_id, + ) + + async for log in EventLogger().log(response): + log.print() + + + +if __name__ == "__main__": + asyncio.run(main()) + + + diff --git 
a/docs/zero_to_hero_guide/gmail_agent/requirements.txt b/docs/zero_to_hero_guide/gmail_agent/requirements.txt new file mode 100644 index 0000000000..b96e1f620d --- /dev/null +++ b/docs/zero_to_hero_guide/gmail_agent/requirements.txt @@ -0,0 +1,10 @@ + +google-auth==2.27.0 +google-auth-oauthlib==0.4.6 +google-auth-httplib2==0.1.0 +google-api-python-client==2.34.0 +pytz +beautifulsoup4 +ollama +pypdf +termcolor \ No newline at end of file From 52937f9c3a038e3fc3f2563ad30e88254897d0c1 Mon Sep 17 00:00:00 2001 From: Jeff Tang Date: Mon, 9 Dec 2024 19:05:05 -0800 Subject: [PATCH 2/8] working version of using llama stack with multi-turn Qs --- .../gmail_agent/functions_prompt.py | 21 +++--- .../zero_to_hero_guide/gmail_agent/gmagent.py | 30 +++++++- docs/zero_to_hero_guide/gmail_agent/main.py | 73 ++++++++++++------- 3 files changed, 83 insertions(+), 41 deletions(-) diff --git a/docs/zero_to_hero_guide/gmail_agent/functions_prompt.py b/docs/zero_to_hero_guide/gmail_agent/functions_prompt.py index 11e3abc3a7..e9cbfb88a4 100644 --- a/docs/zero_to_hero_guide/gmail_agent/functions_prompt.py +++ b/docs/zero_to_hero_guide/gmail_agent/functions_prompt.py @@ -3,6 +3,7 @@ from llama_stack_client.types import CompletionMessage, ToolResponseMessage from llama_stack_client.lib.agents.custom_tool import CustomTool from gmagent import * +import json class ListEmailsTool(CustomTool): """Custom tool for List Emails.""" @@ -49,15 +50,14 @@ async def run(self, messages: List[CompletionMessage]) -> List[ToolResponseMessa async def run_impl(self, query: str, maxResults: int = 100) -> Dict[str, Any]: """Query to get a list of emails matching the query.""" emails = list_emails(query) - print(emails) - return emails + return {"name": self.get_name(), "result": emails} -class GetEmailTool(CustomTool): +class GetEmailDetailTool(CustomTool): """Custom tool for Get Email Detail.""" def get_name(self) -> str: - return "get_email" + return "get_email_detail" def get_description(self) -> str: return 
"Get detailed info about a specific email" @@ -65,9 +65,9 @@ def get_description(self) -> str: def get_params_definition(self) -> Dict[str, ToolParamDefinitionParam]: return { "detail": ToolParamDefinitionParam( - param_type="string", + param_type="str", description="what detail the user wants to know about - two possible values: body or attachment", - required=False + required=True ), "query": ToolParamDefinitionParam( param_type="str", @@ -95,12 +95,11 @@ async def run(self, messages: List[CompletionMessage]) -> List[ToolResponseMessa ) return [message] - async def run_impl(self, query: str, maxResults: int = 100) -> Dict[str, Any]: - """Query to get a list of emails matching the query.""" - - emails = [] - return emails + async def run_impl(self, detail: str, query: str) -> Dict[str, Any]: + """Query to get the detail of an email.""" + detail = get_email_detail(detail, query) + return {"name": self.get_name(), "result": detail} class SendEmailTool(CustomTool): diff --git a/docs/zero_to_hero_guide/gmail_agent/gmagent.py b/docs/zero_to_hero_guide/gmail_agent/gmagent.py index 4184ae1a4b..45ab78a058 100644 --- a/docs/zero_to_hero_guide/gmail_agent/gmagent.py +++ b/docs/zero_to_hero_guide/gmail_agent/gmagent.py @@ -12,10 +12,10 @@ import base64 import pickle from datetime import datetime, timezone -import json import ollama from pypdf import PdfReader from pathlib import Path +from shared import memory SCOPES = ['https://www.googleapis.com/auth/gmail.readonly', 'https://www.googleapis.com/auth/gmail.compose'] @@ -84,9 +84,33 @@ def list_emails(query='', max_results=100): return emails -def get_email_detail(detail, which=''): +def get_email_detail(detail, which): + message_id = None + # pre-processing + if 'from ' in which: + sender = which.split('from ')[-1] + for email in memory['emails']: + if email['sender'].find(sender) != -1: + message_id = email['message_id'] + break + elif 'subject:' in which: + subject = which.split('subject:')[-1] + # exact match beats 
substring + for email in memory['emails']: + if email['subject'].upper() == subject.upper(): + message_id = email['message_id'] + break + elif email['subject'].upper().find(subject.upper()) != -1: + message_id = email['message_id'] + + elif 'id_' in which: + message_id = which.split('id_')[-1] + else: + message_id = memory['emails'][-1]['message_id'] + + if detail == 'body': - return get_email_body(which) + return get_email_body(message_id) elif detail == 'attachment': return get_email_attachments(which) diff --git a/docs/zero_to_hero_guide/gmail_agent/main.py b/docs/zero_to_hero_guide/gmail_agent/main.py index f3d557b677..468a42b584 100644 --- a/docs/zero_to_hero_guide/gmail_agent/main.py +++ b/docs/zero_to_hero_guide/gmail_agent/main.py @@ -1,8 +1,8 @@ import argparse import gmagent import asyncio -from gmagent import * -from functions_prompt import * #system_prompt +import json +from functions_prompt import * from llama_stack_client import LlamaStackClient from llama_stack_client.lib.agents.agent import Agent @@ -11,6 +11,8 @@ AgentConfig, ) +from shared import memory + LLAMA_STACK_API_TOGETHER_URL="https://llama-stack.together.ai" LLAMA31_8B_INSTRUCT = "Llama3.1-8B-Instruct" @@ -18,7 +20,7 @@ async def create_gmail_agent(client: LlamaStackClient) -> Agent: """Create an agent with gmail tool capabilities.""" listEmailsTool = ListEmailsTool() - getEmailTool = GetEmailTool() + getEmailDetailTool = GetEmailDetailTool() sendEmailTool = SendEmailTool() getPDFSummaryTool = GetPDFSummaryTool() createDraftTool = CreateDraftTool() @@ -34,7 +36,7 @@ async def create_gmail_agent(client: LlamaStackClient) -> Agent: }, tools=[ listEmailsTool.get_tool_definition(), - getEmailTool.get_tool_definition(), + getEmailDetailTool.get_tool_definition(), sendEmailTool.get_tool_definition(), getPDFSummaryTool.get_tool_definition(), createDraftTool.get_tool_definition(), @@ -52,7 +54,7 @@ async def create_gmail_agent(client: LlamaStackClient) -> Agent: client=client, 
agent_config=agent_config, custom_tools=[listEmailsTool, - getEmailTool, + getEmailDetailTool, sendEmailTool, getPDFSummaryTool, createDraftTool, @@ -62,9 +64,6 @@ async def create_gmail_agent(client: LlamaStackClient) -> Agent: return agent - - - async def main(): parser = argparse.ArgumentParser(description="Set email address") parser.add_argument("--gmail", type=str, required=True, help="Your Gmail address") @@ -74,7 +73,9 @@ async def main(): greeting = llama31("hello", "Your name is Gmagent, an assistant that can perform all Gmail related tasks for your user.") agent_response = f"{greeting}\n\nYour ask: " - #agent = Agent(system_prompt) + + # do i have emails with attachment larger than 5mb? + # what's the detail of the email with subject this is an interesting paper while True: ask = input(agent_response) @@ -82,30 +83,48 @@ async def main(): print(llama31("bye")) break print("\n-------------------------\nCalling Llama...") - # agent(ask) - # agent_response = "Your ask: " - client = LlamaStackClient(base_url=LLAMA_STACK_API_TOGETHER_URL) agent = await create_gmail_agent(client) session_id = agent.create_session("email-session") - queries = [ - "do i have any emails with attachments?", - "what's the content of the email from LangSmith", - ] - - for query in queries: - print(f"\nQuery: {query}") - print("-" * 50) - - response = agent.create_turn( - messages=[{"role": "user", "content": query}], - session_id=session_id, - ) + response = agent.create_turn( + messages=[{"role": "user", "content": ask}], + session_id=session_id, + ) + + async for log in EventLogger().log(response): + if log.role == "CustomTool": + tool_name = json.loads(log.content)['name'] + result = json.loads(log.content)['result'] + if tool_name == 'list_emails': + # post processing + memory['emails'] = result + num = len(result) + if num == 0: + output = "I couldn't find any such emails. What else would you like to do?" 
+ elif num <= 5: + output = f"I found {num} email{'s' if num > 1 else ''} matching your query:\n" + for i, email in enumerate(result, start=1): + output += f"{i}. From: {email['sender']}, Subject: {email['subject']}, Received on: {email['received_time']}\n" + else: + output = f"I found {num} emails matching your query. Here are the first 5 emails:\n" + for i in range(1, 6): + output += f"{i}. From: {result[i - 1]['sender']}, Subject: {result[i - 1]['subject']}, Received on: {result[i - 1]['received_time']}\n" + + elif tool_name == "get_email_detail": + output = result + + print(f"\n-------------------------\n\nGmagent: {output}\n") + elif log.role == "inference": + print("Llama returned: ", end="") + else: + print(log, end="") + + + + agent_response = "\n\nYour ask: " - async for log in EventLogger().log(response): - log.print() From a45c82aa1aa3938942be863a65af66782025c000 Mon Sep 17 00:00:00 2001 From: Jeff Tang Date: Tue, 10 Dec 2024 19:20:22 -0800 Subject: [PATCH 3/8] llama stack port completed; README update --- docs/zero_to_hero_guide/gmail_agent/README.md | 266 ++++++++++++++++++ .../gmail_agent/functions_prompt.py | 32 +-- .../zero_to_hero_guide/gmail_agent/gmagent.py | 145 +--------- docs/zero_to_hero_guide/gmail_agent/main.py | 59 ++-- .../gmail_agent/requirements.txt | 3 +- 5 files changed, 320 insertions(+), 185 deletions(-) create mode 100644 docs/zero_to_hero_guide/gmail_agent/README.md diff --git a/docs/zero_to_hero_guide/gmail_agent/README.md b/docs/zero_to_hero_guide/gmail_agent/README.md new file mode 100644 index 0000000000..a70b037cd7 --- /dev/null +++ b/docs/zero_to_hero_guide/gmail_agent/README.md @@ -0,0 +1,266 @@ +# Emagent - A Llama and Llama Stack Powered Email Agent + +This is a Llama Stack port of the [Emagent](https://github.com/meta-llama/llama-recipes/tree/gmagent/recipes/use_cases/email_agent) app that shows how to build an email agent app powered by Llama 3.1 8B and Llama Stack, using Llama Stack custom tool and agent APIs. 
The end goal is to cover all components of a production-ready agent app, acting as an assistant to your email, with great user experience: intuitive, engaging, efficient and reliable. We'll use Gmail as an example but any email client APIs can be used instead. + +Currently implemented features of Emagent include: +* search for emails and attachments +* get email detail +* reply to a specific email +* forward an email +* get summary of a PDF attachment +* draft and send an email + +If your main intent is to know the difference between using Llama Stack APIs or not for this agent implementation, go to [Implementation Notes](#implementation-notes). + +# Overview + +Email is essential and one of the top killer apps people use every day. A recent [State of AI Agents](https://www.langchain.com/stateofaiagents) survey by LangChain finds that "The top use cases for agents include performing research and summarization (58%), followed by streamlining tasks for personal productivity or assistance (53.5%)." + +Andrew Ng wrote a 5-part [Agentic Design Patterns](https://www.deeplearning.ai/the-batch/how-agents-can-improve-llm-performance/) in March 2024 predicting "AI agent workflows will drive massive AI progress this year". + +Deloitte published in November 2024 a report on [AI agents and multiagent systems](https://www2.deloitte.com/content/dam/Deloitte/us/Documents/consulting/us-ai-institute-generative-ai-agents-multiagent-systems.pdf) stating that "Through their ability to reason, plan, remember and act, AI agents address key limitations of typical language models." and "Executive leaders should make moves now to prepare for and embrace this next era of intelligent organizational transformation." + +In the Thanksgiving week, a new startup [/dev/agents](https://sdsa.ai/) building the next-gen OS for AI agents was in the spotlight. 
+ +In December, Sequoia posted [here](https://www.linkedin.com/posts/konstantinebuhler_the-ai-landscape-is-shifting-from-simple-activity-7270111755710672897-ZHnr/) saying 2024 has been the year of agents (an agent is an AI that can complete tasks, not only tells you how to do it but also does it for you directly), and 2025 will be the year of networks of AI agents. + +So what exactly is an AI agent and how to start building an agent app? + +## What is an agent? + +The concept of agent is not new - in the 2010 3rd edition of Russell and Norvig's classic book Artificial Intelligence: A Modern Approach ("Modern" by 2010, two years before the deep learning revolution that started the truly modern AI), an agent is defined as "anything that can be viewed as perceiving its environment through sensors and acting upon that environment through actuators". These days, AI agent basically means LLM-powered agent - well, if we treat natural language understanding as a type of sensor, LLM agent is still a sub-category of the traditional agent. + +Lilian Weng in her popular June 2023 blog [LLM Powered Autonomous Agents](https://lilianweng.github.io/posts/2023-06-23-agent/) defines LLM-powered agent system to have four key components: + * Planning and Reflection: can break down large tasks into smaller ones; can do self-reflection over past actions and self improve; + * Memory: can use contextual info and recall info over extended periods (for other components to use); + * Tool Use: can understand what external APIs to use for info or action not built into LLMs; + * Action: can actually run the tools. 
+ +Andrew Ng describes four [agentic design patterns](https://www.deeplearning.ai/the-batch/how-agents-can-improve-llm-performance/) as: +* Reflection +* Planning +* Tool calling +* Multi-agent collaboration, where "memory" is mentioned: Each agent implements its own workflow, has its own memory (itself a rapidly evolving area in agentic technology: how can an agent remember enough of its past interactions to perform better on upcoming ones?) + +In Deloitte's [report](https://www2.deloitte.com/content/dam/Deloitte/us/Documents/consulting/us-ai-institute-generative-ai-agents-multiagent-systems.pdf), AI agents are reasoning engines that can understand context, plan workflows, connect to external tools and data, and execute actions to achieve a defined goal. + +In a November 2024 blog by Letta [The AI agents stack](https://www.letta.com/blog/ai-agents-stack), LLM powered agent is described as the combination of tools use, autonomous execution, and memory. + +In addition, Harrison Chase defines agent in the blog [What is an AI agent](https://blog.langchain.dev/what-is-an-agent/) as "a system that uses an LLM to decide the control flow of an application." + +Yet another simple [summary](https://www.felicis.com/insight/the-agentic-web) by Felicis of what an agent does is that an agent expands LLMs to go from chat to act: an agent can pair LLMs with external data, multi-step reasoning and planning, and act on the user's behalf. + +All in all (see [Resources](#resources) for even more info), agents are systems that take a high-level task, use an LLM as a reasoning and planning engine, with the help of contextual info and long-term memory if needed, to decide what actions to take, reflect and improve on the actions, and eventually execute those actions to accomplish the task. + +It's time to see an agent app in action and enjoy some coding. 
Below is a preview of the questions or requests one may ask Gmagent: + +# Example Asks to Gmagent + +* do i have emails with attachment larger than 5mb? +* what's the detail of the email with subject this is an interesting paper +* how many emails with attachment +* tell me the detail about the attachments for the email with subject papers to read? +* give me a summary of the pdf thinking_llm.pdf +* draft an email to jeffxtang@meta.com saying how about lunch together this thursday? +* send the draft + +# Setup and Installation + +If you feel intimidated by the steps of the following Enable Gmail API section, you may want to check the example asks again (to see what you can ask the agent) and the example log (to see the whole conversation with gmagent) - the devil's in the detail and all the glorious description of a powerful trendy agent may not mention the little details one has to deal with to build it. + +## Enable Gmail API +1. Go to the [Google Cloud Console](https://console.cloud.google.com/). +2. Create a new project by clicking the dropdown on the top left then click NEW PROJECT. +3. Enter a Project name then click CREATE. +4. Under "APIs & Services" > "Enabled APIs & services", search for "gmail" and then Enable the "Gmail API" for your project. +5. Under "APIs & Services" > "OAuth consent screen", click "GO TO NEW EXPERIENCE", then click "GET STARTED", enter App name, select your gmail as User support email, choose External under Audience, enter your gmail again as Contact Information, and finally check the I agree to the Google API Services under Finish and click Continue - Create. +5. Again under "APIs & Services", go to Credentials. Click on + CREATE CREDENTIALS, then choose OAuth client ID (NOT API key). +Select Desktop App (NOT Web application, because you're assumed to want to start your Gmail agent locally first) as the application type and name it. Click Create to generate your client ID and client secret. +6. 
Click Download JSON and rename the downloaded file as credentials.json. This file will be used in your Python script for authentication. + +## Install Ollama with Llama 3.1 8B + +Download Ollama (available for macOS, Linux, and Windows) [here](https://ollama.com/). Then download and test run the Llama 3.1 8B model by running on a Terminal: +``` +ollama run llama3.1 +``` + +This will download a quantized version of Llama 3.1 of the size 4.7GB. + +## Install required packages +First, create a Conda or virtual env: + +``` +conda create -n emagent python=3.10 +conda activate emagent +``` +or +``` +python -m venv emagent +source emagent/bin/activate # on Linux, macOS +emagent\Scripts\activate # on Windows +``` + +Then install the required Python libraries: +``` +git clone https://github.com/meta-llama/llama-stack +cd llama-stack/docs/zero_to_hero_guide/email_agent +pip install -r requirements.txt +``` + +# Run Emagent + +To run Emagent, you need to first copy the `credentials.json` file downloaded and renamed above in Step 6 of Enable Gmail API to the email_agent folder, then run: +``` +python main.py --gmail <your_gmail_address> +``` + +The first time you run it, you'll get a prompt like this: +``` +Please visit this URL to authorize this application: https://accounts.google.com/o/oauth2/auth?response_type=code&client_id=xxxx +Enter the authorization code: +``` + +You need to copy the URL above and open it in a browser - if you Sign in with Google using the same Gmail you enabled for the Gmail API, then you'll see "You’ve been given access to an app that’s currently being tested. You should only continue if you know the developer that invited you.", otherwise if you sign in with another Gmail, you'll see "Gmail Agent App has not completed the Google verification process. The app is currently being tested, and can only be accessed by developer-approved testers. If you think you should have access, contact the developer." 
+ +In the latter case, go to APIs & Services > OAuth consent screen > Test users, and click the + ADD USERS button, and you'll see this message: While publishing status is set to "Testing", only test users are able to access the app. Allowed user cap prior to app verification is 100, and is counted over the entire lifetime of the app. + +After clicking Continue, check the Select all checkbox to enable both settings required for running Gmagent: +``` +View your email messages and settings. +Manage drafts and send emails. +``` + +Finally, copy the Authorization code and paste it to the Terminal, hit Enter and you'll see Gmagent's initial greeting (which will likely differ because the default temperature value 0.8 is used here - see [Ollama's model file](https://github.com/ollama/ollama/blob/main/docs/modelfile.md#valid-parameters-and-values) for detail) such as: +``` +Hello! I'm Gmagent, here to help you manage your Gmail account with ease. + +What would you like to do today? Do you want me to: + +Check and respond to new emails +Compose a new email +Organize your inbox with filters or labels +Delete unwanted emails +Something else? + +Let me know how I can assist you! + +Your ask: +``` + +If you cancel here and run the command `python main.py --gmail ` again you should see the Gmagent greeting right away without the need to enter an authorization code, unless you enter a different Gmail address for the first time - in fact, for each authorized (added as a test user) Gmail address, a file `token_xxxx@gmail.com.pickle` will be created which contains the authorized token. + +See the example asks and interaction log above for the types of asks you may enter. + +# Implementation Notes +Notes here mainly cover how custom functions are defined, how Gmail API based functions are implemented, and how an Agent class is defined to handle memory for contextual chat and perform pre- and post-processing on the tool calling. 
+ +## Available Custom Tool Definition +The `functions_prompt.py` defines the following six custom tools (functions), each as a subclass of Llama Stack's `CustomTool` (along with examples for each function call spec that Llama should return): + +* ListEmailsTool +* GetEmailDetailTool +* SendEmailTool +* GetPDFSummaryTool +* CreateDraftTool +* SendDraftTool + + +Below is an example custom tool call spec in JSON format, for user asks such as "do i have emails with attachments larger than 5mb", "any attachments larger than 5mb" or "let me know if i have large attachments over 5mb": +``` +{"name": "list_emails", "parameters": {"query": "has:attachment larger:5mb"}} +``` + +Porting the custom function definition to Llama Stack's CustomTool subclass is straightforward. + +## Actual Function Call Implementation + +For each defined custom function call, its implementation using the Gmail API is present in `gmagent.py`. And we simply call them in each of the CustomTool subclass's `run_impl` method. 
For example, the `list_emails` is defined as follows: + +``` +def list_emails(query='', max_results=100): + emails = [] + next_page_token = None + + while True: + response = service.users().messages().list( + userId=user_id, + maxResults=max_results, + pageToken=next_page_token, + q=query + ).execute() + + if 'messages' in response: + for msg in response['messages']: + sender, subject, received_time = get_email_info(msg['id']) + emails.append( + { + "message_id": msg['id'], + "sender": sender, + "subject": subject, + "received_time": received_time + } + ) + + next_page_token = response.get('nextPageToken') + + if not next_page_token: + break + + return emails +``` + +The function will be called by the Llama Stack agent in the `run_impl` method of the `ListEmailsTool` class if a user ask is like "do i have emails with attachments larger than 5mb": +``` +emails = list_emails(query) + ``` + +## The Llama Stack Agent class + +The `create_gmail_agent` in main.py creates a Llama Stack Agent with 6 custom tools using a `LlamaStackClient` instance that connects to Together.ai's Llama Stack server. The agent then creates a session, and in a loop, for each user ask, the agent uses the same session to create a turn, inside which a tool call spec is generated based on the user's ask and actual tool call then happens. After post-processing of the tool call result, a user-friendly message is printed to respond to the user's original ask. + +When you try out Emagent, you'll likely find that further pre- and post-processing still needed to make it production ready. In a great video on [Vertical LLM Agents](https://www.youtube.com/watch?v=eBVi_sLaYsc), Jake Heller said "after passes frankly even like 100 tests the odds that it will do on any random distribution of user inputs of the next 100,000, 100% accurately is very high" and "by the time you've dealt with like all the edge cases... 
there might be dozens of things you build into your application to actually make it work well and then you get to the prompting piece and writing out tests and very specific prompts and the strategy for how you break down a big problem into step by step by step thinking and how you feed in the information how you format that information the right way". That's what all the business logic is about. We'll cover decomposing a complicated ask and multi-step reasoning in a future version of Gmagent, and continue to explore the best possible way to streamline the pre- and post-processing. + +## Debugging output + +When running Gmagent, the detailed Llama returns, pre-processed tool call specs and the actual tool calling results are inside the `-------------------------` block, e.g.: + +------------------------- +Calling Llama... + +Llama returned: {'function_name': 'list_emails', 'parameters': {'query': 'subject:papers to read has:attachment'}}. + +Calling tool to access Gmail API: list_emails, {'query': 'subject:papers to read has:attachment'}... + +Tool calling returned: [{'message_id': '1936ef72ad3f30e8', 'sender': 'gmagent_tester1@gmail.com', 'subject': 'Fwd: papers to read', 'received_time': '2024-11-27 10:51:51 PST'}, {'message_id': '1936b819706a4923', 'sender': 'Jeff Tang ', 'subject': 'papers to read', 'received_time': '2024-11-26 18:44:19 PST'}] + +------------------------- + + +# TODOs + +1. Improve the search, reply, forward, create email draft, and query about types of attachments. +2. Improve the fallback and error handling mechanism when the user asks don't lead to a correct function calling spec or the function calling fails. +3. Improve the user experience by showing progress when some Gmail search API calls take long (minutes) to complete. +4. Implement the async behavior of Gmagent - schedule an email to be sent later. +5. Implement the agent planning - decomposing a complicated ask into sub-tasks, using ReAct and other methods. +6. 
Implement the agent long-term memory - longer context and memory across sessions (consider using Llama Stack/MemGPT/Letta) +7. Implement reflection - on the tool calling spec and results. +8. Introduce multiple-agent collaboration. +9. Implement the agent observability. +10. Compare different agent frameworks using Gmagent as the case study. +11. Add and implement a test plan and productionize Gmagent. + + +# Resources +1. Lilian Weng's blog [LLM Powered Autonomous Agents](https://lilianweng.github.io/posts/2023-06-23-agent/) +2. Andrew Ng's posts [Agentic Design Patterns](https://www.deeplearning.ai/the-batch/how-agents-can-improve-llm-performance/) with basic [implementations from scratch](https://github.com/neural-maze/agentic_patterns). +3. LangChain's survey [State of AI Agents](https://www.langchain.com/stateofaiagents) +4. Deloitte's report [AI agents and multiagent systems](https://www2.deloitte.com/content/dam/Deloitte/us/Documents/consulting/us-ai-institute-generative-ai-agents-multiagent-systems.pdf) +5. Letta's blog [The AI agents stack](https://www.letta.com/blog/ai-agents-stack) +6. Microsoft's multi-agent system [Magentic-One](https://www.microsoft.com/en-us/research/articles/magentic-one-a-generalist-multi-agent-system-for-solving-complex-tasks) +7. Amazon's [Multi-Agent Orchestrator framework](https://awslabs.github.io/multi-agent-orchestrator/) +8. Deeplearning.ai's [agent related courses](https://www.deeplearning.ai/courses/?courses_date_desc%5Bquery%5D=agents) (Meta, AWS, Microsoft, LangChain, LlamaIndex, crewAI, AutoGen, Letta) and some [lessons ported to using Llama](https://github.com/meta-llama/llama-recipes/tree/main/recipes/quickstart/agents/DeepLearningai_Course_Notebooks). +9. Felicis's [The Agentic Web](https://www.felicis.com/insight/the-agentic-web) +10. 
A pretty complete [list of AI agents](https://github.com/e2b-dev/awesome-ai-agents), not including [/dev/agents](https://sdsa.ai/), a very new startup building the next-gen OS for AI agents, though. +11. Sequoia's [post](https://www.linkedin.com/posts/konstantinebuhler_the-ai-landscape-is-shifting-from-simple-activity-7270111755710672897-ZHnr/) on 2024 being the year of AI agents and 2025 networks of AI agents. diff --git a/docs/zero_to_hero_guide/gmail_agent/functions_prompt.py b/docs/zero_to_hero_guide/gmail_agent/functions_prompt.py index e9cbfb88a4..3e974a1f20 100644 --- a/docs/zero_to_hero_guide/gmail_agent/functions_prompt.py +++ b/docs/zero_to_hero_guide/gmail_agent/functions_prompt.py @@ -160,11 +160,11 @@ async def run(self, messages: List[CompletionMessage]) -> List[ToolResponseMessa ) return [message] - async def run_impl(self, query: str, maxResults: int = 100) -> Dict[str, Any]: - """Query to get a list of emails matching the query.""" + async def run_impl(self, action, to, subject, body="", email_id="") -> Dict[str, Any]: + """Send an email.""" - emails = [] - return emails + result = send_email(action, to, subject, body, email_id) + return {"name": self.get_name(), "result": result} class GetPDFSummaryTool(CustomTool): @@ -205,11 +205,11 @@ async def run(self, messages: List[CompletionMessage]) -> List[ToolResponseMessa ) return [message] - async def run_impl(self, query: str, maxResults: int = 100) -> Dict[str, Any]: - """Query to get a list of emails matching the query.""" + async def run_impl(self, file_name: str) -> Dict[str, Any]: + """Get the summary of a PDF file.""" - emails = [] - return emails + summary = get_pdf_summary(file_name) + return {"name": self.get_name(), "result": summary} class CreateDraftTool(CustomTool): @@ -270,11 +270,11 @@ async def run(self, messages: List[CompletionMessage]) -> List[ToolResponseMessa ) return [message] - async def run_impl(self, query: str, maxResults: int = 100) -> Dict[str, Any]: - """Query to get a 
list of emails matching the query.""" + async def run_impl(self, action, to, subject, body="", email_id="") -> Dict[str, Any]: + """Create an email draft.""" - emails = [] - return emails + result = create_draft(action, to, subject, body, email_id) + return {"name": self.get_name(), "result": result} class SendDraftTool(CustomTool): @@ -315,11 +315,11 @@ async def run(self, messages: List[CompletionMessage]) -> List[ToolResponseMessa ) return [message] - async def run_impl(self, query: str, maxResults: int = 100) -> Dict[str, Any]: - """Query to get a list of emails matching the query.""" + async def run_impl(self, id: str) -> Dict[str, Any]: + """Send the last draft email.""" - emails = [] - return emails + result = send_draft(memory['draft_id']) + return {"name": self.get_name(), "result": result} examples = """ diff --git a/docs/zero_to_hero_guide/gmail_agent/gmagent.py b/docs/zero_to_hero_guide/gmail_agent/gmagent.py index 45ab78a058..0b55aa14e4 100644 --- a/docs/zero_to_hero_guide/gmail_agent/gmagent.py +++ b/docs/zero_to_hero_guide/gmail_agent/gmagent.py @@ -18,6 +18,9 @@ from shared import memory SCOPES = ['https://www.googleapis.com/auth/gmail.readonly', 'https://www.googleapis.com/auth/gmail.compose'] +user_email = None +service = None +user_id = 'me' def authenticate_gmail(user_email): creds = None @@ -44,7 +47,6 @@ def authenticate_gmail(user_email): service = build('gmail', 'v1', credentials=creds) return service - def num_of_emails(query=''): response = service.users().messages().list( userId='me', @@ -52,7 +54,6 @@ def num_of_emails(query=''): maxResults=1).execute() return response.get('resultSizeEstimate', 0) - def list_emails(query='', max_results=100): emails = [] next_page_token = None @@ -108,12 +109,10 @@ def get_email_detail(detail, which): else: message_id = memory['emails'][-1]['message_id'] - if detail == 'body': return get_email_body(message_id) elif detail == 'attachment': - return get_email_attachments(which) - + return 
get_email_attachments(message_id) def get_email_body(message_id): try: @@ -147,7 +146,6 @@ def extract_parts(payload): print(f"An error occurred: {e}") return None - def parse_message(message): payload = message['payload'] headers = payload.get("headers") @@ -184,8 +182,7 @@ def parse_message(message): # Single part message data = payload['body']['data'] body = base64.urlsafe_b64decode(data).decode('utf-8') - return sender, subject, received_time, body - + return sender, subject, received_time, body def get_email_info(msg_id): message = service.users().messages().get( @@ -197,7 +194,6 @@ def get_email_info(msg_id): return sender, subject, received_time - def reply_email(message_id, reply_text): # Fetch the original message original_message = service.users().messages().get( @@ -235,7 +231,6 @@ def reply_email(message_id, reply_text): body=body).execute() print("Reply sent. Message ID:", sent_message['id']) - def forward_email(message_id, forward_to, email_body=None): """ Forwards an email, preserving the original MIME type, including multipart/related. @@ -326,7 +321,6 @@ def rebuild_parts(parts): print(f"Message forwarded successfully! 
Message ID: {sent_message['id']}") - def send_email(action, to, subject, body="", email_id=""): if action == "compose": message = MIMEText(body) @@ -346,7 +340,6 @@ def send_email(action, to, subject, body="", email_id=""): elif action == "forward": forward_email(email_id, to, body) - def create_draft(action, to, subject, body="", email_id=""): if action == "new": message = MIMEText(body) @@ -368,8 +361,6 @@ def create_draft(action, to, subject, body="", email_id=""): else: return - - def create_reply_draft(message_id, reply_text): # Fetch the original message original_message = service.users().messages().get( @@ -403,7 +394,6 @@ def create_reply_draft(message_id, reply_text): draft = service.users().drafts().create(userId=user_id, body=draft_body).execute() return draft['id'] - def create_forward_draft(message_id, recipient_email, custom_message=None): # Get the original message original_message = service.users().messages().get( @@ -430,14 +420,12 @@ def create_forward_draft(message_id, recipient_email, custom_message=None): print(f"Forward draft created with ID: {draft['id']}") return draft['id'] - def send_draft(id): sent_message = service.users().drafts().send( userId=user_id, body={'id': id} ).execute() return f"Draft sent with email ID: {sent_message['id']}" - def get_pdf_summary(file_name): text = pdf2text(file_name) @@ -445,7 +433,6 @@ def get_pdf_summary(file_name): response = llama31(text, "Generate a summary of the input text in 5 sentences.") return response - def get_email_attachments(message_id, mime_type='application/pdf'): attachments = [] @@ -496,7 +483,6 @@ def process_parts(parts): rslt += f"{a['filename']} - {a['size']} bytes\n" return rslt #attachments - def pdf2text(file): text = '' try: @@ -510,11 +496,6 @@ def pdf2text(file): return text - -user_email = None -service = None -user_id = 'me' - def set_email_service(gmail): global user_email global service @@ -522,122 +503,6 @@ def set_email_service(gmail): user_email = gmail service = 
authenticate_gmail(user_email) -# class Agent: -# def __init__(self, system_prompt=""): -# self.system_prompt = system_prompt -# self.messages = [] -# -# # Gmagent-specific short term memory, used to answer follow up questions AFTER a list of emails is found matching user's query -# self.emails = [] -# self.draft_id = None -# -# if self.system_prompt: -# self.messages.append({"role": "system", "content": system_prompt}) -# -# def __call__(self, user_prompt_or_tool_result, is_tool_call=False): -# # if it's tool call result, use "ipython" instead of "user" for the role -# self.messages.append({"role": ("ipython" if is_tool_call else "user"), "content": user_prompt_or_tool_result}) -# result = self.llama() -# print(f"\nLlama returned: {result}.") -# if type(result) == dict: # result is a dict only if it's a tool call spec -# function_name = result["function_name"] -# func = globals()[function_name] -# parameters = result["parameters"] -# if function_name == "get_email_detail": -# # TODO: parse which - valid values are first, second, -# # third, fourth, last, from xxx -# if 'id' in parameters.keys(): -# parameters['which'] = parameters['id'] -# del parameters['id'] # per the function spec -# elif 'which' in parameters.keys(): -# if 'from ' in parameters['which']: -# sender = parameters['which'].split('from ')[-1] -# for email in self.emails: -# if email['sender'].find(sender) != -1: -# parameters['which'] = email['message_id'] -# break -# if 'subject ' in parameters['which']: -# subject = parameters['which'].split('subject ')[-1] -# # exact match beats substring -# for email in self.emails: -# if email['subject'].upper() == subject.upper(): -# parameters['which'] = email['message_id'] -# break -# elif email['subject'].upper().find(subject.upper()) != -1: -# parameters['which'] = email['message_id'] -# -# elif 'id_' in parameters['which']: -# parameters['which'] = parameters['which'].split('id_')[-1] -# else: -# parameters['which'] = self.emails[-1]['message_id'] -# 
elif function_name == "send_draft": -# parameters['id'] = self.draft_id -# -# print(f"\nCalling tool to access Gmail API: {function_name}, {parameters}...") -# result = func(**parameters) -# print(f"\nTool calling returned: {result}") -# -# # convert function calling result to concise summary, offering interactive follow ups, -# # for smooth and user friendly experience -# if function_name == 'list_emails': -# self.emails = result -# num = len(result) -# if num == 0: -# output = "I couldn't find any such emails. What else would you like to do?" -# elif num <= 5: -# output = f"I found {num} email{'s' if num > 1 else ''} matching your query:\n" -# for i, email in enumerate(result, start=1): -# output += f"{i}. From: {email['sender']}, Subject: {email['subject']}, Received on: {email['received_time']}\n" -# else: -# output = f"I found {num} emails matching your query. Here are the first 5 emails:\n" -# for i in range(1, 6): -# output += f"{i}. From: {result[i-1]['sender']}, Subject: {result[i-1]['subject']}, Received on: {result[i-1]['received_time']}\n" -# elif function_name == "get_email_detail": -# output = result -# elif function_name == "get_pdf_summary": -# output = result -# elif function_name == "send_email": -# output = "Email sent." -# elif function_name == "create_draft": -# output = "Draft created." -# self.draft_id = result -# elif function_name == "send_draft": -# output = result -# -# print(f"\n-------------------------\n\nGmagent: {output}\n") -# else: -# output = result # direct text, not JSON, response by Llama -# -# # adding this may cause Llama to hallucinate when answering -# # follow up questions. e.g. "do i have emails with attachments -# # larger than 20mb" got right tool calling response, then -# # follow up "larger than 10mb" got hallucinated response. 
-# # self.messages.append({"role": "assistant", "content": output}) -# -# # this mitigates the hallucination -# self.messages.append({"role": "assistant", "content": str(result)}) -# -# return output -# -# def llama(self): -# response = ollama.chat(model='llama3.1', -# messages = self.messages, -# options = { -# "temperature": 0.0 -# } -# ) -# result = response['message']['content'] -# -# try: -# res = json.loads(result.split("<|python_tag|>")[-1]) -# function_name = res['name'] -# parameters = res['parameters'] -# return {"function_name": function_name, -# "parameters": parameters} -# except: -# return result -# -# def llama31(user_prompt: str, system_prompt = ""): response = ollama.chat(model='llama3.1', messages=[ diff --git a/docs/zero_to_hero_guide/gmail_agent/main.py b/docs/zero_to_hero_guide/gmail_agent/main.py index 468a42b584..537ad43c47 100644 --- a/docs/zero_to_hero_guide/gmail_agent/main.py +++ b/docs/zero_to_hero_guide/gmail_agent/main.py @@ -34,7 +34,7 @@ async def create_gmail_agent(client: LlamaStackClient) -> Agent: "temperature": 0.0, "top_p": 0.9, }, - tools=[ + tools = [ listEmailsTool.get_tool_definition(), getEmailDetailTool.get_tool_definition(), sendEmailTool.get_tool_definition(), @@ -43,27 +43,28 @@ async def create_gmail_agent(client: LlamaStackClient) -> Agent: sendDraftTool.get_tool_definition(), ], - tool_choice="auto", - tool_prompt_format="json", - input_shields=[], - output_shields=[], - enable_session_persistence=True + tool_choice = "auto", + tool_prompt_format = "json", + input_shields = [], + output_shields = [], + enable_session_persistence = True ) agent = Agent( - client=client, - agent_config=agent_config, - custom_tools=[listEmailsTool, - getEmailDetailTool, - sendEmailTool, - getPDFSummaryTool, - createDraftTool, - sendDraftTool] + client = client, + agent_config = agent_config, + custom_tools = ( + listEmailsTool, + getEmailDetailTool, + sendEmailTool, + getPDFSummaryTool, + createDraftTool, + sendDraftTool + ) ) return 
agent - async def main(): parser = argparse.ArgumentParser(description="Set email address") parser.add_argument("--gmail", type=str, required=True, help="Your Gmail address") @@ -74,8 +75,9 @@ async def main(): greeting = llama31("hello", "Your name is Gmagent, an assistant that can perform all Gmail related tasks for your user.") agent_response = f"{greeting}\n\nYour ask: " - # do i have emails with attachment larger than 5mb? - # what's the detail of the email with subject this is an interesting paper + client = LlamaStackClient(base_url=LLAMA_STACK_API_TOGETHER_URL) + agent = await create_gmail_agent(client) + session_id = agent.create_session("email-session") while True: ask = input(agent_response) @@ -84,10 +86,6 @@ async def main(): break print("\n-------------------------\nCalling Llama...") - client = LlamaStackClient(base_url=LLAMA_STACK_API_TOGETHER_URL) - agent = await create_gmail_agent(client) - session_id = agent.create_session("email-session") - response = agent.create_turn( messages=[{"role": "user", "content": ask}], session_id=session_id, @@ -97,8 +95,9 @@ async def main(): if log.role == "CustomTool": tool_name = json.loads(log.content)['name'] result = json.loads(log.content)['result'] + + # post processing if tool_name == 'list_emails': - # post processing memory['emails'] = result num = len(result) if num == 0: @@ -114,6 +113,15 @@ async def main(): elif tool_name == "get_email_detail": output = result + elif tool_name == "create_draft": + output = "Draft created." + memory['draft_id'] = result + elif tool_name == "send_draft": + output = result + elif tool_name == "send_email": + output = "Email sent." 
+ elif tool_name == "get_pdf_summary": + output = result print(f"\n-------------------------\n\nGmagent: {output}\n") elif log.role == "inference": @@ -121,12 +129,7 @@ async def main(): else: print(log, end="") - - - agent_response = "\n\nYour ask: " - - - + agent_response = "Your ask: " if __name__ == "__main__": asyncio.run(main()) diff --git a/docs/zero_to_hero_guide/gmail_agent/requirements.txt b/docs/zero_to_hero_guide/gmail_agent/requirements.txt index b96e1f620d..e1255e8191 100644 --- a/docs/zero_to_hero_guide/gmail_agent/requirements.txt +++ b/docs/zero_to_hero_guide/gmail_agent/requirements.txt @@ -3,8 +3,9 @@ google-auth==2.27.0 google-auth-oauthlib==0.4.6 google-auth-httplib2==0.1.0 google-api-python-client==2.34.0 +llama_stack_client==0.0.50 pytz beautifulsoup4 -ollama +ollama==0.4.4 pypdf termcolor \ No newline at end of file From 61e837380c67ff71c872f38caa83cd28c80644fd Mon Sep 17 00:00:00 2001 From: Jeff Tang Date: Wed, 11 Dec 2024 18:04:32 -0800 Subject: [PATCH 4/8] rename folder; code readme update --- docs/zero_to_hero_guide/email_agent/README.md | 167 +++++++++++ .../gmagent.py => email_agent/email_agent.py} | 0 .../functions_prompt.py | 4 +- .../{gmail_agent => email_agent}/main.py | 12 +- .../requirements.txt | 1 - docs/zero_to_hero_guide/gmail_agent/README.md | 266 ------------------ 6 files changed, 175 insertions(+), 275 deletions(-) create mode 100644 docs/zero_to_hero_guide/email_agent/README.md rename docs/zero_to_hero_guide/{gmail_agent/gmagent.py => email_agent/email_agent.py} (100%) rename docs/zero_to_hero_guide/{gmail_agent => email_agent}/functions_prompt.py (99%) rename docs/zero_to_hero_guide/{gmail_agent => email_agent}/main.py (92%) rename docs/zero_to_hero_guide/{gmail_agent => email_agent}/requirements.txt (99%) delete mode 100644 docs/zero_to_hero_guide/gmail_agent/README.md diff --git a/docs/zero_to_hero_guide/email_agent/README.md b/docs/zero_to_hero_guide/email_agent/README.md new file mode 100644 index 
0000000000..f578e57db0 --- /dev/null +++ b/docs/zero_to_hero_guide/email_agent/README.md @@ -0,0 +1,167 @@ +# A Llama and Llama Stack Powered Email Agent + +This is a Llama Stack port of the [Llama Powered Email Agent](https://github.com/meta-llama/llama-recipes/tree/gmagent/recipes/use_cases/email_agent) app that shows how to build an email agent app powered by Llama 3.1 8B and Llama Stack, using Llama Stack custom tool and agent APIs. + +Currently implemented features of the agent include: +* search for emails and attachments +* get email detail +* reply to a specific email +* forward an email +* get summary of a PDF attachment +* draft and send an email + +We'll mainly cover here how to port a Llama app using native custom tools supported in Llama 3.1 (and later) and an agent implementation from scratch to using Llama Stack APIs. See the link above for a comprehensive overview, definition, and resources of LLM agents. + +# Setup and Installation + +See the link above for Enable Gmail API and Install Ollama with Llama 3.1 8B. + +## Install required packages +First, create a Conda or virtual env, then activate it and install the required Python libraries (slightly different from the original app because here we'll also install the `llama-stack-client` package): +``` +git clone https://github.com/meta-llama/llama-stack +cd llama-stack/docs/zero_to_hero_guide/email_agent +pip install -r requirements.txt +``` + +# Run Email Agent + +The steps are also the same as the [original app](https://github.com/meta-llama/llama-recipes/tree/gmagent/recipes/use_cases/email_agent). + +# Implementation Notes +Notes here mainly cover how custom tools (functions) are defined and how the Llama Stack Agent class is used with the custom tools. 
+ +## Available Custom Tool Definition +The `functions_prompt.py` defines the following six custom tools (functions), each as a subclass of Llama Stack's `CustomTool`, along with examples for each function call spec that Llama should return: + +* ListEmailsTool +* GetEmailDetailTool +* SendEmailTool +* GetPDFSummaryTool +* CreateDraftTool +* SendDraftTool + +Below is an example custom tool call spec in JSON format, for user asks such as "do i have emails with attachments larger than 5mb", "any attachments larger than 5mb" or "let me know if i have large attachments over 5mb": +``` +{"name": "list_emails", "parameters": {"query": "has:attachment larger:5mb"}} +``` + +Porting the custom function definition in the original app to Llama Stack's CustomTool subclass is straightforward. Below is an example of the original custom function definition: +``` +list_emails_function = """ +{ + "type": "function", + "function": { + "name": "list_emails", + "description": "Return a list of emails matching an optionally specified query.", + "parameters": { + "type": "dic", + "properties": [ + { + "maxResults": { + "type": "integer", + "description": "The default maximum number of emails to return is 100; the maximum allowed value for this field is 500." + } + }, + { + "query": { + "type": "string", + "description": "One or more keywords in the email subject and body, or one or more filters. There can be 6 types of filters: 1) Field-specific Filters: from, to, cc, bcc, subject; 2) Date Filters: before, after, older than, newer than); 3) Status Filters: read, unread, starred, importatant; 4) Attachment Filters: has, filename or type; 5) Size Filters: larger, smaller; 6) logical operators (or, and, not)." 
+ } + } + ], + "required": [] + } + } +} +""" +``` + +And its Llama Stack CustomTool subclass implementation is: +``` +class ListEmailsTool(CustomTool): + """Custom tool for List Emails.""" + + def get_name(self) -> str: + return "list_emails" + + def get_description(self) -> str: + return "Return a list of emails matching an optionally specified query." + + def get_params_definition(self) -> Dict[str, ToolParamDefinitionParam]: + return { + "maxResults": ToolParamDefinitionParam( + param_type="int", + description="The default maximum number of emails to return is 100; the maximum allowed value for this field is 500.", + required=False + ), + "query": ToolParamDefinitionParam( + param_type="str", + description="One or more keywords in the email subject and body, or one or more filters. There can be 6 types of filters: 1) Field-specific Filters: from, to, cc, bcc, subject; 2) Date Filters: before, after, older than, newer than); 3) Status Filters: read, unread, starred, importatant; 4) Attachment Filters: has, filename or type; 5) Size Filters: larger, smaller; 6) logical operators (or, and, not).", + required=False + ) + } + async def run(self, messages: List[CompletionMessage]) -> List[ToolResponseMessage]: + assert len(messages) == 1, "Expected single message" + + message = messages[0] + + tool_call = message.tool_calls[0] + try: + response = await self.run_impl(**tool_call.arguments) + response_str = json.dumps(response, ensure_ascii=False) + except Exception as e: + response_str = f"Error when running tool: {e}" + + message = ToolResponseMessage( + call_id=tool_call.call_id, + tool_name=tool_call.tool_name, + content=response_str, + role="ipython", + ) + return [message] + + async def run_impl(self, query: str, maxResults: int = 100) -> Dict[str, Any]: + """Query to get a list of emails matching the query.""" + emails = list_emails(query) + return {"name": self.get_name(), "result": emails} +``` + +Each CustomTool subclass has a `run_impl` method that calls 
the actual Gmail API-based tool call implementation (same as the original app), which, in the example above, is `list_emails`. + +## The Llama Stack Agent class + +The `create_email_agent` function in `main.py` creates a Llama Stack Agent with 6 custom tools using a `LlamaStackClient` instance that connects to Together.ai's Llama Stack server. The agent then creates a session and uses that same session in a loop to create a turn for each user ask. Inside each turn, a tool call spec is generated based on the user ask; if needed, the spec is then processed to match what the actual Gmail API expects (e.g. get_email_detail requires an email id but the tool call spec generated by Llama doesn't have the id), and the actual tool call happens. After post-processing of the tool call result, a user-friendly message is generated to respond to the user's original ask. + +## Memory + +In `shared.py` we define a simple dictionary `memory`, used to hold short-term results such as a list of found emails based on the user ask, or the draft id of a created email draft. They're needed to answer follow-up user asks such as "what attachments does the email with subject xxx have" or "send the draft". + + +# TODOs + +1. Improve the search, reply, forward, create email draft, and query about types of attachments. +2. Improve the fallback and error handling mechanism when user asks don't lead to a correct function calling spec or the function calling fails. +3. Improve the user experience by showing progress when some Gmail search API calls take long (minutes) to complete. +4. Implement the async behavior of the agent - schedule an email to be sent later. +5. Implement the agent planning - decomposing a complicated ask into sub-tasks, using ReAct and other methods. +6. Implement the agent long-term memory - longer context and memory across sessions (consider using Llama Stack/MemGPT/Letta) +7. Implement reflection - on the tool calling spec and results. +8. 
Introduce multiple-agent collaboration. +9. Implement the agent observability. +10. Compare different agent frameworks using the agent as the case study. +11. Add and implement a test plan and productionize the email agent. + + +# Resources +1. Lilian Weng's blog [LLM Powered Autonomous Agents](https://lilianweng.github.io/posts/2023-06-23-agent/) +2. Andrew Ng's posts [Agentic Design Patterns](https://www.deeplearning.ai/the-batch/how-agents-can-improve-llm-performance/) with basic [implementations from scratch](https://github.com/neural-maze/agentic_patterns). +3. LangChain's survey [State of AI Agents](https://www.langchain.com/stateofaiagents) +4. Deloitte's report [AI agents and multiagent systems](https://www2.deloitte.com/content/dam/Deloitte/us/Documents/consulting/us-ai-institute-generative-ai-agents-multiagent-systems.pdf) +5. Letta's blog [The AI agents stack](https://www.letta.com/blog/ai-agents-stack) +6. Microsoft's multi-agent system [Magentic-One](https://www.microsoft.com/en-us/research/articles/magentic-one-a-generalist-multi-agent-system-for-solving-complex-tasks) +7. Amazon's [Multi-Agent Orchestrator framework](https://awslabs.github.io/multi-agent-orchestrator/) +8. Deeplearning.ai's [agent related courses](https://www.deeplearning.ai/courses/?courses_date_desc%5Bquery%5D=agents) (Meta, AWS, Microsoft, LangChain, LlamaIndex, crewAI, AutoGen, Letta) and some [lessons ported to using Llama](https://github.com/meta-llama/llama-recipes/tree/main/recipes/quickstart/agents/DeepLearningai_Course_Notebooks). +9. Felicis's [The Agentic Web](https://www.felicis.com/insight/the-agentic-web) +10. A pretty complete [list of AI agents](https://github.com/e2b-dev/awesome-ai-agents), not including [/dev/agents](https://sdsa.ai/), a very new startup building the next-gen OS for AI agents, though. +11. 
Sequoia's [post](https://www.linkedin.com/posts/konstantinebuhler_the-ai-landscape-is-shifting-from-simple-activity-7270111755710672897-ZHnr/) on 2024 being the year of AI agents and 2025 networks of AI agents. diff --git a/docs/zero_to_hero_guide/gmail_agent/gmagent.py b/docs/zero_to_hero_guide/email_agent/email_agent.py similarity index 100% rename from docs/zero_to_hero_guide/gmail_agent/gmagent.py rename to docs/zero_to_hero_guide/email_agent/email_agent.py diff --git a/docs/zero_to_hero_guide/gmail_agent/functions_prompt.py b/docs/zero_to_hero_guide/email_agent/functions_prompt.py similarity index 99% rename from docs/zero_to_hero_guide/gmail_agent/functions_prompt.py rename to docs/zero_to_hero_guide/email_agent/functions_prompt.py index 3e974a1f20..02f0997a09 100644 --- a/docs/zero_to_hero_guide/gmail_agent/functions_prompt.py +++ b/docs/zero_to_hero_guide/email_agent/functions_prompt.py @@ -2,7 +2,7 @@ from llama_stack_client.types.tool_param_definition_param import ToolParamDefinitionParam from llama_stack_client.types import CompletionMessage, ToolResponseMessage from llama_stack_client.lib.agents.custom_tool import CustomTool -from gmagent import * +from email_agent import * import json class ListEmailsTool(CustomTool): @@ -349,7 +349,7 @@ async def run_impl(self, id: str) -> Dict[str, Any]: """ system_prompt = f""" -Your name is Gmagent, an assistant that can perform all Gmail related tasks for your user. +Your name is Email Agent, an assistant that can perform all email related tasks for your user. Respond to the user's ask by making use of the following functions if needed. If no available functions can be used, just say "I don't know" and don't make up facts. 
diff --git a/docs/zero_to_hero_guide/gmail_agent/main.py b/docs/zero_to_hero_guide/email_agent/main.py similarity index 92% rename from docs/zero_to_hero_guide/gmail_agent/main.py rename to docs/zero_to_hero_guide/email_agent/main.py index 537ad43c47..1c3fdfc536 100644 --- a/docs/zero_to_hero_guide/gmail_agent/main.py +++ b/docs/zero_to_hero_guide/email_agent/main.py @@ -1,5 +1,5 @@ import argparse -import gmagent +import email_agent import asyncio import json from functions_prompt import * @@ -16,7 +16,7 @@ LLAMA_STACK_API_TOGETHER_URL="https://llama-stack.together.ai" LLAMA31_8B_INSTRUCT = "Llama3.1-8B-Instruct" -async def create_gmail_agent(client: LlamaStackClient) -> Agent: +async def create_email_agent(client: LlamaStackClient) -> Agent: """Create an agent with gmail tool capabilities.""" listEmailsTool = ListEmailsTool() @@ -67,16 +67,16 @@ async def create_gmail_agent(client: LlamaStackClient) -> Agent: async def main(): parser = argparse.ArgumentParser(description="Set email address") - parser.add_argument("--gmail", type=str, required=True, help="Your Gmail address") + parser.add_argument("--email", type=str, required=True, help="Your Gmail address") args = parser.parse_args() - gmagent.set_email_service(args.gmail) + email_agent.set_email_service(args.email) - greeting = llama31("hello", "Your name is Gmagent, an assistant that can perform all Gmail related tasks for your user.") + greeting = llama31("hello", "Your name is Email Agent, an assistant that can perform all email related tasks for your user.") agent_response = f"{greeting}\n\nYour ask: " client = LlamaStackClient(base_url=LLAMA_STACK_API_TOGETHER_URL) - agent = await create_gmail_agent(client) + agent = await create_email_agent(client) session_id = agent.create_session("email-session") while True: diff --git a/docs/zero_to_hero_guide/gmail_agent/requirements.txt b/docs/zero_to_hero_guide/email_agent/requirements.txt similarity index 99% rename from 
docs/zero_to_hero_guide/gmail_agent/requirements.txt rename to docs/zero_to_hero_guide/email_agent/requirements.txt index e1255e8191..da96182dd2 100644 --- a/docs/zero_to_hero_guide/gmail_agent/requirements.txt +++ b/docs/zero_to_hero_guide/email_agent/requirements.txt @@ -1,4 +1,3 @@ - google-auth==2.27.0 google-auth-oauthlib==0.4.6 google-auth-httplib2==0.1.0 diff --git a/docs/zero_to_hero_guide/gmail_agent/README.md b/docs/zero_to_hero_guide/gmail_agent/README.md deleted file mode 100644 index a70b037cd7..0000000000 --- a/docs/zero_to_hero_guide/gmail_agent/README.md +++ /dev/null @@ -1,266 +0,0 @@ -# Emagent - A Llama and Llama Stack Powered Email Agent - -This is a Llama Stack port of the [Emagent](https://github.com/meta-llama/llama-recipes/tree/gmagent/recipes/use_cases/email_agent) app that shows how to build an email agent app powered by Llama 3.1 8B and Llama Stack, using Llama Stack custom tool and agent APIs. The end goal is to cover all components of a production-ready agent app, acting as an assistant to your email, with great user experience: intuitive, engaging, efficient and reliable. We'll use Gmail as an example but any email client API's can be used instead. - -Currently implemented features of Emagent include: -* search for emails and attachments -* get email detail -* reply to a specific email -* forward an email -* get summary of a PDF attachment -* draft and send an email - -If your main intent is to know the difference between using Llama Stack APIs or not for this agent implementation, go to [Implementation Notes](#implementation-notes). - -# Overview - -Email is an essential and one top killer app people use every day. A recent [State of AI Agents](https://www.langchain.com/stateofaiagents) survey by LangChain finds that "The top use cases for agents include performing research and summarization (58%), followed by streamlining tasks for personal productivity or assistance (53.5%)." 
- -Andrew Ng wrote a 5-part [Agentic Design Patterns](https://www.deeplearning.ai/the-batch/how-agents-can-improve-llm-performance/) in March 2024 predicting "AI agent workflows will drive massive AI progress this year". - -Deloitte published in November 2024 a report on [AI agents and multiagent systems](https://www2.deloitte.com/content/dam/Deloitte/us/Documents/consulting/us-ai-institute-generative-ai-agents-multiagent-systems.pdf) stating that "Through their ability to reason, plan, remember and act, AI agents address key limitations of typical language models." and "Executive leaders should make moves now to prepare for and embrace this next era of intelligent organizational transformation." - -In the Thanksgiving week, a new startup [/dev/agent](https://sdsa.ai/) building the next-gen OS for AI agents was in the spotlight. - -In December, Sequoia posted [here](https://www.linkedin.com/posts/konstantinebuhler_the-ai-landscape-is-shifting-from-simple-activity-7270111755710672897-ZHnr/) saying 2024 has been the year of agents (an agent is an AI that can complete tasks, not only tells you how to do it but also does it for you directly), and 2025 will be the year of networks of AI agents. - -So what exactly is an AI agent and how to start building an agent app? - -## What is an agent? - -The concept of agent is not new - in the 2010 3rd edition of Russell and Norvig's classic book Artificial Intelligence: A Modern Approach ("Modern" by 2010, two years before the deep learning revolution that started the truly modern AI), an agent is defined as "anything that can be viewed as perceiving its environment through sensors and acting upon that environment through actuators". These days, AI agent basically means LLM-powered agent - well, if we treat natural language understanding as a type of sensor, LLM agent is still a sub-category of the traditional agent. 
- -Lilian Weng in her popular June 2023 blog [LLM Powered Autonomous Agents](https://lilianweng.github.io/posts/2023-06-23-agent/) defines LLM-powered agent system to have four key components: - * Planning and Reflection: can break down large tasks into smaller ones; can do self-reflection over past actions and self improve; - * Memory: can use contextual info and recall info over extended periods (for other components to use); - * Tool Use: can understand what external APIs to use for info or action not built into LLMs; - * Action: can actually run the tools. - -Andrew Ng describes four [agentic design patterns](https://www.deeplearning.ai/the-batch/how-agents-can-improve-llm-performance/) as: -* Reflection -* Planning -* Tool calling -* Multi-agent collaboration, where "memory" is mentioned: Each agent implements its own workflow, has its own memory (itself a rapidly evolving area in agentic technology: how can an agent remember enough of its past interactions to perform better on upcoming ones?) - -In Deloitte's [report](https://www2.deloitte.com/content/dam/Deloitte/us/Documents/consulting/us-ai-institute-generative-ai-agents-multiagent-systems.pdf), AI agents are reasoning engines that can understand context, plan workflows, connect to external tools and data, and execute actions to achieve a defined goal. - -In a November 2024 blog by Letta [The AI agents stack](https://www.letta.com/blog/ai-agents-stack), LLM powered agent is described as the combination of tools use, autonomous execution, and memory. - -In addition, Harrison Chase defines agent in the blog [What is an AI agent](https://blog.langchain.dev/what-is-an-agent/) as "a system that uses an LLM to decide the control flow of an application." 
- -Yet another simple [summary](https://www.felicis.com/insight/the-agentic-web) by Felicis of what an agent does is that an agent expands LLMs to go from chat to act: an agent can pair LLMs with external data, multi-step reasoning and planning, and act on the user's behalf. - -All in all (see [Resources](#resources) for even more info), agents are systems that take a high-level task, use an LLM as a reasoning and planning engine, with the help of contextual info and long-term memory if needed, to decide what actions to take, reflect and improve on the actions, and eventually execute those actions to accomplish the task. - -It's time to see an agent app in action and enjoy some coding. Below is a preview of the questions or requests one may ask Gmagent: - -# Example Asks to Gmagent - -* do i have emails with attachment larger than 5mb? -* what's the detail of the email with subject this is an interesting paper -* how many emails with attachment -* tell me the detail about the attachments for the email with subject papers to read? -* give me a summary of the pdf thinking_llm.pdf -* draft an email to jeffxtang@meta.com saying how about lunch together this thursday? -* send the draft - -# Setup and Installation - -If you feel intimated by the steps of the following Enable Gmail API section, you may want to check again the example asks (to see what you can ask to the agent) and the example log (to see the whole conversation with gmagent) - the devil's in the detail and all the glorious description of a powerful trendy agent may not mention the little details one has to deal with to build it. - -## Enable Gmail API -1. Go to the [Google Cloud Console](https://console.cloud.google.com/). -2. Create a new project by clicking the dropdown on the top left then click NEW PROJECT. -3. Enter a Project name then click CREATE. -4. Under "APIs & Services" > "Enabled APIs & services", search for "gmail" and then Enable the "Gmail API" for your project. -5. 
Under "APIs & Services" > "OAuth consent screen", click "GO TO NEW EXPERIENCE", then click "GET STARTED", enter App name, select your gmail as User support email, choose External under Audience, enter your gmail again as Contact Information, and finally check the I agree to the Google API Services under Finish and click Continue - Create. -5. Again under "APIs & Services", go to Credentials. Click on + CREATE CREDENTIALS, then choose OAuth client ID (NOT API key). -Select Desktop App (NOT Web application, because you're assumed to want to start your Gmail agent locally first) as the application type and name it. Click Create to generate your client ID and client secret. -6. Click Download JSON and rename the downloaded file as credentials.json. This file will be used in your Python script for authentication. - -## Install Ollama with Llama 3.1 8B - -Download Ollama (available for macOS, Linux, and Windows) [here](https://ollama.com/). Then download and test run the Llama 3.1 8B model by running on a Terminal: -``` -ollama run llama3.1 -``` - -This will download a quantized version of Llama 3.1 of the size 4.7GB. 
- -## Install required packages -First, create a Conda or virtual env: - -``` -conda create -n emagent python=3.10 -conda activate emagent -``` -or -``` -python -m venv emagent -source emagent/bin/activate # on Linux, macOS -emagent\Scripts\activate # on Windows -``` - -Then install the required Python libraries: -``` -git clone https://github.com/meta-llama/llama-stack -cd llama-stack/docs/zero_to_hero_guide/email_agent -pip install -r requirements.txt -``` - -# Run Emagent - -To run Emagent, you need to first copy the `credentials.json` file downloaded and renamed above in Step 6 of Enable Gmail API to the email_agent folder, then run: -``` -python main.py --gmail <your_gmail_address> -``` - -The first time you run it, you'll get a prompt like this: -``` -Please visit this URL to authorize this application: https://accounts.google.com/o/oauth2/auth?response_type=code&client_id=xxxx -Enter the authorization code: -``` - -You need to copy the URL above and open it in a browser - if you Sign in with Google using the same Gmail you enabled for the Gmail API, then you'll see "You’ve been given access to an app that’s currently being tested. You should only continue if you know the developer that invited you.", otherwise if you sign in with another Gmail, you'll see "Gmail Agent App has not completed the Google verification process. The app is currently being tested, and can only be accessed by developer-approved testers. If you think you should have access, contact the developer." - -In the latter case, go to APIs & Services > OAuth consent screen > Test users, and click the + ADD USERS button, and you'll see this message: While publishing status is set to "Testing", only test users are able to access the app. Allowed user cap prior to app verification is 100, and is counted over the entire lifetime of the app. - -After clicking Continue, check the Select all checkbox to enable both settings required for running Gmagent: -``` -View your email messages and settings. 
-Manage drafts and send emails. -``` - -Finally, copy the Authorization code and paste it to the Terminal, hit Enter and you'll see Gmagent's initial greeting (which will likely differ because the default temperature value 0.8 is used here - see [Ollama's model file](https://github.com/ollama/ollama/blob/main/docs/modelfile.md#valid-parameters-and-values) for detail) such as: -``` -Hello! I'm Gmagent, here to help you manage your Gmail account with ease. - -What would you like to do today? Do you want me to: - -Check and respond to new emails -Compose a new email -Organize your inbox with filters or labels -Delete unwanted emails -Something else? - -Let me know how I can assist you! - -Your ask: -``` - -If you cancel here and run the command `python main.py --gmail <your_gmail_address>` again you should see the Gmagent greeting right away without the need to enter an authorization code, unless you enter a different Gmail address for the first time - in fact, for each authorized (added as a test user) Gmail address, a file `token_xxxx@gmail.com.pickle` will be created which contains the authorized token. - -See the example asks and interaction log above for the types of asks you may enter. - -# Implementation Notes -Notes here mainly cover how custom functions are defined, how Gmail API based functions are implemented, and how an Agent class is defined to handle memory for contextual chat and perform pre- and post-processing on the tool calling. 
- -## Available Custom Tool Definition -The `functions_prompt.py` defines the following six custom tools (functions), each as a subclass of Llama Stack's `CustomTool` (along with examples for each function call spec that Llama should return): - -* ListEmailsTool -* GetEmailDetailTool -* SendEmailTool -* GetPDFSummaryTool -* CreateDraftTool -* SendDraftTool - - -Below is an example custom tool call spec in JSON format, for user asks such as "do i have emails with attachments larger than 5mb", "any attachments larger than 5mb" or "let me know if i have large attachments over 5mb": -``` -{"name": "list_emails", "parameters": {"query": "has:attachment larger:5mb"}} -``` - -Porting the custom function definition to Llama Stack's CustomTool subclass is straightforward. - -## Actual Function Call Implementation - -For each defined custom function call, its implementation using the Gmail API is present in `gmagent.py`. We simply call each implementation in the corresponding CustomTool subclass's `run_impl` method. 
For example, the `list_emails` is defined as follows: - -``` -def list_emails(query='', max_results=100): - emails = [] - next_page_token = None - - while True: - response = service.users().messages().list( - userId=user_id, - maxResults=max_results, - pageToken=next_page_token, - q=query - ).execute() - - if 'messages' in response: - for msg in response['messages']: - sender, subject, received_time = get_email_info(msg['id']) - emails.append( - { - "message_id": msg['id'], - "sender": sender, - "subject": subject, - "received_time": received_time - } - ) - - next_page_token = response.get('nextPageToken') - - if not next_page_token: - break - - return emails -``` - -The function will be called by the Llama Stack agent in the `run_impl` method of the `ListEmailsTool` class if a user ask is like "do i have emails with attachments larger than 5mb": -``` -emails = list_emails(query) - ``` - -## The Llama Stack Agent class - -The `create_gmail_agent` in main.py creates a Llama Stack Agent with 6 custom tools using a `LlamaStackClient` instance that connects to Together.ai's Llama Stack server. The agent then creates a session, and in a loop, for each user ask, the agent uses the same session to create a turn, inside which a tool call spec is generated based on the user's ask and actual tool call then happens. After post-processing of the tool call result, a user-friendly message is printed to respond to the user's original ask. - -When you try out Emagent, you'll likely find that further pre- and post-processing still needed to make it production ready. In a great video on [Vertical LLM Agents](https://www.youtube.com/watch?v=eBVi_sLaYsc), Jake Heller said "after passes frankly even like 100 tests the odds that it will do on any random distribution of user inputs of the next 100,000, 100% accurately is very high" and "by the time you've dealt with like all the edge cases... 
there might be dozens of things you build into your application to actually make it work well and then you get to the prompting piece and writing out tests and very specific prompts and the strategy for how you break down a big problem into step by step by step thinking and how you feed in the information how you format that information the right way". That's what all the business logic is about. We'll cover decomposing a complicated ask and multi-step reasoning in a future version of Gmagent, and continue to explore the best possible way to streamline the pre- and post-processing. - -## Debugging output - -When running Gmagent, the detailed Llama returns, pre-processed tool call specs and the actual tool calling results are inside the `-------------------------` block, e.g.: - -------------------------- -Calling Llama... - -Llama returned: {'function_name': 'list_emails', 'parameters': {'query': 'subject:papers to read has:attachment'}}. - -Calling tool to access Gmail API: list_emails, {'query': 'subject:papers to read has:attachment'}... - -Tool calling returned: [{'message_id': '1936ef72ad3f30e8', 'sender': 'gmagent_tester1@gmail.com', 'subject': 'Fwd: papers to read', 'received_time': '2024-11-27 10:51:51 PST'}, {'message_id': '1936b819706a4923', 'sender': 'Jeff Tang ', 'subject': 'papers to read', 'received_time': '2024-11-26 18:44:19 PST'}] - -------------------------- - - -# TODOs - -1. Improve the search, reply, forward, create email draft, and query about types of attachments. -2. Improve the fallback and error handling mechanism when the user asks don't lead to a correct function calling spec or the function calling fails. -3. Improve the user experience by showing progress when some Gmail search API calls take long (minutes) to complete. -4. Implement the async behavior of Gmagent - schedule an email to be sent later. -5. Implement the agent planning - decomposing a complicated ask into sub-tasks, using ReAct and other methods. -6. 
Implement the agent long-term memory - longer context and memory across sessions (consider using Llama Stack/MemGPT/Letta) -7. Implement reflection - on the tool calling spec and results. -8. Introduce multiple-agent collaboration. -9. Implement the agent observability. -10. Compare different agent frameworks using Gmagent as the case study. -11. Add and implement a test plan and productionize Gmagent. - - -# Resources -1. Lilian Weng's blog [LLM Powered Autonomous Agents](https://lilianweng.github.io/posts/2023-06-23-agent/) -2. Andrew Ng's posts [Agentic Design Patterns](https://www.deeplearning.ai/the-batch/how-agents-can-improve-llm-performance/) with basic [implementations from scratch](https://github.com/neural-maze/agentic_patterns). -3. LangChain's survey [State of AI Agents](https://www.langchain.com/stateofaiagents) -4. Deloitte's report [AI agents and multiagent systems](https://www2.deloitte.com/content/dam/Deloitte/us/Documents/consulting/us-ai-institute-generative-ai-agents-multiagent-systems.pdf) -5. Letta's blog [The AI agents stack](https://www.letta.com/blog/ai-agents-stack) -6. Microsoft's multi-agent system [Magentic-One](https://www.microsoft.com/en-us/research/articles/magentic-one-a-generalist-multi-agent-system-for-solving-complex-tasks) -7. Amazon's [Multi-Agent Orchestrator framework](https://awslabs.github.io/multi-agent-orchestrator/) -8. Deeplearning.ai's [agent related courses](https://www.deeplearning.ai/courses/?courses_date_desc%5Bquery%5D=agents) (Meta, AWS, Microsoft, LangChain, LlamaIndex, crewAI, AutoGen, Letta) and some [lessons ported to using Llama](https://github.com/meta-llama/llama-recipes/tree/main/recipes/quickstart/agents/DeepLearningai_Course_Notebooks). -9. Felicis's [The Agentic Web](https://www.felicis.com/insight/the-agentic-web) -10. 
A pretty complete [list of AI agents](https://github.com/e2b-dev/awesome-ai-agents), not including [/dev/agents](https://sdsa.ai/), a very new startup building the next-gen OS for AI agents, though. -11. Sequoia's [post](https://www.linkedin.com/posts/konstantinebuhler_the-ai-landscape-is-shifting-from-simple-activity-7270111755710672897-ZHnr/) on 2024 being the year of AI agents and 2025 networks of AI agents. From a7d29952b0f630b1a27fabce97cef4364079ef51 Mon Sep 17 00:00:00 2001 From: Jeff Tang Date: Wed, 11 Dec 2024 18:14:44 -0800 Subject: [PATCH 5/8] draft bug fix --- docs/zero_to_hero_guide/email_agent/README.md | 6 +++++- docs/zero_to_hero_guide/email_agent/email_agent.py | 2 +- docs/zero_to_hero_guide/email_agent/main.py | 2 +- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/docs/zero_to_hero_guide/email_agent/README.md b/docs/zero_to_hero_guide/email_agent/README.md index f578e57db0..4e30cf57d2 100644 --- a/docs/zero_to_hero_guide/email_agent/README.md +++ b/docs/zero_to_hero_guide/email_agent/README.md @@ -26,7 +26,11 @@ pip install -r requirements.txt # Run Email Agent -The steps are also the same as the [original app]((https://github.com/meta-llama/llama-recipes/tree/gmagent/recipes/use_cases/email_agent). +The steps are also the same as the [original app]((https://github.com/meta-llama/llama-recipes/tree/gmagent/recipes/use_cases/email_agent): + +``` +python main.py --gmail +``` # Implementation Notes Notes here mainly cover how custom tools (functions) are defined and how the Llama Stack Agent class is used with the custom tools. 
diff --git a/docs/zero_to_hero_guide/email_agent/email_agent.py b/docs/zero_to_hero_guide/email_agent/email_agent.py index 0b55aa14e4..9e372fb224 100644 --- a/docs/zero_to_hero_guide/email_agent/email_agent.py +++ b/docs/zero_to_hero_guide/email_agent/email_agent.py @@ -423,7 +423,7 @@ def create_forward_draft(message_id, recipient_email, custom_message=None): def send_draft(id): sent_message = service.users().drafts().send( userId=user_id, - body={'id': id} + body={'id': memory['draft_id']} ).execute() return f"Draft sent with email ID: {sent_message['id']}" diff --git a/docs/zero_to_hero_guide/email_agent/main.py b/docs/zero_to_hero_guide/email_agent/main.py index 1c3fdfc536..f4d37ed0a0 100644 --- a/docs/zero_to_hero_guide/email_agent/main.py +++ b/docs/zero_to_hero_guide/email_agent/main.py @@ -123,7 +123,7 @@ async def main(): elif tool_name == "get_pdf_summary": output = result - print(f"\n-------------------------\n\nGmagent: {output}\n") + print(f"\n-------------------------\n\nAgent: {output}\n") elif log.role == "inference": print("Llama returned: ", end="") else: From cc75a8ce1bca776e07d5f5ffe958b650f94d8d1b Mon Sep 17 00:00:00 2001 From: Jeff Tang Date: Wed, 11 Dec 2024 18:15:45 -0800 Subject: [PATCH 6/8] added missing file --- docs/zero_to_hero_guide/email_agent/shared.py | 1 + 1 file changed, 1 insertion(+) create mode 100644 docs/zero_to_hero_guide/email_agent/shared.py diff --git a/docs/zero_to_hero_guide/email_agent/shared.py b/docs/zero_to_hero_guide/email_agent/shared.py new file mode 100644 index 0000000000..ea2b95ce2a --- /dev/null +++ b/docs/zero_to_hero_guide/email_agent/shared.py @@ -0,0 +1 @@ +memory = {} \ No newline at end of file From f3073d9fb1e1c0df3a39c75c3b39b007036b3a55 Mon Sep 17 00:00:00 2001 From: Jeff Tang Date: Wed, 11 Dec 2024 18:18:07 -0800 Subject: [PATCH 7/8] README cleanup --- docs/zero_to_hero_guide/email_agent/README.md | 31 +------------------ 1 file changed, 1 insertion(+), 30 deletions(-) diff --git 
a/docs/zero_to_hero_guide/email_agent/README.md b/docs/zero_to_hero_guide/email_agent/README.md index 4e30cf57d2..17e6af34d9 100644 --- a/docs/zero_to_hero_guide/email_agent/README.md +++ b/docs/zero_to_hero_guide/email_agent/README.md @@ -10,7 +10,7 @@ Currently implemented features of the agent include: * get summary of a PDF attachment * draft and send an email -We'll mainly cover here how to port a Llama app using native custom tools supported in Llama 3.1 (and later) and an agent implementation from scratch to using Llama Stack APIs. See the link above for a comprehensive overview, definition, and resources of LLM agents. +We'll mainly cover here how to port a Llama app using native custom tools supported in Llama 3.1 (and later) and an agent implementation from scratch to using Llama Stack APIs. See the link above for a comprehensive overview, definition, and resources of LLM agents, and a detailed list of TODOs for the email agent. # Setup and Installation @@ -140,32 +140,3 @@ The `create_email_agent` in main.py creates a Llama Stack Agent with 6 custom to ## Memory In `shared.py` we define a simple dictionary `memory`, used to hold short-term results such as a list of found emails based on the user ask, or the draft id of a created email draft. They're needed to answer follow up user asks such as "what attachments does the email with subject xxx have" or "send the draft". - - -# TODOs - -1. Improve the search, reply, forward, create email draft, and query about types of attachments. -2. Improve the fallback and error handling mechanism when the user asks don't lead to a correct function calling spec or the function calling fails. -3. Improve the user experience by showing progress when some Gmail search API calls take long (minutes) to complete. -4. Implement the async behavior of the agent - schedule an email to be sent later. -5. Implement the agent planning - decomposing a complicated ask into sub-tasks, using ReAct and other methods. -6. 
Implement the agent long-term memory - longer context and memory across sessions (consider using Llama Stack/MemGPT/Letta) -7. Implement reflection - on the tool calling spec and results. -8. Introduce multiple-agent collaboration. -9. Implement the agent observability. -10. Compare different agent frameworks using the agent as the case study. -11. Add and implement a test plan and productionize the email agent. - - -# Resources -1. Lilian Weng's blog [LLM Powered Autonomous Agents](https://lilianweng.github.io/posts/2023-06-23-agent/) -2. Andrew Ng's posts [Agentic Design Patterns](https://www.deeplearning.ai/the-batch/how-agents-can-improve-llm-performance/) with basic [implementations from scratch](https://github.com/neural-maze/agentic_patterns). -3. LangChain's survey [State of AI Agents](https://www.langchain.com/stateofaiagents) -4. Deloitte's report [AI agents and multiagent systems](https://www2.deloitte.com/content/dam/Deloitte/us/Documents/consulting/us-ai-institute-generative-ai-agents-multiagent-systems.pdf) -5. Letta's blog [The AI agents stack](https://www.letta.com/blog/ai-agents-stack) -6. Microsoft's multi-agent system [Magentic-One](https://www.microsoft.com/en-us/research/articles/magentic-one-a-generalist-multi-agent-system-for-solving-complex-tasks) -7. Amazon's [Multi-Agent Orchestrator framework](https://awslabs.github.io/multi-agent-orchestrator/) -8. Deeplearning.ai's [agent related courses](https://www.deeplearning.ai/courses/?courses_date_desc%5Bquery%5D=agents) (Meta, AWS, Microsoft, LangChain, LlamaIndex, crewAI, AutoGen, Letta) and some [lessons ported to using Llama](https://github.com/meta-llama/llama-recipes/tree/main/recipes/quickstart/agents/DeepLearningai_Course_Notebooks). -9. Felicis's [The Agentic Web](https://www.felicis.com/insight/the-agentic-web) -10. 
A pretty complete [list of AI agents](https://github.com/e2b-dev/awesome-ai-agents), not including [/dev/agents](https://sdsa.ai/), a very new startup building the next-gen OS for AI agents, though. -11. Sequoia's [post](https://www.linkedin.com/posts/konstantinebuhler_the-ai-landscape-is-shifting-from-simple-activity-7270111755710672897-ZHnr/) on 2024 being the year of AI agents and 2025 networks of AI agents. From f6caf6ef74b93acba5ecd11c8a36ebb41ca202bf Mon Sep 17 00:00:00 2001 From: Jeff Tang Date: Mon, 16 Dec 2024 14:48:44 -0800 Subject: [PATCH 8/8] README update with email agent links --- docs/zero_to_hero_guide/email_agent/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/zero_to_hero_guide/email_agent/README.md b/docs/zero_to_hero_guide/email_agent/README.md index 17e6af34d9..04f89f5550 100644 --- a/docs/zero_to_hero_guide/email_agent/README.md +++ b/docs/zero_to_hero_guide/email_agent/README.md @@ -1,6 +1,6 @@ # A Llama and Llama Stack Powered Email Agent -This is a Llama Stack port of the [Llama Powered Email Agent](https://github.com/meta-llama/llama-recipes/tree/gmagent/recipes/use_cases/email_agent) app that shows how to build an email agent app powered by Llama 3.1 8B and Llama Stack, using Llama Stack custom tool and agent APIs. +This is a Llama Stack port of the [Llama Powered Email Agent](https://github.com/meta-llama/llama-recipes/tree/main/recipes/use_cases/email_agent) app that shows how to build an email agent app powered by Llama 3.1 8B and Llama Stack, using Llama Stack custom tool and agent APIs. 
Currently implemented features of the agent include: * search for emails and attachments @@ -26,7 +26,7 @@ pip install -r requirements.txt # Run Email Agent -The steps are also the same as the [original app]((https://github.com/meta-llama/llama-recipes/tree/gmagent/recipes/use_cases/email_agent): +The steps are also the same as the [original app](https://github.com/meta-llama/llama-recipes/tree/main/recipes/use_cases/email_agent): ``` python main.py --gmail