diff --git a/lib/api/functions/pipeline/updateDataFeed/index.ts b/lib/api/functions/pipeline/updateDataFeed/index.ts index e4f7aec..9fbc7a0 100644 --- a/lib/api/functions/pipeline/updateDataFeed/index.ts +++ b/lib/api/functions/pipeline/updateDataFeed/index.ts @@ -6,26 +6,17 @@ import { import * as ddb from '@aws-appsync/utils/dynamodb' export function request (ctx: Context): DynamoDBUpdateItemRequest { - const { dataFeedId } = ctx.args.input const values: Record = {} - Object.keys(ctx.args.input as Record).forEach( - (key: string) => { - if ( - ctx.args?.input[key] !== undefined && - ctx.args?.input[key] !== null && - key !== 'dataFeedId' - ) { - console.log( - `UpdateDataFeed. Loop values: ${key} ---- ${ctx.args.input[key]}` - ) - values[key] = ctx.args.input[key] - } + for (const [key, value] of Object.entries( + ctx.args.input as Record + )) { + if (key !== 'id' && value !== undefined && value !== null) { + values[key] = value } - ) - + } return ddb.update({ key: { - dataFeedId, + dataFeedId: ctx.args.input.id, sk: 'dataFeed' }, update: { ...values } diff --git a/lib/data-feed-ingestion/rss-atom-ingestion/ingestion-step-function.filter-articles-with-bedrock.ts b/lib/data-feed-ingestion/rss-atom-ingestion/ingestion-step-function.filter-articles-with-bedrock.ts new file mode 100644 index 0000000..5568b1c --- /dev/null +++ b/lib/data-feed-ingestion/rss-atom-ingestion/ingestion-step-function.filter-articles-with-bedrock.ts @@ -0,0 +1,218 @@ +/* + * + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: MIT-0 + */ +import { Tracer } from '@aws-lambda-powertools/tracer' +import { captureLambdaHandler } from '@aws-lambda-powertools/tracer/middleware' +import { Logger } from '@aws-lambda-powertools/logger' +import { injectLambdaContext } from '@aws-lambda-powertools/logger/middleware' +import middy from '@middy/core' +import { type FeedArticle } from '../../shared/common' +import { + DynamoDBClient, + GetItemCommand, + GetItemCommandInput +} from '@aws-sdk/client-dynamodb' +import axios from 'axios' +import * as cheerio from 'cheerio' +import { + BedrockRuntimeClient, + InvokeModelCommand, + InvokeModelCommandInput +} from '@aws-sdk/client-bedrock-runtime' + +const SERVICE_NAME = 'filter-articles-with-bedrock' + +const tracer = new Tracer({ serviceName: SERVICE_NAME }) +const logger = new Logger({ serviceName: SERVICE_NAME }) + +const dynamodb = tracer.captureAWSv3Client(new DynamoDBClient()) +const bedrockRuntimeClient = tracer.captureAWSv3Client( + new BedrockRuntimeClient() +) + +const DATA_FEED_TABLE = process.env.DATA_FEED_TABLE +const BEDROCK_MODEL_ID = 'anthropic.claude-3-haiku-20240307-v1:0' + +interface FilterArticlesWithBedrockInput { + dataFeedId: string + articles: FeedArticle[] +} + +const lambdaHandler = async ( + event: FilterArticlesWithBedrockInput +): Promise => { + const { dataFeedId, articles } = event + logger.debug('Filtering articles with Bedrock for Data Feed ID ', dataFeedId) + logger.debug('Unfiltered new article count = ', { + articleLength: articles.length + }) + const filteredArticles = await filterArticlesWithBedrock(articles, dataFeedId) + logger.debug('Filtered article count = ' + filteredArticles.length) + return filteredArticles +} + +const filterArticlesWithBedrock = async ( + articles: FeedArticle[], + dataFeedId: string +): Promise => { + const filteredArticles: FeedArticle[] = [] + const filterPrompt = await getFilterPrompt(dataFeedId) + if (filterPrompt === null) { + return articles + } + for (const article of articles) { + logger.debug('Working on article', { article }) + const siteContent = await getSiteContent(article.url) + if (siteContent !== null) { + const isFiltered = await isArticleFilteredWithBedrock( + siteContent, + filterPrompt + ) + if (!isFiltered) { + console.debug('Article passed filter: ' + article.title) + filteredArticles.push(article) + } else { + console.debug('Article filtered out: ' + article.title) + } + } + } + return filteredArticles +} + +const getFilterPrompt = async (dataFeedId: string): Promise => { + // Get the filter prompt from dynamoDB using the dataFeedId + logger.debug('Getting filter prompt for data feed ', dataFeedId) + const input: GetItemCommandInput = { + Key: { + dataFeedId: { + S: dataFeedId + }, + sk: { + S: 'dataFeed' + } + }, + TableName: DATA_FEED_TABLE, + AttributesToGet: ['articleFilterPrompt'] + } + const command = new GetItemCommand(input) + const result = await dynamodb.send(command) + if ( + result.Item !== undefined && + result.Item.articleFilterPrompt?.S !== undefined + ) { + logger.debug( + 'Filter prompt found for data feed ' + result.Item.articleFilterPrompt.S, + dataFeedId + ) + return result.Item.articleFilterPrompt.S + } else { + logger.debug('No filter prompt found for data feed ', dataFeedId) + return null + } +} + +const isArticleFilteredWithBedrock = async ( + articleContent: string, + filterPrompt: string +): Promise => { + if (filterPrompt === null) { + return false + } + const prompt = + 'You are an agent responsible for reading articles and determining if the article should be filtered out based on the filter prompt.' + + "Is the article filtered out based on the filter prompt? Return either 'true' or 'false'." + + "If the article is filtered out, return 'true', otherwise return 'false'." + + 'Here is the article content:\n' + + '
' + + articleContent + + '
\n' + + 'Here is the filter prompt:\n' + + '' + + filterPrompt + + '' + + "Only return 'true' if the article is filtered out based on the filter prompt. Do not return any other content." + + 'Place the response in a xml tag.' + + const input: InvokeModelCommandInput = { + modelId: BEDROCK_MODEL_ID, + contentType: 'application/json', + accept: '*/*', + body: new TextEncoder().encode( + JSON.stringify({ + max_tokens: 1000, + anthropic_version: 'bedrock-2023-05-31', + messages: [ + { + role: 'user', + content: [ + { + type: 'text', + text: prompt + } + ] + } + ] + }) + ) + } + const command = new InvokeModelCommand(input) + const response = await bedrockRuntimeClient.send(command) + const responseText = new TextDecoder().decode(response.body) + console.debug('Response from Bedrock: ' + responseText) + const responseObject = JSON.parse(responseText) + return extractResponseValue(responseObject.content[0].text, 'filter_response') +} + +const getSiteContent = async (url: string): Promise => { + logger.debug(`getSiteContent Called; url = ${url}`) + tracer.putMetadata('url', url) + let $: cheerio.Root + try { + logger.debug('URL of Provided Site = ' + url) + const response = await axios.get(url) + tracer.putAnnotation('url', 'Successfully Crawled') + const text = response.data as string + $ = cheerio.load(text) + // Cutting out elements that aren't needed + $('footer').remove() + $('header').remove() + $('script').remove() + $('style').remove() + $('nav').remove() + } catch (error) { + logger.error(`Failed to crawl; url = ${url}`) + logger.error(JSON.stringify(error)) + tracer.addErrorAsMetadata(error as Error) + throw error + } + let articleText: string = '' + if ($('article').length > 0) { + articleText = $('article').text() + } else { + articleText = $('body').text() + } + if (articleText !== undefined) { + return articleText + } else { + return null + } +} + +const extractResponseValue = (response: string, xml_tag: string): boolean => { + const formattedInput = response + .replace(/(\r\n|\n|\r)/gm, '') + .replace(/\\n/g, '') + const open_tag = `<${xml_tag}>` + const close_tag = `` + const regex = new RegExp(`(?<=${open_tag})(.*?)(?=${close_tag})`, 'g') + const match = formattedInput.match(regex) + const isFiltered = match?.[0].toLocaleLowerCase() === 'true' + return isFiltered +} + +export const handler = middy() + .handler(lambdaHandler) + .use(captureLambdaHandler(tracer, { captureResponse: false })) + .use(injectLambdaContext(logger)) diff --git a/lib/data-feed-ingestion/rss-atom-ingestion/ingestion-step-function.ts b/lib/data-feed-ingestion/rss-atom-ingestion/ingestion-step-function.ts index 016813f..0d3cf5e 100644 --- a/lib/data-feed-ingestion/rss-atom-ingestion/ingestion-step-function.ts +++ b/lib/data-feed-ingestion/rss-atom-ingestion/ingestion-step-function.ts @@ -127,6 +127,40 @@ export class IngestionStepFunction extends Construct { }) ) + const filterArticlesWithBedrockFunction = new NodejsFunction( + this, + 'filter-articles-with-bedrock', + { + description: + 'Function responsible for filtering out using a user provided prompt and Amazon Bedrock.', + handler: 'handler', + entry: new URL( + import.meta.url.replace( + /(.*)(\..+)/, + '$1.' + 'filter-articles-with-bedrock' + '$2' + ) + ).pathname, + runtime: Runtime.NODEJS_20_X, + architecture: Architecture.ARM_64, + tracing: Tracing.ACTIVE, + loggingFormat: LoggingFormat.JSON, + applicationLogLevelV2: ApplicationLogLevel.DEBUG, + insightsVersion: LambdaInsightsVersion.VERSION_1_0_229_0, + timeout: cdk.Duration.minutes(5), + environment: { + DATA_FEED_TABLE: dataFeedTable.tableName + } + } + ) + dataFeedTable.grantReadData(filterArticlesWithBedrockFunction) + filterArticlesWithBedrockFunction.addToRolePolicy( + new PolicyStatement({ + actions: ['bedrock:InvokeModel'], + resources: ['*'], + effect: Effect.ALLOW + }) + ) + const getDataFeedDetailsJob = new DynamoGetItem( this, 'GetDataFeedDetailsJob', @@ -182,6 +216,23 @@ export class IngestionStepFunction extends Construct { payload: TaskInput.fromJsonPathAt('$') }) + const filterArticlesWithBedrockJob = new LambdaInvoke( + this, + 'FilterArticlesWithBedrock', + { + lambdaFunction: filterArticlesWithBedrockFunction, + inputPath: JsonPath.stringAt('$'), + payload: TaskInput.fromObject({ + dataFeedId: JsonPath.stringAt('$.dataFeedId'), + articles: JsonPath.objectAt('$.articlesData.articles') + }), + resultSelector: { + 'articles.$': '$.Payload' + }, + resultPath: '$.articlesData' + } + ) + const mapArticles = new Map(this, 'MapArticles', { itemsPath: '$.articlesData.articles', itemSelector: { @@ -197,6 +248,7 @@ export class IngestionStepFunction extends Construct { const definition = getDataFeedDetailsJob .next(readFeedJob) .next(filterIngestedArticlesJob) + .next(filterArticlesWithBedrockJob) .next(mapArticles) const stateMachine = new StateMachine(this, 'IngestionStateMachine', { @@ -217,6 +269,7 @@ export class IngestionStepFunction extends Construct { feedReaderFunction.grantInvoke(stateMachine) filterIngestedArticlesFunction.grantInvoke(stateMachine) articleIngestionFunction.grantInvoke(stateMachine) + filterArticlesWithBedrockFunction.grantInvoke(stateMachine) props.dataFeedTable.grantWriteData(articleIngestionFunction) props.rssAtomDataBucket.grantPut(stateMachine) this.stateMachine = stateMachine @@ -229,6 +282,7 @@ export class IngestionStepFunction extends Construct { feedReaderFunction, articleIngestionFunction, filterIngestedArticlesFunction, + filterArticlesWithBedrockFunction, stateMachine ], [ diff --git a/lib/shared/api/API.ts b/lib/shared/api/API.ts index 1d24b2f..418697f 100644 --- a/lib/shared/api/API.ts +++ b/lib/shared/api/API.ts @@ -8,6 +8,7 @@ export type CreateDataFeedInput = { description?: string | null, enabled: boolean, summarizationPrompt?: string | null, + articleFilterPrompt?: string | null, isPrivate?: boolean | null, }; @@ -23,6 +24,7 @@ export type DataFeed = { title: string, description?: string | null, summarizationPrompt?: string | null, + articleFilterPrompt?: string | null, isPrivate: boolean, authGranted?: AuthGranted | null, }; @@ -134,6 +136,7 @@ export type UpdateDataFeedInput = { title?: string | null, description?: string | null, summarizationPrompt?: string | null, + articleFilterPrompt?: string | null, isPrivate?: boolean | null, }; @@ -272,6 +275,7 @@ export type CreateDataFeedMutation = { title: string, description?: string | null, summarizationPrompt?: string | null, + articleFilterPrompt?: string | null, isPrivate: boolean, authGranted?: AuthGranted | null, } | null, @@ -303,6 +307,7 @@ export type CreateNewsletterMutation = { title: string, description?: string | null, summarizationPrompt?: string | null, + articleFilterPrompt?: string | null, isPrivate: boolean, authGranted?: AuthGranted | null, } | null > | null, @@ -424,6 +429,7 @@ export type GetNewsletterQuery = { title: string, description?: string | null, summarizationPrompt?: string | null, + articleFilterPrompt?: string | null, isPrivate: boolean, authGranted?: AuthGranted | null, } | null > | null, @@ -461,6 +467,7 @@ export type ListDataFeedsQuery = { title: string, description?: string | null, summarizationPrompt?: string | null, + articleFilterPrompt?: string | null, isPrivate: boolean, authGranted?: AuthGranted | null, } | null > | null, @@ -504,6 +511,7 @@ export type GetDataFeedQuery = { title: string, description?: string | null, summarizationPrompt?: string | null, + articleFilterPrompt?: string | null, isPrivate: boolean, authGranted?: AuthGranted | null, } | null, diff --git a/lib/shared/api/graphql/mutations.ts b/lib/shared/api/graphql/mutations.ts index 4bae8d3..f1daaee 100644 --- a/lib/shared/api/graphql/mutations.ts +++ b/lib/shared/api/graphql/mutations.ts @@ -39,6 +39,7 @@ export const createDataFeed = /* GraphQL */ `mutation CreateDataFeed($input: Cre title description summarizationPrompt + articleFilterPrompt isPrivate authGranted __typename @@ -68,6 +69,7 @@ export const createNewsletter = /* GraphQL */ `mutation CreateNewsletter($input: title description summarizationPrompt + articleFilterPrompt isPrivate authGranted __typename diff --git a/lib/shared/api/graphql/queries.ts b/lib/shared/api/graphql/queries.ts index 4ea4a4b..b667e75 100644 --- a/lib/shared/api/graphql/queries.ts +++ b/lib/shared/api/graphql/queries.ts @@ -58,6 +58,7 @@ export const getNewsletter = /* GraphQL */ `query GetNewsletter($input: GetNewsl title description summarizationPrompt + articleFilterPrompt isPrivate authGranted __typename @@ -96,6 +97,7 @@ export const listDataFeeds = /* GraphQL */ `query ListDataFeeds( title description summarizationPrompt + articleFilterPrompt isPrivate authGranted __typename @@ -139,6 +141,7 @@ export const getDataFeed = /* GraphQL */ `query GetDataFeed($input: GetDataFeedI title description summarizationPrompt + articleFilterPrompt isPrivate authGranted __typename diff --git a/lib/shared/api/schema.graphql b/lib/shared/api/schema.graphql index ad1bb77..195e279 100644 --- a/lib/shared/api/schema.graphql +++ b/lib/shared/api/schema.graphql @@ -66,6 +66,7 @@ type DataFeed { title: String! description: String summarizationPrompt: String + articleFilterPrompt: String isPrivate: Boolean! authGranted: AuthGranted } @@ -125,6 +126,7 @@ input CreateDataFeedInput { description: String enabled: Boolean! summarizationPrompt: String + articleFilterPrompt: String isPrivate: Boolean } @@ -136,6 +138,7 @@ input UpdateDataFeedInput { title: String description: String summarizationPrompt: String + articleFilterPrompt: String isPrivate: Boolean } diff --git a/lib/shared/api/types.json b/lib/shared/api/types.json index c0c2a4a..8791d59 100644 --- a/lib/shared/api/types.json +++ b/lib/shared/api/types.json @@ -542,6 +542,18 @@ "description": null, "fields": null, "inputFields": [ + { + "name": "articleFilterPrompt", + "description": null, + "type": { + "kind": "SCALAR", + "name": "String", + "ofType": null + }, + "defaultValue": null, + "isDeprecated": false, + "deprecationReason": null + }, { "name": "description", "description": null, @@ -767,6 +779,18 @@ "isDeprecated": false, "deprecationReason": null }, + { + "name": "articleFilterPrompt", + "description": null, + "args": [], + "type": { + "kind": "SCALAR", + "name": "String", + "ofType": null + }, + "isDeprecated": false, + "deprecationReason": null + }, { "name": "articles", "description": null, @@ -2520,6 +2544,18 @@ "description": null, "fields": null, "inputFields": [ + { + "name": "articleFilterPrompt", + "description": null, + "type": { + "kind": "SCALAR", + "name": "String", + "ofType": null + }, + "defaultValue": null, + "isDeprecated": false, + "deprecationReason": null + }, { "name": "description", "description": null, diff --git a/lib/shared/common/types.ts b/lib/shared/common/types.ts index c8889a5..9dd579c 100644 --- a/lib/shared/common/types.ts +++ b/lib/shared/common/types.ts @@ -34,7 +34,7 @@ export interface CreateAuthCheckInput { } } export interface FeedArticle { - link: string + url: string title: string guid: string subscriptionId?: string diff --git a/lib/user-interface/genai-newsletter-ui/src/components/data-feeds/article-table.tsx b/lib/user-interface/genai-newsletter-ui/src/components/data-feeds/article-table.tsx index bf045fb..fbf0878 100644 --- a/lib/user-interface/genai-newsletter-ui/src/components/data-feeds/article-table.tsx +++ b/lib/user-interface/genai-newsletter-ui/src/components/data-feeds/article-table.tsx @@ -135,7 +135,7 @@ export default function DataFeedArticleTable () { resizableColumns items={articles} loading={loading} - trackBy="articleId" + trackBy="id" loadingText="Loading data feed articles" empty={ diff --git a/lib/user-interface/genai-newsletter-ui/src/components/data-feeds/data-feed-detail.tsx b/lib/user-interface/genai-newsletter-ui/src/components/data-feeds/data-feed-detail.tsx index c0e3d07..68368a3 100644 --- a/lib/user-interface/genai-newsletter-ui/src/components/data-feeds/data-feed-detail.tsx +++ b/lib/user-interface/genai-newsletter-ui/src/components/data-feeds/data-feed-detail.tsx @@ -91,6 +91,13 @@ export default function DataFeedDetail () { )} + + {setDataFeedId?.articleFilterPrompt ?? ( + + No Custom Filter Provided + + )} + )} diff --git a/lib/user-interface/genai-newsletter-ui/src/components/data-feeds/data-feeds-table.tsx b/lib/user-interface/genai-newsletter-ui/src/components/data-feeds/data-feeds-table.tsx index a6747e9..3fb1b28 100644 --- a/lib/user-interface/genai-newsletter-ui/src/components/data-feeds/data-feeds-table.tsx +++ b/lib/user-interface/genai-newsletter-ui/src/components/data-feeds/data-feeds-table.tsx @@ -171,7 +171,7 @@ export default function DataFeedsTable (input?: ListDataFeedsInput) { resizableColumns loading={loadingDataFeeds} selectionType="single" - trackBy="dataFeedId" + trackBy="id" selectedItems={selectedDataFeed ? [selectedDataFeed] : []} onSelectionChange={({ detail }) => { setSelectedDataFeed(detail.selectedItems[0]) diff --git a/lib/user-interface/genai-newsletter-ui/src/components/data-feeds/forms/data-feed-details-form.tsx b/lib/user-interface/genai-newsletter-ui/src/components/data-feeds/forms/data-feed-details-form.tsx index 2b00d44..392e592 100644 --- a/lib/user-interface/genai-newsletter-ui/src/components/data-feeds/forms/data-feed-details-form.tsx +++ b/lib/user-interface/genai-newsletter-ui/src/components/data-feeds/forms/data-feed-details-form.tsx @@ -36,6 +36,7 @@ export default function DataFeedDetailsForm () { const [titleError] = useState('') const [description, setDescription] = useState('') const [summarizationPrompt, setSummarizationPrompt] = useState('') + const [articleFilterPrompt, setArticleFilterPrompt] = useState('') const [isPrivate, setIsPrivate] = useState(true) const getDataFeedData = useCallback(async () => { @@ -66,6 +67,7 @@ export default function DataFeedDetailsForm () { setDescription(result.data.getDataFeed?.description ?? '') setSummarizationPrompt(result.data.getDataFeed?.summarizationPrompt ?? '') setIsPrivate(result.data.getDataFeed?.isPrivate ?? true) + setArticleFilterPrompt(result.data.getDataFeed?.articleFilterPrompt ?? '') setLoading(false) }, [appContext, dataFeedId]) @@ -88,6 +90,7 @@ export default function DataFeedDetailsForm () { title, description, summarizationPrompt, + articleFilterPrompt, isPrivate } } @@ -99,14 +102,15 @@ export default function DataFeedDetailsForm () { navigate(`/feeds/${dataFeedId}`) }, [ appContext, - description, - enabled, - navigate, dataFeedId, - summarizationPrompt, - title, url, - isPrivate + enabled, + title, + description, + summarizationPrompt, + articleFilterPrompt, + isPrivate, + navigate ]) const createDataFeedAction = useCallback(async () => { @@ -124,6 +128,7 @@ export default function DataFeedDetailsForm () { title, description, summarizationPrompt, + articleFilterPrompt, isPrivate } } @@ -135,6 +140,7 @@ export default function DataFeedDetailsForm () { navigate(`/feeds/${result.data.createDataFeed?.id}`) }, [ appContext, + articleFilterPrompt, description, enabled, isPrivate, @@ -254,6 +260,16 @@ export default function DataFeedDetailsForm () { onChange={(e) => setSummarizationPrompt(e.detail.value)} /> + +